diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index 2a3385e54ce36b025099e720779a7f8f49dd2e1e..06a9a7047de8320e971f8bfc4b188265780e52e6 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -159,32 +159,32 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi self.input_data = self._slice_prep(_input_data, start_inp, end_inp) self.target_data = self._slice_prep(_target_data, self.start, self.end) - -class DataHandlerMixedSamplingWithKzFilterSingleStation(DataHandlerMixedSamplingWithFilterSingleStation, - DataHandlerKzFilterSingleStation): - _requirements1 = DataHandlerKzFilterSingleStation.requirements() - _requirements2 = DataHandlerMixedSamplingWithFilterSingleStation.requirements() - _requirements = list(set(_requirements1 + _requirements2)) - - def estimate_filter_width(self): - """ - f = 0.5 / (len * sqrt(itr)) -> T = 1 / f - :return: - """ - return int(self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2) - - def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \ - self.filter_dim_order = lazy_data - super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) - - -class DataHandlerMixedSamplingWithKzFilter(DataHandlerKzFilter): - """Data handler using mixed sampling for input and target. 
Inputs are temporal filtered.""" - - data_handler = DataHandlerMixedSamplingWithKzFilterSingleStation - data_handler_transformation = DataHandlerMixedSamplingWithKzFilterSingleStation - _requirements = list(set(data_handler.requirements() + DataHandlerKzFilter.requirements())) +# +# class DataHandlerMixedSamplingWithKzFilterSingleStation(DataHandlerMixedSamplingWithFilterSingleStation, +# DataHandlerKzFilterSingleStation): +# _requirements1 = DataHandlerKzFilterSingleStation.requirements() +# _requirements2 = DataHandlerMixedSamplingWithFilterSingleStation.requirements() +# _requirements = list(set(_requirements1 + _requirements2)) +# +# def estimate_filter_width(self): +# """ +# f = 0.5 / (len * sqrt(itr)) -> T = 1 / f +# :return: +# """ +# return int(self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2) +# +# def _extract_lazy(self, lazy_data): +# _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \ +# self.filter_dim_order = lazy_data +# super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) +# +# +# class DataHandlerMixedSamplingWithKzFilter(DataHandlerKzFilter): +# """Data handler using mixed sampling for input and target. Inputs are temporal filtered.""" +# +# data_handler = DataHandlerMixedSamplingWithKzFilterSingleStation +# data_handler_transformation = DataHandlerMixedSamplingWithKzFilterSingleStation +# _requirements = list(set(data_handler.requirements() + DataHandlerKzFilter.requirements())) class DataHandlerMixedSamplingWithFirFilterSingleStation(DataHandlerMixedSamplingWithFilterSingleStation, @@ -319,74 +319,74 @@ class DataHandlerMixedSamplingWithClimateFirFilter(DataHandlerClimateFirFilter): else: return super().get_X_original() - -class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithKzFilterSingleStation): - """ - Data handler using mixed sampling for input and target. 
Inputs are temporal filtered and depending on the - separation frequency of a filtered time series the time step delta for input data is adjusted (see image below). - - .. image:: ../../../../../_source/_plots/separation_of_scales.png - :width: 400 - - """ - - _requirements = DataHandlerMixedSamplingWithKzFilterSingleStation.requirements() - _hash = DataHandlerMixedSamplingWithKzFilterSingleStation._hash + ["time_delta"] - - def __init__(self, *args, time_delta=np.sqrt, **kwargs): - assert isinstance(time_delta, Callable) - self.time_delta = time_delta - super().__init__(*args, **kwargs) - - def make_history_window(self, dim_name_of_inputs: str, window: int, dim_name_of_shift: str) -> None: - """ - Create a xr.DataArray containing history data. - - Shift the data window+1 times and return a xarray which has a new dimension 'window' containing the shifted - data. This is used to represent history in the data. Results are stored in history attribute. - - :param dim_name_of_inputs: Name of dimension which contains the input variables - :param window: number of time steps to look back in history - Note: window will be treated as negative value. This should be in agreement with looking back on - a time line. 
Nonetheless positive values are allowed but they are converted to its negative - expression - :param dim_name_of_shift: Dimension along shift will be applied - """ - window = -abs(window) - data = self.input_data - self.history = self.stride(data, dim_name_of_shift, window, offset=self.window_history_offset) - - def stride(self, data: xr.DataArray, dim: str, window: int, offset: int = 0) -> xr.DataArray: - time_deltas = np.round(self.time_delta(self.cutoff_period)).astype(int) - start, end = window, 1 - res = [] - _range = list(map(lambda x: x + offset, range(start, end))) - window_array = self.create_index_array(self.window_dim, _range, squeeze_dim=self.target_dim) - for delta, filter_name in zip(np.append(time_deltas, 1), data.coords["filter"]): - res_filter = [] - data_filter = data.sel({"filter": filter_name}) - for w in _range: - res_filter.append(data_filter.shift({dim: -(w - offset) * delta - offset})) - res_filter = xr.concat(res_filter, dim=window_array).chunk() - res.append(res_filter) - res = xr.concat(res, dim="filter").compute() - return res - - def estimate_filter_width(self): - """ - Attention: this method returns the maximum value of - * either estimated filter width f = 0.5 / (len * sqrt(itr)) -> T = 1 / f or - * time delta method applied on the estimated filter width mupliplied by window_history_size - to provide a sufficiently wide filter width. - """ - est = self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2 - return int(max([self.time_delta(est) * self.window_history_size, est])) - - -class DataHandlerSeparationOfScales(DefaultDataHandler): - """Data handler using mixed sampling for input and target. 
Inputs are temporal filtered and different time step - sizes are applied in relation to frequencies.""" - - data_handler = DataHandlerSeparationOfScalesSingleStation - data_handler_transformation = DataHandlerSeparationOfScalesSingleStation - _requirements = data_handler.requirements() +# +# class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithKzFilterSingleStation): +# """ +# Data handler using mixed sampling for input and target. Inputs are temporal filtered and depending on the +# separation frequency of a filtered time series the time step delta for input data is adjusted (see image below). +# +# .. image:: ../../../../../_source/_plots/separation_of_scales.png +# :width: 400 +# +# """ +# +# _requirements = DataHandlerMixedSamplingWithKzFilterSingleStation.requirements() +# _hash = DataHandlerMixedSamplingWithKzFilterSingleStation._hash + ["time_delta"] +# +# def __init__(self, *args, time_delta=np.sqrt, **kwargs): +# assert isinstance(time_delta, Callable) +# self.time_delta = time_delta +# super().__init__(*args, **kwargs) +# +# def make_history_window(self, dim_name_of_inputs: str, window: int, dim_name_of_shift: str) -> None: +# """ +# Create a xr.DataArray containing history data. +# +# Shift the data window+1 times and return a xarray which has a new dimension 'window' containing the shifted +# data. This is used to represent history in the data. Results are stored in history attribute. +# +# :param dim_name_of_inputs: Name of dimension which contains the input variables +# :param window: number of time steps to look back in history +# Note: window will be treated as negative value. This should be in agreement with looking back on +# a time line. 
Nonetheless positive values are allowed but they are converted to its negative +# expression +# :param dim_name_of_shift: Dimension along shift will be applied +# """ +# window = -abs(window) +# data = self.input_data +# self.history = self.stride(data, dim_name_of_shift, window, offset=self.window_history_offset) +# +# def stride(self, data: xr.DataArray, dim: str, window: int, offset: int = 0) -> xr.DataArray: +# time_deltas = np.round(self.time_delta(self.cutoff_period)).astype(int) +# start, end = window, 1 +# res = [] +# _range = list(map(lambda x: x + offset, range(start, end))) +# window_array = self.create_index_array(self.window_dim, _range, squeeze_dim=self.target_dim) +# for delta, filter_name in zip(np.append(time_deltas, 1), data.coords["filter"]): +# res_filter = [] +# data_filter = data.sel({"filter": filter_name}) +# for w in _range: +# res_filter.append(data_filter.shift({dim: -(w - offset) * delta - offset})) +# res_filter = xr.concat(res_filter, dim=window_array).chunk() +# res.append(res_filter) +# res = xr.concat(res, dim="filter").compute() +# return res +# +# def estimate_filter_width(self): +# """ +# Attention: this method returns the maximum value of +# * either estimated filter width f = 0.5 / (len * sqrt(itr)) -> T = 1 / f or +# * time delta method applied on the estimated filter width multiplied by window_history_size +# to provide a sufficiently wide filter width. +# """ +# est = self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2 +# return int(max([self.time_delta(est) * self.window_history_size, est])) +# +# +# class DataHandlerSeparationOfScales(DefaultDataHandler): +# """Data handler using mixed sampling for input and target. 
Inputs are temporal filtered and different time step +# sizes are applied in relation to frequencies.""" +# +# data_handler = DataHandlerSeparationOfScalesSingleStation +# data_handler_transformation = DataHandlerSeparationOfScalesSingleStation +# _requirements = data_handler.requirements() diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py index 125b225d5d282707c1680be3f4cc8f63cb1bb235..1c72ed6e6aa1d6069d71f5a54dd0d9deadf173d3 100644 --- a/mlair/data_handler/data_handler_with_filter.py +++ b/mlair/data_handler/data_handler_with_filter.py @@ -124,7 +124,6 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): """Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered.""" _requirements = DataHandlerFilterSingleStation.requirements() - _hash = DataHandlerFilterSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type"] DEFAULT_WINDOW_TYPE = ("kaiser", 5) @@ -311,68 +310,68 @@ class DataHandlerFirFilter(DataHandlerFilter): data_handler_transformation = DataHandlerFirFilterSingleStation _requirements = list(set(data_handler.requirements() + DataHandlerFilter.requirements())) - -class DataHandlerKzFilterSingleStation(DataHandlerFilterSingleStation): - """Data handler for a single station to be used by a superior data handler. 
Inputs are kz filtered.""" - - _requirements = DataHandlerFilterSingleStation.requirements() - _hash = DataHandlerFilterSingleStation._hash + ["kz_filter_length", "kz_filter_iter"] - - def __init__(self, *args, kz_filter_length, kz_filter_iter, **kwargs): - self._check_sampling(**kwargs) - # self.original_data = None # ToDo: implement here something to store unfiltered data - self.kz_filter_length = to_list(kz_filter_length) - self.kz_filter_iter = to_list(kz_filter_iter) - self.cutoff_period = None - self.cutoff_period_days = None - super().__init__(*args, **kwargs) - - @TimeTrackingWrapper - def apply_filter(self): - """Apply kolmogorov zurbenko filter only on inputs.""" - kz = KZFilter(self.input_data, wl=self.kz_filter_length, itr=self.kz_filter_iter, filter_dim=self.time_dim) - filtered_data: List[xr.DataArray] = kz.run() - self.cutoff_period = kz.period_null() - self.cutoff_period_days = kz.period_null_days() - self.input_data = xr.concat(filtered_data, pd.Index(self.create_filter_index(), name=self.filter_dim)) - # this is just a code snippet to check the results of the kz filter - # import matplotlib - # matplotlib.use("TkAgg") - # import matplotlib.pyplot as plt - # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot() - # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter") - - def create_filter_index(self) -> pd.Index: - """ - Round cut off periods in days and append 'res' for residuum index. - - Round small numbers (<10) to single decimal, and higher numbers to int. Transform as list of str and append - 'res' for residuum index. 
- """ - index = np.round(self.cutoff_period_days, 1) - f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1) - index = list(map(f, index.tolist())) - index = list(map(lambda x: str(x) + "d", index)) + ["res"] - self.filter_dim_order = index - return pd.Index(index, name=self.filter_dim) - - def _create_lazy_data(self): - return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days, - self.filter_dim_order] - - def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \ - self.filter_dim_order = lazy_data - super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) - - -class DataHandlerKzFilter(DataHandlerFilter): - """Data handler using kz filtered data.""" - - data_handler = DataHandlerKzFilterSingleStation - data_handler_transformation = DataHandlerKzFilterSingleStation - _requirements = list(set(data_handler.requirements() + DataHandlerFilter.requirements())) - +# +# class DataHandlerKzFilterSingleStation(DataHandlerFilterSingleStation): +# """Data handler for a single station to be used by a superior data handler. 
Inputs are kz filtered.""" +# +# _requirements = DataHandlerFilterSingleStation.requirements() +# _hash = DataHandlerFilterSingleStation._hash + ["kz_filter_length", "kz_filter_iter"] +# +# def __init__(self, *args, kz_filter_length, kz_filter_iter, **kwargs): +# self._check_sampling(**kwargs) +# # self.original_data = None # ToDo: implement here something to store unfiltered data +# self.kz_filter_length = to_list(kz_filter_length) +# self.kz_filter_iter = to_list(kz_filter_iter) +# self.cutoff_period = None +# self.cutoff_period_days = None +# super().__init__(*args, **kwargs) +# +# @TimeTrackingWrapper +# def apply_filter(self): +# """Apply kolmogorov zurbenko filter only on inputs.""" +# kz = KZFilter(self.input_data, wl=self.kz_filter_length, itr=self.kz_filter_iter, filter_dim=self.time_dim) +# filtered_data: List[xr.DataArray] = kz.run() +# self.cutoff_period = kz.period_null() +# self.cutoff_period_days = kz.period_null_days() +# self.input_data = xr.concat(filtered_data, pd.Index(self.create_filter_index(), name=self.filter_dim)) +# # this is just a code snippet to check the results of the kz filter +# # import matplotlib +# # matplotlib.use("TkAgg") +# # import matplotlib.pyplot as plt +# # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot() +# # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter") +# +# def create_filter_index(self) -> pd.Index: +# """ +# Round cut off periods in days and append 'res' for residuum index. +# +# Round small numbers (<10) to single decimal, and higher numbers to int. Transform as list of str and append +# 'res' for residuum index. 
+# """ +# index = np.round(self.cutoff_period_days, 1) +# f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1) +# index = list(map(f, index.tolist())) +# index = list(map(lambda x: str(x) + "d", index)) + ["res"] +# self.filter_dim_order = index +# return pd.Index(index, name=self.filter_dim) +# +# def _create_lazy_data(self): +# return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days, +# self.filter_dim_order] +# +# def _extract_lazy(self, lazy_data): +# _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \ +# self.filter_dim_order = lazy_data +# super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) +# +# +# class DataHandlerKzFilter(DataHandlerFilter): +# """Data handler using kz filtered data.""" +# +# data_handler = DataHandlerKzFilterSingleStation +# data_handler_transformation = DataHandlerKzFilterSingleStation +# _requirements = list(set(data_handler.requirements() + DataHandlerFilter.requirements())) +# class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation): """ diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py index 47051a500c29349197f3163861a0fe40cade525d..82ab4c88557c2ab9f4364607f18079ff1261f5e6 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -14,6 +14,7 @@ import numpy as np import pandas as pd import xarray as xr import matplotlib +# matplotlib.use("Agg") from matplotlib import lines as mlines, pyplot as plt, patches as mpatches, dates as mdates from astropy.timeseries import LombScargle @@ -21,8 +22,6 @@ from mlair.data_handler import DataCollection from mlair.helpers import TimeTrackingWrapper, to_list, remove_items from mlair.plotting.abstract_plot_class import AbstractPlotClass -matplotlib.use("Agg") - @TimeTrackingWrapper class PlotStationMap(AbstractPlotClass): # pragma: no cover @@ -907,7 +906,7 @@ 
def f_proc_hist(data, variables, n_bins, variables_dim): # pragma: no cover return res, interval_width, bin_edges -class PlotClimateFirFilter(AbstractPlotClass): +class PlotClimateFirFilter(AbstractPlotClass): # pragma: no cover """ Plot climate FIR filter components.