From a6cd5f5cef34984e3d9f0dbd5d03579e3c293dd4 Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz-juelich.de> Date: Fri, 28 May 2021 10:06:49 +0200 Subject: [PATCH] all filter data handlers can now return either filter portions as dimension or distinct branches --- .../data_handler_mixed_sampling.py | 16 +++-- .../data_handler/data_handler_with_filter.py | 66 ++++++++++++++++--- 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index 7446d005..62a354a2 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -4,6 +4,8 @@ __date__ = '2020-11-05' from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation, \ DataHandlerFirFilterSingleStation, DataHandlerFilterSingleStation, DataHandlerClimateFirFilterSingleStation +from mlair.data_handler.data_handler_with_filter import DataHandlerClimateFirFilter, DataHandlerFirFilter, \ + DataHandlerKzFilter from mlair.data_handler import DefaultDataHandler from mlair import helpers from mlair.helpers import remove_items @@ -176,11 +178,12 @@ class DataHandlerMixedSamplingWithKzFilterSingleStation(DataHandlerMixedSampling return int(self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2) def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data + _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \ + self.filter_dim_order = lazy_data super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) -class DataHandlerMixedSamplingWithKzFilter(DefaultDataHandler): +class DataHandlerMixedSamplingWithKzFilter(DataHandlerKzFilter): """Data handler using mixed sampling for input and target. Inputs are temporal filtered.""" data_handler = DataHandlerMixedSamplingWithKzFilterSingleStation @@ -199,7 +202,7 @@ class DataHandlerMixedSamplingWithFirFilterSingleStation(DataHandlerMixedSamplin return max(self.filter_order) def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.fir_coeff = lazy_data + _data, _meta, _input_data, _target_data, self.fir_coeff, self.filter_dim_order = lazy_data super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) @staticmethod @@ -214,7 +217,7 @@ class DataHandlerMixedSamplingWithFirFilterSingleStation(DataHandlerMixedSamplin raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.") -class DataHandlerMixedSamplingWithFirFilter(DefaultDataHandler): +class DataHandlerMixedSamplingWithFirFilter(DataHandlerFirFilter): """Data handler using mixed sampling for input and target. Inputs are temporal filtered.""" data_handler = DataHandlerMixedSamplingWithFirFilterSingleStation @@ -236,7 +239,8 @@ class DataHandlerMixedSamplingWithClimateFirFilterSingleStation(DataHandlerMixed super().__init__(*args, **kwargs) def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori = lazy_data + _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori, \ + self.filter_dim_order = lazy_data DataHandlerMixedSamplingWithFilterSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data)) @staticmethod @@ -251,7 +255,7 @@ class DataHandlerMixedSamplingWithClimateFirFilterSingleStation(DataHandlerMixed raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.") -class DataHandlerMixedSamplingWithClimateFirFilter(DefaultDataHandler): +class DataHandlerMixedSamplingWithClimateFirFilter(DataHandlerClimateFirFilter): """Data handler using mixed sampling for input and target. Inputs are temporal filtered.""" data_handler = DataHandlerMixedSamplingWithClimateFirFilterSingleStation diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py index fa94b88c..5da1b893 100644 --- a/mlair/data_handler/data_handler_with_filter.py +++ b/mlair/data_handler/data_handler_with_filter.py @@ -47,6 +47,7 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation): def __init__(self, *args, filter_dim=DEFAULT_FILTER_DIM, **kwargs): # self.original_data = None # ToDo: implement here something to store unfiltered data self.filter_dim = filter_dim + self.filter_dim_order = None super().__init__(*args, **kwargs) def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]: @@ -104,6 +105,38 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation): self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data])) +class DataHandlerFilter(DefaultDataHandler): + """Data handler using FIR filtered data.""" + + data_handler = DataHandlerFilterSingleStation + data_handler_transformation = DataHandlerFilterSingleStation + _requirements = data_handler.requirements() + + def __init__(self, *args, use_filter_branches=False, **kwargs): + self.use_filter_branches = use_filter_branches + super().__init__(*args, **kwargs) + + @classmethod + def own_args(cls, *args): + """Return all arguments (including kwonlyargs).""" + super_own_args = DefaultDataHandler.own_args(*args) + arg_spec = inspect.getfullargspec(cls) + list_of_args = arg_spec.args + arg_spec.kwonlyargs + super_own_args + return remove_items(list_of_args, ["self"] + list(args)) + + def get_X_original(self): + if self.use_filter_branches is True: + X = [] + for data in self._collection: + X_total = data.get_X() + filter_dim = data.filter_dim + for filter_name in data.filter_dim_order: + X.append(X_total.sel({filter_dim: filter_name}, drop=True)) + return X + else: + return super().get_X_original() + + class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): """Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered.""" @@ -203,17 +236,18 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): band_num += 1 if self._add_unfiltered: index.append("unfiltered") + self.filter_dim_order = index return pd.Index(index, name=self.filter_dim) def _create_lazy_data(self): - return [self._data, self.meta, self.input_data, self.target_data, self.fir_coeff] + return [self._data, self.meta, self.input_data, self.target_data, self.fir_coeff, self.filter_dim_order] def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.fir_coeff = lazy_data + _data, _meta, _input_data, _target_data, self.fir_coeff, self.filter_dim_order = lazy_data super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) -class DataHandlerFirFilter(DefaultDataHandler): +class DataHandlerFirFilter(DataHandlerFilter): """Data handler using FIR filtered data.""" data_handler = DataHandlerFirFilterSingleStation @@ -262,17 +296,20 @@ class DataHandlerKzFilterSingleStation(DataHandlerFilterSingleStation): f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1) index = list(map(f, index.tolist())) index = list(map(lambda x: str(x) + "d", index)) + ["res"] + self.filter_dim_order = index return pd.Index(index, name=self.filter_dim) def _create_lazy_data(self): - return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days] + return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days, + self.filter_dim_order] def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data + _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \ + self.filter_dim_order = lazy_data super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) -class DataHandlerKzFilter(DefaultDataHandler): +class DataHandlerKzFilter(DataHandlerFilter): """Data handler using kz filtered data.""" data_handler = DataHandlerKzFilterSingleStation @@ -372,14 +409,16 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation index = list(map(lambda x: str(x) + "d", index)) + ["res"] if self._add_unfiltered: index.append("unfiltered") + self.filter_dim_order = index return pd.Index(index, name=self.filter_dim) def _create_lazy_data(self): return [self._data, self.meta, self.input_data, self.target_data, self.climate_filter_coeff, - self.apriori, self.all_apriori] + self.apriori, self.all_apriori, self.filter_dim_order] def _extract_lazy(self, lazy_data): - _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori = lazy_data + _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori, \ + self.filter_dim_order = lazy_data DataHandlerSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data)) @staticmethod @@ -440,10 +479,19 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation self._transformation = (opts_input, opts_target) -class DataHandlerClimateFirFilter(DefaultDataHandler): +class DataHandlerClimateFirFilter(DataHandlerFilter): """Data handler using climatic adjusted FIR filtered data.""" data_handler = DataHandlerClimateFirFilterSingleStation data_handler_transformation = DataHandlerClimateFirFilterSingleStation _requirements = data_handler.requirements() _store_attributes = data_handler.store_attributes() + + # def get_X_original(self): + # X = [] + # for data in self._collection: + # X_total = data.get_X() + # filter_dim = data.filter_dim + # for filter in data.filter_dim_order: + # X.append(X_total.sel({filter_dim: filter}, drop=True)) + # return X -- GitLab