From 32f3ff2203d47fea8ad1c2df7328506a7e5cd058 Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz-juelich.de> Date: Wed, 17 Mar 2021 18:55:22 +0100 Subject: [PATCH] data handlers with filters will create negative values, which is incompatible with log transformation. standardization will be used in these cases --- mlair/data_handler/data_handler_kz_filter.py | 15 ++++++++++++++- mlair/data_handler/data_handler_mixed_sampling.py | 2 +- mlair/data_handler/data_handler_single_station.py | 5 +++-- mlair/data_handler/default_data_handler.py | 4 +++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py index 1f2c63e5..539712b3 100644 --- a/mlair/data_handler/data_handler_kz_filter.py +++ b/mlair/data_handler/data_handler_kz_filter.py @@ -7,7 +7,7 @@ import inspect import numpy as np import pandas as pd import xarray as xr -from typing import List, Union +from typing import List, Union, Tuple, Optional from functools import partial from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation @@ -37,6 +37,19 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation): self.cutoff_period_days = None super().__init__(*args, **kwargs) + def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]: + """ + Adjust setup of transformation because kz filtered data will have negative values, which are not compatible with + the log transformation. Therefore, replace all log transformation methods by a default standardization. This is + only applied on the input side. 
+ """ + transformation = super(__class__, self).setup_transformation(transformation) + if transformation[0] is not None: + for k, v in transformation[0].items(): + if v["method"] == "log": + transformation[0][k]["method"] = "standardise" + return transformation + def _check_sampling(self, **kwargs): assert kwargs.get("sampling") == "hourly" # This data handler requires hourly data resolution diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index 86e6f856..75e9e645 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -158,7 +158,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi def _extract_lazy(self, lazy_data): _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data start_inp, end_inp = self.update_start_end(0) - self._data = list(map(self._slice_prep, _data, [start_inp, self.start], [end_inp, self.end])) + self._data = list(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1])) self.input_data = self._slice_prep(_input_data, start_inp, end_inp) self.target_data = self._slice_prep(_target_data, self.start, self.end) diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index 0497bee0..19ff6fa1 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -252,7 +252,9 @@ class DataHandlerSingleStation(AbstractDataHandler): with open(filename, "rb") as pickle_file: lazy_data = dill.load(pickle_file) self._extract_lazy(lazy_data) + logging.info(f"{self.station}: used lazy data") except FileNotFoundError: + logging.info(f"{self.station}: could not use lazy data") self.make_input_target() def _extract_lazy(self, lazy_data): @@ -594,8 +596,7 @@ class DataHandlerSingleStation(AbstractDataHandler): """ return 
data.loc[{coord: slice(str(start), str(end))}] - @staticmethod - def setup_transformation(transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]: + def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]: """ Set up transformation by extracting all relevant information. diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py index 07a866ae..5eb6fd02 100644 --- a/mlair/data_handler/default_data_handler.py +++ b/mlair/data_handler/default_data_handler.py @@ -273,7 +273,9 @@ class DefaultDataHandler(AbstractDataHandler): if var not in transformation_dict[i].keys(): transformation_dict[i][var] = {} opts = transformation[var] - assert transformation_dict[i][var].get("method", opts["method"]) == opts["method"] + if not transformation_dict[i][var].get("method", opts["method"]) == opts["method"]: + # data handlers with filters are allowed to change transformation method to standardise + assert hasattr(dh, "filter_dim") and opts["method"] == "standardise" transformation_dict[i][var]["method"] = opts["method"] for k in ["mean", "std", "min", "max"]: old = transformation_dict[i][var].get(k, None) -- GitLab