From 32f3ff2203d47fea8ad1c2df7328506a7e5cd058 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 17 Mar 2021 18:55:22 +0100
Subject: [PATCH] data handlers with filters will create negative values, which
 is incompatible with log transformation. standardization will be used in this
 cases

---
 mlair/data_handler/data_handler_kz_filter.py      | 15 ++++++++++++++-
 mlair/data_handler/data_handler_mixed_sampling.py |  2 +-
 mlair/data_handler/data_handler_single_station.py |  5 +++--
 mlair/data_handler/default_data_handler.py        |  4 +++-
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py
index 1f2c63e5..539712b3 100644
--- a/mlair/data_handler/data_handler_kz_filter.py
+++ b/mlair/data_handler/data_handler_kz_filter.py
@@ -7,7 +7,7 @@ import inspect
 import numpy as np
 import pandas as pd
 import xarray as xr
-from typing import List, Union
+from typing import List, Union, Tuple, Optional
 from functools import partial
 
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
@@ -37,6 +37,19 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
         self.cutoff_period_days = None
         super().__init__(*args, **kwargs)
 
+    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
+        """
+        Adjust setup of transformation because kfz filtered data will have negative values which is not compatible with
+        the log transformation. Therefore, replace all log transformation methods by a default standardization. This is
+        only applied on input side.
+        """
+        transformation = super(__class__, self).setup_transformation(transformation)
+        if transformation[0] is not None:
+            for k, v in transformation[0].items():
+                if v["method"] == "log":
+                    transformation[0][k]["method"] = "standardise"
+        return transformation
+
     def _check_sampling(self, **kwargs):
         assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution
 
diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 86e6f856..75e9e645 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -158,7 +158,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
     def _extract_lazy(self, lazy_data):
         _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
         start_inp, end_inp = self.update_start_end(0)
-        self._data = list(map(self._slice_prep, _data, [start_inp, self.start], [end_inp, self.end]))
+        self._data = list(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1]))
         self.input_data = self._slice_prep(_input_data, start_inp, end_inp)
         self.target_data = self._slice_prep(_target_data, self.start, self.end)
 
diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 0497bee0..19ff6fa1 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -252,7 +252,9 @@ class DataHandlerSingleStation(AbstractDataHandler):
             with open(filename, "rb") as pickle_file:
                 lazy_data = dill.load(pickle_file)
             self._extract_lazy(lazy_data)
+            logging.info(f"{self.station}: used lazy data")
         except FileNotFoundError:
+            logging.info(f"{self.station}: could not use lazy data")
             self.make_input_target()
 
     def _extract_lazy(self, lazy_data):
@@ -594,8 +596,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
         """
         return data.loc[{coord: slice(str(start), str(end))}]
 
-    @staticmethod
-    def setup_transformation(transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
+    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
         """
         Set up transformation by extracting all relevant information.
 
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 07a866ae..5eb6fd02 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -273,7 +273,9 @@ class DefaultDataHandler(AbstractDataHandler):
                         if var not in transformation_dict[i].keys():
                             transformation_dict[i][var] = {}
                         opts = transformation[var]
-                        assert transformation_dict[i][var].get("method", opts["method"]) == opts["method"]
+                        if not transformation_dict[i][var].get("method", opts["method"]) == opts["method"]:
+                            # data handlers with filters are allowed to change transformation method to standardise
+                            assert hasattr(dh, "filter_dim") and opts["method"] == "standardise"
                         transformation_dict[i][var]["method"] = opts["method"]
                         for k in ["mean", "std", "min", "max"]:
                             old = transformation_dict[i][var].get(k, None)
-- 
GitLab