From 55ecf226c10735bc00de6d7b931c32210c2c1328 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 25 Feb 2021 10:29:51 +0100
Subject: [PATCH 001/175] moved abstract model class to separate file

---
 mlair/model_modules/__init__.py             |   2 +-
 mlair/model_modules/abstract_model_class.py | 241 ++++++++++++++++++++
 mlair/model_modules/model_class.py          | 225 +-----------------
 mlair/run_modules/post_processing.py        |   4 +-
 test/test_model_modules/test_model_class.py |   2 +-
 test/test_run_modules/test_model_setup.py   |   3 +-
 6 files changed, 249 insertions(+), 228 deletions(-)
 create mode 100644 mlair/model_modules/abstract_model_class.py

diff --git a/mlair/model_modules/__init__.py b/mlair/model_modules/__init__.py
index ea2067bd..96c92108 100644
--- a/mlair/model_modules/__init__.py
+++ b/mlair/model_modules/__init__.py
@@ -1,3 +1,3 @@
 """Collection of all modules that are related to a model."""
 
-from .model_class import AbstractModelClass
+from .abstract_model_class import AbstractModelClass
diff --git a/mlair/model_modules/abstract_model_class.py b/mlair/model_modules/abstract_model_class.py
new file mode 100644
index 00000000..894ff7ac
--- /dev/null
+++ b/mlair/model_modules/abstract_model_class.py
@@ -0,0 +1,241 @@
+import inspect
+from abc import ABC
+from typing import Any, Dict
+
+import keras
+import tensorflow as tf
+
+from mlair.helpers import remove_items
+
+
+class AbstractModelClass(ABC):
+    """
+    The AbstractModelClass provides a unified skeleton for any model passed to the machine learning workflow.
+
+    The model can always be accessed by calling ModelClass.model or directly by a model method without stating the
+    model attribute name (e.g. ModelClass.model.compile -> ModelClass.compile). Besides the model, this class provides
+    the corresponding loss function.
+    """
+
+    _requirements = []
+
+    def __init__(self, input_shape, output_shape) -> None:
+        """Predefine internal attributes for model and loss."""
+        self.__model = None
+        self.model_name = self.__class__.__name__
+        self.__custom_objects = {}
+        self.__allowed_compile_options = {'optimizer': None,
+                                          'loss': None,
+                                          'metrics': None,
+                                          'loss_weights': None,
+                                          'sample_weight_mode': None,
+                                          'weighted_metrics': None,
+                                          'target_tensors': None
+                                          }
+        self.__compile_options = self.__allowed_compile_options
+        self.__compile_options_is_set = False
+        self._input_shape = input_shape
+        self._output_shape = self.__extract_from_tuple(output_shape)
+
+    def __getattr__(self, name: str) -> Any:
+        """
+        Is called if __getattribute__ is not able to find the requested attribute.
+
+        Normally, the model class is saved into a variable like `model = ModelClass()`. To bypass a call like
+        `model.model` to access the _model attribute, this method tries to search for the named attribute in the
+        self.model namespace and returns this attribute if available. Therefore, the following expression is true:
+        `ModelClass().compile == ModelClass().model.compile` as long as the called attribute/method is not part of
+        the ModelClass itself.
+
+        :param name: name of the attribute or method to call
+
+        :return: attribute or method from self.model namespace
+        """
+        return self.model.__getattribute__(name)
+
+    @property
+    def model(self) -> keras.Model:
+        """
+        The model property containing a keras.Model instance.
+
+        :return: the keras model
+        """
+        return self.__model
+
+    @model.setter
+    def model(self, value):
+        self.__model = value
+
+    @property
+    def custom_objects(self) -> Dict:
+        """
+        The custom objects property collects all non-keras utilities that are used in the model class.
+
+        To load such a customised and already compiled model (e.g. from local disk), this information is required.
+
+        :return: custom objects in a dictionary
+        """
+        return self.__custom_objects
+
+    @custom_objects.setter
+    def custom_objects(self, value) -> None:
+        self.__custom_objects = value
+
+    @property
+    def compile_options(self) -> Dict:
+        """
+        The compile options property allows the user to use all keras.compile() arguments. They can either be passed
+        as a dictionary (1), as attributes without setting compile_options (2), or as a mixture of both (partly
+        defined as instance attributes and partly passed as a dictionary) (3).
+        The method will raise an error if the same parameter is set differently.
+
+        Example (1), recommended (includes a check that only valid keras.compile keywords are used):
+        .. code-block:: python
+            def set_compile_options(self):
+                self.compile_options = {"optimizer": keras.optimizers.SGD(),
+                                        "loss": keras.losses.mean_squared_error,
+                                        "metrics": ["mse", "mae"]}
+
+        Example (2)
+        .. code-block:: python
+            def set_compile_options(self):
+                self.optimizer = keras.optimizers.SGD()
+                self.loss = keras.losses.mean_squared_error
+                self.metrics = ["mse", "mae"]
+
+        Example (3)
+        Correct:
+        .. code-block:: python
+            def set_compile_options(self):
+                self.optimizer = keras.optimizers.SGD()
+                self.loss = keras.losses.mean_squared_error
+                self.compile_options = {"metrics": ["mse", "mae"]}
+
+        Incorrect (will raise an error):
+        .. code-block:: python
+            def set_compile_options(self):
+                self.optimizer = keras.optimizers.SGD()
+                self.loss = keras.losses.mean_squared_error
+                self.compile_options = {"optimizer": keras.optimizers.Adam(), "metrics": ["mse", "mae"]}
+
+        Note:
+        * As long as the attribute and the dict value are exactly equal, the setter method will not raise an error.
+        * For example (2), there is no check implemented whether the attributes are valid compile options.
+
+
+        :return:
+        """
+        if self.__compile_options_is_set is False:
+            self.compile_options = None
+        return self.__compile_options
+
+    @compile_options.setter
+    def compile_options(self, value: Dict) -> None:
+        if isinstance(value, dict):
+            if not (set(value.keys()) <= set(self.__allowed_compile_options.keys())):
+                raise ValueError(f"Got invalid key for compile_options. {value.keys()}")
+
+        for allow_k in self.__allowed_compile_options.keys():
+            if hasattr(self, allow_k):
+                new_v_attr = getattr(self, allow_k)
+            else:
+                new_v_attr = None
+            if isinstance(value, dict):
+                new_v_dic = value.pop(allow_k, None)
+            elif value is None:
+                new_v_dic = None
+            else:
+                raise TypeError(f"`compile_options' must be `dict' or `None', but is {type(value)}.")
+            if (new_v_attr == new_v_dic or self.__compare_keras_optimizers(new_v_attr, new_v_dic)) or (
+                    (new_v_attr is None) ^ (new_v_dic is None)):
+                if new_v_attr is not None:
+                    self.__compile_options[allow_k] = new_v_attr
+                else:
+                    self.__compile_options[allow_k] = new_v_dic
+
+            else:
+                raise ValueError(
+                    f"Got different values or arguments for same argument: self.{allow_k}={new_v_attr.__class__} and '{allow_k}': {new_v_dic.__class__}")
+        self.__compile_options_is_set = True
+
+    @staticmethod
+    def __extract_from_tuple(tup):
+        """Return element of tuple if it contains only a single element."""
+        return tup[0] if isinstance(tup, tuple) and len(tup) == 1 else tup
+
+    @staticmethod
+    def __compare_keras_optimizers(first, second):
+        """
+        Compare whether two optimisers and all of their settings are exactly equal.
+
+        :return: True if optimisers are interchangeable, or False if optimisers are distinguishable.
+        """
+        if first.__class__ == second.__class__ and first.__module__ == 'keras.optimizers':
+            res = True
+            init = tf.global_variables_initializer()
+            with tf.Session() as sess:
+                sess.run(init)
+                for k, v in first.__dict__.items():
+                    try:
+                        res *= sess.run(v) == sess.run(second.__dict__[k])
+                    except TypeError:
+                        res *= v == second.__dict__[k]
+        else:
+            res = False
+        return bool(res)
+
+    def get_settings(self) -> Dict:
+        """
+        Get all class attributes that are not protected in the AbstractModelClass as a dictionary.
+
+        :return: all class attributes
+        """
+        return dict((k, v) for (k, v) in self.__dict__.items() if not k.startswith("_AbstractModelClass__"))
+
+    def set_model(self):
+        """Abstract method to set model."""
+        raise NotImplementedError
+
+    def set_compile_options(self):
+        """
+        This method only has to be defined in the child class if additional compile options
+        (other than optimizer and loss) should be used.
+        Has to be set as a dictionary: {'optimizer': None,
+                                      'loss': None,
+                                      'metrics': None,
+                                      'loss_weights': None,
+                                      'sample_weight_mode': None,
+                                      'weighted_metrics': None,
+                                      'target_tensors': None
+                                      }
+
+        :return:
+        """
+        raise NotImplementedError
+
+    def set_custom_objects(self, **kwargs) -> None:
+        """
+        Set custom objects that are not part of keras framework.
+
+        These custom objects are needed if an already compiled model is loaded from disk. There is a special treatment
+        for the Padding2D class, which is a base class for different padding types. For a correct behaviour, all
+        supported subclasses are added as custom objects in addition to the given ones.
+
+        :param kwargs: all custom objects that should be saved
+        """
+        if "Padding2D" in kwargs.keys():
+            kwargs.update(kwargs["Padding2D"].allowed_paddings)
+        self.custom_objects = kwargs
+
+    @classmethod
+    def requirements(cls):
+        """Return requirements and own arguments without duplicates."""
+        return list(set(cls._requirements + cls.own_args()))
+
+    @classmethod
+    def own_args(cls, *args):
+        """Return all arguments (including kwonlyargs)."""
+        arg_spec = inspect.getfullargspec(cls)
+        list_of_args = arg_spec.args + arg_spec.kwonlyargs
+        return remove_items(list_of_args, ["self"] + list(args))
diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py
index a2eda6e8..f6e97987 100644
--- a/mlair/model_modules/model_class.py
+++ b/mlair/model_modules/model_class.py
@@ -120,235 +120,14 @@ import mlair.model_modules.keras_extensions
 __author__ = "Lukas Leufen, Felix Kleinert"
 __date__ = '2020-05-12'
 
-from abc import ABC
-from typing import Any, Callable, Dict
-
 import keras
-import tensorflow as tf
+
+from mlair.model_modules import AbstractModelClass
 from mlair.model_modules.inception_model import InceptionModelBase
 from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.advanced_paddings import PadUtils, Padding2D, SymmetricPadding2D
 
 
-class AbstractModelClass(ABC):
-    """
-    The AbstractModelClass provides a unified skeleton for any model provided to the machine learning workflow.
-
-    The model can always be accessed by calling ModelClass.model or directly by an model method without parsing the
-    model attribute name (e.g. ModelClass.model.compile -> ModelClass.compile). Beside the model, this class provides
-    the corresponding loss function.
-    """
-
-    def __init__(self, input_shape, output_shape) -> None:
-        """Predefine internal attributes for model and loss."""
-        self.__model = None
-        self.model_name = self.__class__.__name__
-        self.__custom_objects = {}
-        self.__allowed_compile_options = {'optimizer': None,
-                                          'loss': None,
-                                          'metrics': None,
-                                          'loss_weights': None,
-                                          'sample_weight_mode': None,
-                                          'weighted_metrics': None,
-                                          'target_tensors': None
-                                          }
-        self.__compile_options = self.__allowed_compile_options
-        self.__compile_options_is_set = False
-        self._input_shape = input_shape
-        self._output_shape = self.__extract_from_tuple(output_shape)
-
-    def __getattr__(self, name: str) -> Any:
-        """
-        Is called if __getattribute__ is not able to find requested attribute.
-
-        Normally, the model class is saved into a variable like `model = ModelClass()`. To bypass a call like
-        `model.model` to access the _model attribute, this method tries to search for the named attribute in the
-        self.model namespace and returns this attribute if available. Therefore, following expression is true:
-        `ModelClass().compile == ModelClass().model.compile` as long the called attribute/method is not part if the
-        ModelClass itself.
-
-        :param name: name of the attribute or method to call
-
-        :return: attribute or method from self.model namespace
-        """
-        return self.model.__getattribute__(name)
-
-    @property
-    def model(self) -> keras.Model:
-        """
-        The model property containing a keras.Model instance.
-
-        :return: the keras model
-        """
-        return self.__model
-
-    @model.setter
-    def model(self, value):
-        self.__model = value
-
-    @property
-    def custom_objects(self) -> Dict:
-        """
-        The custom objects property collects all non-keras utilities that are used in the model class.
-
-        To load such a customised and already compiled model (e.g. from local disk), this information is required.
-
-        :return: custom objects in a dictionary
-        """
-        return self.__custom_objects
-
-    @custom_objects.setter
-    def custom_objects(self, value) -> None:
-        self.__custom_objects = value
-
-    @property
-    def compile_options(self) -> Callable:
-        """
-        The compile options property allows the user to use all keras.compile() arguments. They can ether be passed as
-        dictionary (1), as attribute, without setting compile_options (2) or as mixture (partly defined as instance
-        attributes and partly parsing a dictionary) of both of them (3).
-        The method will raise an Error when the same parameter is set differently.
-
-        Example (1) Recommended (includes check for valid keywords which are used as args in keras.compile)
-        .. code-block:: python
-            def set_compile_options(self):
-                self.compile_options = {"optimizer": keras.optimizers.SGD(),
-                                        "loss": keras.losses.mean_squared_error,
-                                        "metrics": ["mse", "mae"]}
-
-        Example (2)
-        .. code-block:: python
-            def set_compile_options(self):
-                self.optimizer = keras.optimizers.SGD()
-                self.loss = keras.losses.mean_squared_error
-                self.metrics = ["mse", "mae"]
-
-        Example (3)
-        Correct:
-        .. code-block:: python
-            def set_compile_options(self):
-                self.optimizer = keras.optimizers.SGD()
-                self.loss = keras.losses.mean_squared_error
-                self.compile_options = {"metrics": ["mse", "mae"]}
-
-        Incorrect: (Will raise an error)
-        .. code-block:: python
-            def set_compile_options(self):
-                self.optimizer = keras.optimizers.SGD()
-                self.loss = keras.losses.mean_squared_error
-                self.compile_options = {"optimizer" = keras.optimizers.Adam(), "metrics": ["mse", "mae"]}
-
-        Note:
-        * As long as the attribute and the dict value have exactly the same values, the setter method will not raise
-        an error
-        * For example (2) there is no check implemented, if the attributes are valid compile options
-
-
-        :return:
-        """
-        if self.__compile_options_is_set is False:
-            self.compile_options = None
-        return self.__compile_options
-
-    @compile_options.setter
-    def compile_options(self, value: Dict) -> None:
-        if isinstance(value, dict):
-            if not (set(value.keys()) <= set(self.__allowed_compile_options.keys())):
-                raise ValueError(f"Got invalid key for compile_options. {value.keys()}")
-
-        for allow_k in self.__allowed_compile_options.keys():
-            if hasattr(self, allow_k):
-                new_v_attr = getattr(self, allow_k)
-            else:
-                new_v_attr = None
-            if isinstance(value, dict):
-                new_v_dic = value.pop(allow_k, None)
-            elif value is None:
-                new_v_dic = None
-            else:
-                raise TypeError(f"`compile_options' must be `dict' or `None', but is {type(value)}.")
-            if (new_v_attr == new_v_dic or self.__compare_keras_optimizers(new_v_attr, new_v_dic)) or (
-                    (new_v_attr is None) ^ (new_v_dic is None)):
-                if new_v_attr is not None:
-                    self.__compile_options[allow_k] = new_v_attr
-                else:
-                    self.__compile_options[allow_k] = new_v_dic
-
-            else:
-                raise ValueError(
-                    f"Got different values or arguments for same argument: self.{allow_k}={new_v_attr.__class__} and '{allow_k}': {new_v_dic.__class__}")
-        self.__compile_options_is_set = True
-
-    @staticmethod
-    def __extract_from_tuple(tup):
-        """Return element of tuple if it contains only a single element."""
-        return tup[0] if isinstance(tup, tuple) and len(tup) == 1 else tup
-
-    @staticmethod
-    def __compare_keras_optimizers(first, second):
-        """
-        Compares if optimiser and all settings of the optimisers are exactly equal.
-
-        :return True if optimisers are interchangeable, or False if optimisers are distinguishable.
-        """
-        if first.__class__ == second.__class__ and first.__module__ == 'keras.optimizers':
-            res = True
-            init = tf.global_variables_initializer()
-            with tf.Session() as sess:
-                sess.run(init)
-                for k, v in first.__dict__.items():
-                    try:
-                        res *= sess.run(v) == sess.run(second.__dict__[k])
-                    except TypeError:
-                        res *= v == second.__dict__[k]
-        else:
-            res = False
-        return bool(res)
-
-    def get_settings(self) -> Dict:
-        """
-        Get all class attributes that are not protected in the AbstractModelClass as dictionary.
-
-        :return: all class attributes
-        """
-        return dict((k, v) for (k, v) in self.__dict__.items() if not k.startswith("_AbstractModelClass__"))
-
-    def set_model(self):
-        """Abstract method to set model."""
-        raise NotImplementedError
-
-    def set_compile_options(self):
-        """
-        This method only has to be defined in child class, when additional compile options should be used ()
-        (other options than optimizer and loss)
-        Has to be set as dictionary: {'optimizer': None,
-                                      'loss': None,
-                                      'metrics': None,
-                                      'loss_weights': None,
-                                      'sample_weight_mode': None,
-                                      'weighted_metrics': None,
-                                      'target_tensors': None
-                                      }
-
-        :return:
-        """
-        raise NotImplementedError
-
-    def set_custom_objects(self, **kwargs) -> None:
-        """
-        Set custom objects that are not part of keras framework.
-
-        These custom objects are needed if an already compiled model is loaded from disk. There is a special treatment
-        for the Padding2D class, which is a base class for different padding types. For a correct behaviour, all
-        supported subclasses are added as custom objects in addition to the given ones.
-
-        :param kwargs: all custom objects, that should be saved
-        """
-        if "Padding2D" in kwargs.keys():
-            kwargs.update(kwargs["Padding2D"].allowed_paddings)
-        self.custom_objects = kwargs
-
-
 class MyLittleModel(AbstractModelClass):
     """
     A customised model 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 127066b8..85b272cf 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -17,9 +17,9 @@ from mlair.data_handler import BootStraps, KerasIterator
 from mlair.helpers.datastore import NameNotFoundInDataStore
 from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list
 from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
-from mlair.model_modules.model_class import AbstractModelClass
+from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \
-    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotAvailability, PlotAvailabilityHistogram,  \
+    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotAvailability, PlotAvailabilityHistogram, \
     PlotConditionalQuantiles, PlotSeparationOfScales
 from mlair.run_modules.run_environment import RunEnvironment
 
diff --git a/test/test_model_modules/test_model_class.py b/test/test_model_modules/test_model_class.py
index 28218eb6..f93f9154 100644
--- a/test/test_model_modules/test_model_class.py
+++ b/test/test_model_modules/test_model_class.py
@@ -1,7 +1,7 @@
 import keras
 import pytest
 
-from mlair.model_modules.model_class import AbstractModelClass
+from mlair.model_modules import AbstractModelClass
 from mlair.model_modules.model_class import MyPaperModel
 
 
diff --git a/test/test_run_modules/test_model_setup.py b/test/test_run_modules/test_model_setup.py
index 38210534..bc442126 100644
--- a/test/test_run_modules/test_model_setup.py
+++ b/test/test_run_modules/test_model_setup.py
@@ -8,7 +8,8 @@ from mlair.data_handler import KerasIterator
 from mlair.data_handler import DataCollection
 from mlair.helpers.datastore import EmptyScope
 from mlair.model_modules.keras_extensions import CallbackHandler
-from mlair.model_modules.model_class import AbstractModelClass, MyLittleModel
+from mlair.model_modules.model_class import MyLittleModel
+from mlair.model_modules import AbstractModelClass
 from mlair.run_modules.model_setup import ModelSetup
 from mlair.run_modules.run_environment import RunEnvironment
 
-- 
GitLab
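
The `__getattr__` forwarding above is the central mechanism of the moved class:
attribute lookups that fail on the wrapper are retried on the wrapped keras model,
so `ModelClass().compile` resolves to `ModelClass().model.compile`. A minimal,
keras-free sketch of this delegation pattern (the `Wrapper`/`_Model` names are
hypothetical, not part of MLAir):

    class _Model:
        """Stand-in for a keras.Model."""
        def compile(self):
            return "compiled"

    class Wrapper:
        """Stand-in for AbstractModelClass."""
        def __init__(self):
            self._model = _Model()

        def __getattr__(self, name):
            # Only invoked when normal attribute lookup fails, so the
            # wrapper's own attributes and methods always take precedence.
            return getattr(self._model, name)

    w = Wrapper()
    assert w.compile() == "compiled"      # forwarded to _Model.compile
    assert w.compile == w._model.compile  # same bound method, as the docstring states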


From 72f271c64ef8069bed389f6757265f89696716fa Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 25 Feb 2021 10:34:18 +0100
Subject: [PATCH 002/175] new FCN module, model classes can now use external
 parameters if provided

---
 .../model_modules/fully_connected_networks.py | 128 ++++++++++++++++++
 mlair/run_modules/model_setup.py              |   6 +-
 2 files changed, 131 insertions(+), 3 deletions(-)
 create mode 100644 mlair/model_modules/fully_connected_networks.py

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
new file mode 100644
index 00000000..e9d577e8
--- /dev/null
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -0,0 +1,128 @@
+__author__ = "Lukas Leufen"
+__date__ = '2021-02-'
+
+from mlair.model_modules import AbstractModelClass
+from mlair.helpers import select_from_dict
+
+import keras
+
+
+class FCN_64_32_16(AbstractModelClass):
+    """
+    A customised model with 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
+    on the window_lead_time parameter.
+    """
+
+    def __init__(self, input_shape: list, output_shape: list):
+        """
+        Sets model and loss depending on the given arguments.
+
+        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
+        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+        """
+
+        assert len(input_shape) == 1
+        assert len(output_shape) == 1
+        super().__init__(input_shape[0], output_shape[0])
+
+        # settings
+        self.dropout_rate = 0.1
+        self.regularizer = keras.regularizers.l2(0.1)
+        self.activation = keras.layers.PReLU
+
+        # apply to model
+        self.set_model()
+        self.set_compile_options()
+        self.set_custom_objects(loss=self.compile_options['loss'])
+
+    def set_model(self):
+        """
+        Build the model.
+        """
+        x_input = keras.layers.Input(shape=self._input_shape)
+        x_in = keras.layers.Flatten(name='{}'.format("major"))(x_input)
+        x_in = keras.layers.Dense(64, name='{}_Dense_64'.format("major"))(x_in)
+        x_in = self.activation()(x_in)
+        x_in = keras.layers.Dense(32, name='{}_Dense_32'.format("major"))(x_in)
+        x_in = self.activation()(x_in)
+        x_in = keras.layers.Dense(16, name='{}_Dense_16'.format("major"))(x_in)
+        x_in = self.activation()(x_in)
+        x_in = keras.layers.Dense(self._output_shape, name='{}_Dense'.format("major"))(x_in)
+        out_main = self.activation()(x_in)
+        self.model = keras.Model(inputs=x_input, outputs=[out_main])
+
+    def set_compile_options(self):
+        self.initial_lr = 1e-2
+        self.optimizer = keras.optimizers.adam(lr=self.initial_lr)
+        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
+
+
+class FCN(AbstractModelClass):
+    """
+    A customised model with 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
+    on the window_lead_time parameter.
+    """
+
+    _activation = {"relu": keras.layers.ReLU(), "tanh": keras.layers.Activation("tanh"),
+                   "sigmoid": keras.layers.Activation("sigmoid")}
+    _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov"]
+
+    def __init__(self, input_shape: list, output_shape: list, activation="relu", optimizer="adam",
+                 layers=1, neurons=10, **kwargs):
+        """
+        Sets model and loss depending on the given arguments.
+
+        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
+        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+        """
+
+        assert len(input_shape) == 1
+        assert len(output_shape) == 1
+        super().__init__(input_shape[0], output_shape[0])
+
+        # settings
+        self.activation = self._set_activation(activation)
+        self.optimizer = self._set_optimizer(optimizer, **kwargs)
+        self.layer_configuration = (layers, neurons)
+
+        # apply to model
+        self.set_model()
+        self.set_compile_options()
+        # self.set_custom_objects(loss=self.compile_options['loss'])
+
+    def _set_activation(self, activation):
+        try:
+            return self._activation[activation.lower()]  # index access so unsupported names raise KeyError
+        except KeyError:
+            raise AttributeError(f"Given activation {activation} is not supported in this model class.")
+
+    def _set_optimizer(self, optimizer, **kwargs):
+        try:
+            opt_name = optimizer.lower()
+            opt = self._optimizer[opt_name]  # index access so unsupported names raise KeyError
+            opt_kwargs = {}
+            if opt_name == "adam":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"])
+            elif opt_name == "sgd":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "momentum", "decay", "nesterov"])
+            return opt(**opt_kwargs)
+        except KeyError:
+            raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
+
+    def set_model(self):
+        """
+        Build the model.
+        """
+        x_input = keras.layers.Input(shape=self._input_shape)
+        x_in = keras.layers.Flatten()(x_input)
+        n_layer, n_hidden = self.layer_configuration
+        for layer in range(n_layer):
+            x_in = keras.layers.Dense(n_hidden)(x_in)
+            x_in = self.activation(x_in)
+        x_in = keras.layers.Dense(self._output_shape)(x_in)
+        out = self.activation(x_in)
+        self.model = keras.Model(inputs=x_input, outputs=[out])
+
+    def set_compile_options(self):
+        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
diff --git a/mlair/run_modules/model_setup.py b/mlair/run_modules/model_setup.py
index dda18fac..feaaff9b 100644
--- a/mlair/run_modules/model_setup.py
+++ b/mlair/run_modules/model_setup.py
@@ -56,7 +56,6 @@ class ModelSetup(RunEnvironment):
         """Initialise and run model setup."""
         super().__init__()
         self.model = None
-        # path = self.data_store.get("experiment_path")
         exp_name = self.data_store.get("experiment_name")
         path = self.data_store.get("model_path")
         self.scope = "model"
@@ -138,9 +137,10 @@ class ModelSetup(RunEnvironment):
 
     def build_model(self):
         """Build model using input and output shapes from data store."""
-        args_list = ["input_shape", "output_shape"]
-        args = self.data_store.create_args_dict(args_list, self.scope)
+        # args_list = ["input_shape", "output_shape"]
         model = self.data_store.get("model_class")
+        args_list = model.requirements()
+        args = self.data_store.create_args_dict(args_list, self.scope)
         self.model = model(**args)
         self.get_model_settings()
 
-- 
GitLab
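
The build_model change above is what makes "external parameters" work: instead of a
fixed ["input_shape", "output_shape"] list, ModelSetup now asks the model class
itself via requirements(), which combines the constructor signature with the
_requirements list from patch 001. A standalone sketch of that lookup (simplified;
MLAir's remove_items helper and the data store are replaced by plain Python):

    import inspect

    class FCN:
        _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad",
                         "momentum", "nesterov"]

        def __init__(self, input_shape, output_shape, activation="relu",
                     optimizer="adam", layers=1, neurons=10, **kwargs):
            pass

        @classmethod
        def own_args(cls, *args):
            # getfullargspec on a class inspects __init__ (kwonlyargs included)
            arg_spec = inspect.getfullargspec(cls)
            return [a for a in arg_spec.args + arg_spec.kwonlyargs
                    if a not in ("self",) + args]

        @classmethod
        def requirements(cls):
            return list(set(cls._requirements + cls.own_args()))

    print(sorted(FCN.requirements()))
    # ['activation', 'amsgrad', 'beta_1', ..., 'input_shape', 'layers', 'lr', ...]
    # ModelSetup.build_model looks each of these names up in the data store and
    # passes whatever it finds as keyword arguments to the model class.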


From 56dd2c7a1ec97ecb6337d163a49a12bc9c60cebd Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 25 Feb 2021 11:06:48 +0100
Subject: [PATCH 003/175] FCN name is adjusted to number of layers and hidden
 units, model setup can now report all settings of optimizer too

---
 mlair/model_modules/fully_connected_networks.py | 15 ++++++++++++---
 mlair/run_modules/model_setup.py                |  2 +-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index e9d577e8..a4c61b5b 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -1,6 +1,8 @@
 __author__ = "Lukas Leufen"
 __date__ = '2021-02-'
 
+from functools import reduce
+
 from mlair.model_modules import AbstractModelClass
 from mlair.helpers import select_from_dict
 
@@ -59,7 +61,7 @@ class FCN_64_32_16(AbstractModelClass):
 
 class FCN(AbstractModelClass):
     """
-    A customised model with 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
+    A customisable fully connected network, where the last layer is the output layer depending
     on the window_lead_time parameter.
     """
 
@@ -69,7 +71,7 @@ class FCN(AbstractModelClass):
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov"]
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", optimizer="adam",
-                 layers=1, neurons=10, **kwargs):
+                 n_layer=1, n_hidden=10, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -84,7 +86,8 @@ class FCN(AbstractModelClass):
         # settings
         self.activation = self._set_activation(activation)
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
-        self.layer_configuration = (layers, neurons)
+        self.layer_configuration = (n_layer, n_hidden)
+        self._update_model_name()
 
         # apply to model
         self.set_model()
@@ -110,6 +113,12 @@ class FCN(AbstractModelClass):
         except KeyError:
             raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
 
+    def _update_model_name(self):
+        n_layer, n_hidden = self.layer_configuration
+        n_input = str(reduce(lambda x, y: x * y, self._input_shape))
+        n_output = str(self._output_shape)
+        self.model_name += "_".join([n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
+
     def set_model(self):
         """
         Build the model.
diff --git a/mlair/run_modules/model_setup.py b/mlair/run_modules/model_setup.py
index feaaff9b..5dd73d50 100644
--- a/mlair/run_modules/model_setup.py
+++ b/mlair/run_modules/model_setup.py
@@ -137,7 +137,6 @@ class ModelSetup(RunEnvironment):
 
     def build_model(self):
         """Build model using input and output shapes from data store."""
-        # args_list = ["input_shape", "output_shape"]
         model = self.data_store.get("model_class")
         args_list = model.requirements()
         args = self.data_store.create_args_dict(args_list, self.scope)
@@ -170,6 +169,7 @@ class ModelSetup(RunEnvironment):
     def report_model(self):
         model_settings = self.model.get_settings()
         model_settings.update(self.model.compile_options)
+        model_settings.update(self.model.optimizer.get_config())
         df = pd.DataFrame(columns=["model setting"])
         for k, v in model_settings.items():
             if v is None:
-- 
GitLab
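
The new _update_model_name encodes the architecture into the model name: the
flattened input size, one entry per hidden layer, and the output size, joined by
underscores. A quick worked example with assumed shape values:

    from functools import reduce

    input_shape = (14, 1, 2)   # assumed (window_hist, station, variables)
    n_layer, n_hidden = 2, 10
    output_shape = 3

    n_input = str(reduce(lambda x, y: x * y, input_shape))  # 14 * 1 * 2 -> "28"
    suffix = "_".join([n_input, *[f"{n_hidden}" for _ in range(n_layer)], str(output_shape)])
    print("FCN" + "_" + suffix)  # FCN_28_10_10_3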


From 32a171f77192917a674483425cd38c355f237055 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 25 Feb 2021 11:19:48 +0100
Subject: [PATCH 004/175] MyLittleModel is replaced by FCN_64_32_16 (both
 models have same architecture)

---
 .../model_modules/fully_connected_networks.py | 17 +++---
 mlair/model_modules/model_class.py            | 57 -------------------
 mlair/run_modules/experiment_setup.py         |  2 +-
 test/test_data_handler/test_iterator.py       |  6 +-
 test/test_run_modules/test_model_setup.py     |  4 +-
 5 files changed, 13 insertions(+), 73 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index a4c61b5b..313fc837 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -28,8 +28,6 @@ class FCN_64_32_16(AbstractModelClass):
         super().__init__(input_shape[0], output_shape[0])
 
         # settings
-        self.dropout_rate = 0.1
-        self.regularizer = keras.regularizers.l2(0.1)
         self.activation = keras.layers.PReLU
 
         # apply to model
@@ -42,20 +40,19 @@ class FCN_64_32_16(AbstractModelClass):
         Build the model.
         """
         x_input = keras.layers.Input(shape=self._input_shape)
-        x_in = keras.layers.Flatten(name='{}'.format("major"))(x_input)
-        x_in = keras.layers.Dense(64, name='{}_Dense_64'.format("major"))(x_in)
+        x_in = keras.layers.Flatten()(x_input)
+        x_in = keras.layers.Dense(64, name="Dense_64")(x_in)
         x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(32, name='{}_Dense_32'.format("major"))(x_in)
+        x_in = keras.layers.Dense(32, name="Dense_32")(x_in)
         x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(16, name='{}_Dense_16'.format("major"))(x_in)
+        x_in = keras.layers.Dense(16, name="Dense_16")(x_in)
         x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(self._output_shape, name='{}_Dense'.format("major"))(x_in)
+        x_in = keras.layers.Dense(self._output_shape, name="Dense_output")(x_in)
         out_main = self.activation()(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out_main])
 
     def set_compile_options(self):
-        self.initial_lr = 1e-2
-        self.optimizer = keras.optimizers.adam(lr=self.initial_lr)
+        self.optimizer = keras.optimizers.adam(lr=1e-2)
         self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
 
 
@@ -117,7 +114,7 @@ class FCN(AbstractModelClass):
         n_layer, n_hidden = self.layer_configuration
         n_input = str(reduce(lambda x, y: x * y, self._input_shape))
         n_output = str(self._output_shape)
-        self.model_name += "_".join([n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
+        self.model_name += "_".join(["", n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
 
     def set_model(self):
         """
diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py
index f6e97987..f8e3a21a 100644
--- a/mlair/model_modules/model_class.py
+++ b/mlair/model_modules/model_class.py
@@ -128,58 +128,6 @@ from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.advanced_paddings import PadUtils, Padding2D, SymmetricPadding2D
 
 
-class MyLittleModel(AbstractModelClass):
-    """
-    A customised model 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
-    on the window_lead_time parameter.
-    """
-
-    def __init__(self, input_shape: list, output_shape: list):
-        """
-        Sets model and loss depending on the given arguments.
-
-        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
-        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
-        """
-
-        assert len(input_shape) == 1
-        assert len(output_shape) == 1
-        super().__init__(input_shape[0], output_shape[0])
-
-        # settings
-        self.dropout_rate = 0.1
-        self.regularizer = keras.regularizers.l2(0.1)
-        self.activation = keras.layers.PReLU
-
-        # apply to model
-        self.set_model()
-        self.set_compile_options()
-        self.set_custom_objects(loss=self.compile_options['loss'])
-
-    def set_model(self):
-        """
-        Build the model.
-        """
-        x_input = keras.layers.Input(shape=self._input_shape)
-        x_in = keras.layers.Flatten(name='{}'.format("major"))(x_input)
-        x_in = keras.layers.Dense(64, name='{}_Dense_64'.format("major"))(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(32, name='{}_Dense_32'.format("major"))(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(16, name='{}_Dense_16'.format("major"))(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(self._output_shape, name='{}_Dense'.format("major"))(x_in)
-        out_main = self.activation()(x_in)
-        self.model = keras.Model(inputs=x_input, outputs=[out_main])
-
-    def set_compile_options(self):
-        self.initial_lr = 1e-2
-        self.optimizer = keras.optimizers.adam(lr=self.initial_lr)
-        # self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94,
-        #                                                                        epochs_drop=10)
-        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
-
-
 class MyLittleModelHourly(AbstractModelClass):
     """
     A customised model with a 1x1 Conv, and 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the
@@ -529,8 +477,3 @@ class MyPaperModel(AbstractModelClass):
         self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9)
         self.compile_options = {"loss": [keras.losses.mean_squared_error, keras.losses.mean_squared_error],
                                 "metrics": ['mse', 'mae']}
-
-
-if __name__ == "__main__":
-    model = MyLittleModel([(1, 3, 10)], [2])
-    print(model.compile_options)
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index af540fc2..30672ecc 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -20,7 +20,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM
 from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
-from mlair.model_modules.model_class import MyLittleModel as VanillaModel
+from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
 
 
 class ExperimentSetup(RunEnvironment):
diff --git a/test/test_data_handler/test_iterator.py b/test/test_data_handler/test_iterator.py
index ade5c192..e47d725a 100644
--- a/test/test_data_handler/test_iterator.py
+++ b/test/test_data_handler/test_iterator.py
@@ -1,7 +1,7 @@
-
 from mlair.data_handler.iterator import DataCollection, StandardIterator, KerasIterator
 from mlair.helpers.testing import PyTestAllEqual
-from mlair.model_modules.model_class import MyLittleModel, MyBranchedModel
+from mlair.model_modules.model_class import MyBranchedModel
+from mlair.model_modules.fully_connected_networks import FCN_64_32_16
 
 import numpy as np
 import pytest
@@ -275,7 +275,7 @@ class TestKerasIterator:
 
     def test_get_model_rank_single_output_branch(self):
         iterator = object.__new__(KerasIterator)
-        iterator.model = MyLittleModel(input_shape=[(14, 1, 2)], output_shape=[(3,)])
+        iterator.model = FCN_64_32_16(input_shape=[(14, 1, 2)], output_shape=[(3,)])
         assert iterator._get_model_rank() == 1
 
     def test_get_model_rank_multiple_output_branch(self):
diff --git a/test/test_run_modules/test_model_setup.py b/test/test_run_modules/test_model_setup.py
index bc442126..7a437853 100644
--- a/test/test_run_modules/test_model_setup.py
+++ b/test/test_run_modules/test_model_setup.py
@@ -8,7 +8,7 @@ from mlair.data_handler import KerasIterator
 from mlair.data_handler import DataCollection
 from mlair.helpers.datastore import EmptyScope
 from mlair.model_modules.keras_extensions import CallbackHandler
-from mlair.model_modules.model_class import MyLittleModel
+from mlair.model_modules.fully_connected_networks import FCN_64_32_16
 from mlair.model_modules import AbstractModelClass
 from mlair.run_modules.model_setup import ModelSetup
 from mlair.run_modules.run_environment import RunEnvironment
@@ -23,7 +23,7 @@ class TestModelSetup:
         obj.scope = "general.model"
         obj.model = None
         obj.callbacks_name = "placeholder_%s_str.pickle"
-        obj.data_store.set("model_class", MyLittleModel)
+        obj.data_store.set("model_class", FCN_64_32_16)
         obj.data_store.set("lr_decay", "dummy_str", "general.model")
         obj.data_store.set("hist", "dummy_str", "general.model")
         obj.data_store.set("epochs", 2)
-- 
GitLab
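
With MyLittleModel removed, FCN_64_32_16 serves as the default (VanillaModel), while
the generic FCN is configured entirely from the outside. A hypothetical direct
instantiation, assuming the signatures introduced in the patches above (the shape
and hyperparameter values are made up):

    from mlair.model_modules.fully_connected_networks import FCN, FCN_64_32_16

    vanilla = FCN_64_32_16(input_shape=[(14, 1, 2)], output_shape=[(3,)])

    custom = FCN(input_shape=[(14, 1, 2)], output_shape=[(3,)],
                 activation="tanh", optimizer="sgd",
                 n_layer=2, n_hidden=32,   # parameter names after patch 003
                 lr=0.01, momentum=0.9)    # filtered by select_from_dict, passed to SGD
    print(custom.model_name)               # FCN_28_32_32_3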


From bae9ec837d57327f07db555961d8ac368f71f52b Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 25 Feb 2021 12:21:08 +0100
Subject: [PATCH 005/175] tests are updated, conftest shouldn't fail on
 teardown

---
 conftest.py                                  | 13 ++++++++-----
 test/test_model_modules/test_flatten_tail.py |  2 +-
 test/test_run_modules/test_model_setup.py    |  3 +--
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/conftest.py b/conftest.py
index 08641ff3..abb0c0f5 100644
--- a/conftest.py
+++ b/conftest.py
@@ -58,10 +58,13 @@ def default_session_fixture(request):
     :type request: _pytest.python.SubRequest
     :return:
     """
-    patched = mock.patch("multiprocessing.cpu_count", return_value=1)
-    patched.__enter__()
 
-    def unpatch():
-        patched.__exit__()
 
-    request.addfinalizer(unpatch)
+
+    with mock.patch("multiprocessing.cpu_count", return_value=1):
+        yield
diff --git a/test/test_model_modules/test_flatten_tail.py b/test/test_model_modules/test_flatten_tail.py
index 623d51c0..46d63e75 100644
--- a/test/test_model_modules/test_flatten_tail.py
+++ b/test/test_model_modules/test_flatten_tail.py
@@ -91,7 +91,7 @@ class TestFlattenTail:
         assert final_dropout.rate == 0.35
 
         inner_act = self.step_in(final_dropout)
-        assert inner_act.get_config() == {'name': 'activation_1', 'trainable': True, 'activation': 'tanh'}
+        assert inner_act.get_config() == {'name': 'activation_3', 'trainable': True, 'activation': 'tanh'}
 
         inner_dense = self.step_in(inner_act)
         assert inner_dense.units == 64
diff --git a/test/test_run_modules/test_model_setup.py b/test/test_run_modules/test_model_setup.py
index 7a437853..8a757214 100644
--- a/test/test_run_modules/test_model_setup.py
+++ b/test/test_run_modules/test_model_setup.py
@@ -103,8 +103,7 @@ class TestModelSetup:
         assert setup_with_gen.model is None
         setup_with_gen.build_model()
         assert isinstance(setup_with_gen.model, AbstractModelClass)
-        expected = {"lr_decay", "model_name", "dropout_rate", "regularizer", "initial_lr", "optimizer", "activation",
-                    "input_shape", "output_shape"}
+        expected = {"lr_decay", "model_name", "optimizer", "activation", "input_shape", "output_shape"}
         assert expected <= self.current_scope_as_set(setup_with_gen)
 
     def test_set_shapes(self, setup_with_gen_tiny):
-- 
GitLab
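
The conftest rewrite above replaces the manual __enter__/addfinalizer pairing with a
yield fixture: pytest treats everything before the yield as setup and lets the
context manager's __exit__ run as teardown, so the patch is always undone even where
a finalizer would previously have failed. A minimal sketch of the resulting fixture
(decorator and signature assumed, as they lie outside the hunk):

    from unittest import mock

    import pytest

    @pytest.fixture(autouse=True, scope="session")
    def default_session_fixture():
        with mock.patch("multiprocessing.cpu_count", return_value=1):
            yield  # all tests run with cpu_count() == 1; unpatched afterwards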


From 33a2d164c2bc781787a261a0f27f7c5729bd2caf Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 25 Feb 2021 15:22:45 +0100
Subject: [PATCH 006/175] corrected activation

---
 mlair/model_modules/fully_connected_networks.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 313fc837..940c9846 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -1,7 +1,7 @@
 __author__ = "Lukas Leufen"
 __date__ = '2021-02-'
 
-from functools import reduce
+from functools import reduce, partial
 
 from mlair.model_modules import AbstractModelClass
 from mlair.helpers import select_from_dict
@@ -62,8 +62,8 @@ class FCN(AbstractModelClass):
     on the window_lead_time parameter.
     """
 
-    _activation = {"relu": keras.layers.ReLU(), "tanh": keras.layers.Activation("tanh"),
-                   "sigmoid": keras.layers.Activation("sigmoid")}
+    _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
+                   "sigmoid": partial(keras.layers.Activation, "sigmoid")}
     _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov"]
 
@@ -125,9 +125,9 @@ class FCN(AbstractModelClass):
         n_layer, n_hidden = self.layer_configuration
         for layer in range(n_layer):
             x_in = keras.layers.Dense(n_hidden)(x_in)
-            x_in = self.activation(x_in)
+            x_in = self.activation()(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
-        out = self.activation(x_in)
+        out = self.activation()(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
 
     def set_compile_options(self):
-- 
GitLab
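
The activation fix above addresses a common keras pitfall: the old _activation dict
stored layer *instances*, so every model built from it reused (and rewired) the same
layer object, while classes and partials act as factories that hand out a fresh
layer per call. A keras-free sketch of the difference (hypothetical Layer class):

    from functools import partial

    class Layer:
        def __init__(self, activation):
            self.activation = activation

    shared = {"tanh": Layer("tanh")}            # old style: one instance for everyone
    factory = {"tanh": partial(Layer, "tanh")}  # new style: a factory per name

    assert shared["tanh"] is shared["tanh"]     # the same object is reused
    a, b = factory["tanh"](), factory["tanh"]()
    assert a is not b                           # a fresh layer per call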


From 97367ce0f2273717df060d0fd642ebd557148f4c Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 26 Feb 2021 08:07:49 +0100
Subject: [PATCH 007/175] separated model class tests

---
 .../test_abstract_model_class.py              | 199 ++++++++++++++++++
 test/test_model_modules/test_flatten_tail.py  |   2 +-
 test/test_model_modules/test_model_class.py   | 196 -----------------
 3 files changed, 200 insertions(+), 197 deletions(-)
 create mode 100644 test/test_model_modules/test_abstract_model_class.py

diff --git a/test/test_model_modules/test_abstract_model_class.py b/test/test_model_modules/test_abstract_model_class.py
new file mode 100644
index 00000000..dfef68d5
--- /dev/null
+++ b/test/test_model_modules/test_abstract_model_class.py
@@ -0,0 +1,199 @@
+import keras
+import pytest
+
+from mlair import AbstractModelClass
+
+
+class Paddings:
+    allowed_paddings = {"pad1": 34, "another_pad": True}
+
+
+class AbstractModelSubClass(AbstractModelClass):
+
+    def __init__(self):
+        super().__init__(input_shape=(12, 1, 2), output_shape=3)
+        self.test_attr = "testAttr"
+
+
+class TestAbstractModelClass:
+
+    @pytest.fixture
+    def amc(self):
+        return AbstractModelClass(input_shape=(14, 1, 2), output_shape=(3,))
+
+    @pytest.fixture
+    def amsc(self):
+        return AbstractModelSubClass()
+
+    def test_init(self, amc):
+        assert amc.model is None
+        # assert amc.loss is None
+        assert amc.model_name == "AbstractModelClass"
+        assert amc.custom_objects == {}
+        assert amc._input_shape == (14, 1, 2)
+        assert amc._output_shape == 3
+
+    def test_model_property(self, amc):
+        amc.model = keras.Model()
+        assert isinstance(amc.model, keras.Model) is True
+
+    # def test_loss_property(self, amc):
+    #     amc.loss = keras.losses.mean_absolute_error
+    #     assert amc.loss == keras.losses.mean_absolute_error
+
+    def test_compile_options_setter_all_empty(self, amc):
+        amc.compile_options = None
+        assert amc.compile_options == {'optimizer': None,
+                                       'loss': None,
+                                       'metrics': None,
+                                       'loss_weights': None,
+                                       'sample_weight_mode': None,
+                                       'weighted_metrics': None,
+                                       'target_tensors': None
+                                       }
+
+    def test_compile_options_setter_as_dict(self, amc):
+        amc.compile_options = {"optimizer": keras.optimizers.SGD(),
+                               "loss": keras.losses.mean_absolute_error,
+                               "metrics": ["mse", "mae"]}
+        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
+        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
+        assert amc.compile_options["metrics"] == ["mse", "mae"]
+        assert amc.compile_options["loss_weights"] is None
+        assert amc.compile_options["sample_weight_mode"] is None
+        assert amc.compile_options["target_tensors"] is None
+        assert amc.compile_options["weighted_metrics"] is None
+
+    def test_compile_options_setter_as_attr(self, amc):
+        amc.optimizer = keras.optimizers.SGD()
+        amc.loss = keras.losses.mean_absolute_error
+        amc.compile_options = None  # This line has to be called!
+        # optimizer check
+        assert isinstance(amc.optimizer, keras.optimizers.SGD)
+        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
+        # loss check
+        assert amc.loss == keras.losses.mean_absolute_error
+        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
+        # check rest (all None as not set)
+        assert amc.compile_options["metrics"] is None
+        assert amc.compile_options["loss_weights"] is None
+        assert amc.compile_options["sample_weight_mode"] is None
+        assert amc.compile_options["target_tensors"] is None
+        assert amc.compile_options["weighted_metrics"] is None
+
+    def test_compile_options_setter_as_mix_attr_dict_no_duplicates(self, amc):
+        amc.optimizer = keras.optimizers.SGD()
+        amc.compile_options = {"loss": keras.losses.mean_absolute_error,
+                               "loss_weights": [0.2, 0.8]}
+        # check setting by attribute
+        assert isinstance(amc.optimizer, keras.optimizers.SGD)
+        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
+        # check setting by dict
+        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
+        assert amc.compile_options["loss_weights"] == [0.2, 0.8]
+        # check rest (all None as not set)
+        assert amc.compile_options["metrics"] is None
+        assert amc.compile_options["sample_weight_mode"] is None
+        assert amc.compile_options["target_tensors"] is None
+        assert amc.compile_options["weighted_metrics"] is None
+
+    def test_compile_options_setter_as_mix_attr_dict_valid_duplicates_optimizer(self, amc):
+        amc.optimizer = keras.optimizers.SGD()
+        amc.metrics = ['mse']
+        amc.compile_options = {"optimizer": keras.optimizers.SGD(),
+                               "loss": keras.losses.mean_absolute_error}
+        # check duplicate (attr and dic)
+        assert isinstance(amc.optimizer, keras.optimizers.SGD)
+        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
+        # check setting by dict
+        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
+        # check setting by attr
+        assert amc.metrics == ['mse']
+        assert amc.compile_options["metrics"] == ['mse']
+        # check rest (all None as not set)
+        assert amc.compile_options["loss_weights"] is None
+        assert amc.compile_options["sample_weight_mode"] is None
+        assert amc.compile_options["target_tensors"] is None
+        assert amc.compile_options["weighted_metrics"] is None
+
+    def test_compile_options_setter_as_mix_attr_dict_valid_duplicates_none_optimizer(self, amc):
+        amc.optimizer = keras.optimizers.SGD()
+        amc.metrics = ['mse']
+        amc.compile_options = {"metrics": ['mse'],
+                               "loss": keras.losses.mean_absolute_error}
+        # check duplicate (attr and dic)
+        assert amc.metrics == ['mse']
+        assert amc.compile_options["metrics"] == ['mse']
+        # check setting by dict
+        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
+        # check setting by attr
+        assert isinstance(amc.optimizer, keras.optimizers.SGD)
+        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
+        # check rest (all None as not set)
+        assert amc.compile_options["loss_weights"] is None
+        assert amc.compile_options["sample_weight_mode"] is None
+        assert amc.compile_options["target_tensors"] is None
+        assert amc.compile_options["weighted_metrics"] is None
+
+    def test_compile_options_property_type_error(self, amc):
+        with pytest.raises(TypeError) as einfo:
+            amc.compile_options = 'hello world'
+        assert "`compile_options' must be `dict' or `None', but is <class 'str'>." in str(einfo.value)
+
+    def test_compile_options_setter_as_mix_attr_dict_invalid_duplicates_other_optimizer(self, amc):
+        amc.optimizer = keras.optimizers.SGD()
+        with pytest.raises(ValueError) as einfo:
+            amc.compile_options = {"optimizer": keras.optimizers.Adam()}
+        assert "Got different values or arguments for same argument: self.optimizer=<class" \
+               " 'keras.optimizers.SGD'> and 'optimizer': <class 'keras.optimizers.Adam'>" in str(einfo.value)
+
+    def test_compile_options_setter_as_mix_attr_dict_invalid_duplicates_same_optimizer_other_args(self, amc):
+        amc.optimizer = keras.optimizers.SGD(lr=0.1)
+        with pytest.raises(ValueError) as einfo:
+            amc.compile_options = {"optimizer": keras.optimizers.SGD(lr=0.001)}
+        assert "Got different values or arguments for same argument: self.optimizer=<class" \
+               " 'keras.optimizers.SGD'> and 'optimizer': <class 'keras.optimizers.SGD'>" in str(einfo.value)
+
+    def test_compile_options_setter_as_dict_invalid_keys(self, amc):
+        with pytest.raises(ValueError) as einfo:
+            amc.compile_options = {"optimizer": keras.optimizers.SGD(), "InvalidKeyword": [1, 2, 3]}
+        assert "Got invalid key for compile_options. dict_keys(['optimizer', 'InvalidKeyword'])" in str(einfo.value)
+
+    def test_compare_keras_optimizers_equal(self, amc):
+        assert amc._AbstractModelClass__compare_keras_optimizers(keras.optimizers.SGD(), keras.optimizers.SGD()) is True
+
+    def test_compare_keras_optimizers_no_optimizer(self, amc):
+        assert amc._AbstractModelClass__compare_keras_optimizers('NoOptimizer', keras.optimizers.SGD()) is False
+
+    def test_compare_keras_optimizers_other_parameters_run_sess(self, amc):
+        assert amc._AbstractModelClass__compare_keras_optimizers(keras.optimizers.SGD(lr=0.1),
+                                                                 keras.optimizers.SGD(lr=0.01)) is False
+
+    def test_compare_keras_optimizers_other_parameters_none_sess(self, amc):
+        assert amc._AbstractModelClass__compare_keras_optimizers(keras.optimizers.SGD(decay=1),
+                                                                 keras.optimizers.SGD(decay=0.01)) is False
+
+    def test_getattr(self, amc):
+        amc.model = keras.Model()
+        assert hasattr(amc, "compile") is True
+        assert hasattr(amc.model, "compile") is True
+        assert amc.compile == amc.model.compile
+
+    def test_get_settings(self, amc, amsc):
+        assert amc.get_settings() == {"model_name": "AbstractModelClass", "_input_shape": (14, 1, 2),
+                                      "_output_shape": 3}
+        assert amsc.get_settings() == {"test_attr": "testAttr", "model_name": "AbstractModelSubClass",
+                                       "_input_shape": (12, 1, 2), "_output_shape": 3}
+
+    def test_custom_objects(self, amc):
+        amc.custom_objects = {"Test": 123}
+        assert amc.custom_objects == {"Test": 123}
+
+    def test_set_custom_objects(self, amc):
+        amc.set_custom_objects(Test=22, minor_param="minor")
+        assert amc.custom_objects == {"Test": 22, "minor_param": "minor"}
+        amc.set_custom_objects(Test=2, minor_param1="minor1")
+        assert amc.custom_objects == {"Test": 2, "minor_param1": "minor1"}
+        paddings = Paddings()
+        amc.set_custom_objects(Test=1, Padding2D=paddings)
+        assert amc.custom_objects == {"Test": 1, "Padding2D": paddings, "pad1": 34, "another_pad": True}
diff --git a/test/test_model_modules/test_flatten_tail.py b/test/test_model_modules/test_flatten_tail.py
index 46d63e75..623d51c0 100644
--- a/test/test_model_modules/test_flatten_tail.py
+++ b/test/test_model_modules/test_flatten_tail.py
@@ -91,7 +91,7 @@ class TestFlattenTail:
         assert final_dropout.rate == 0.35
 
         inner_act = self.step_in(final_dropout)
-        assert inner_act.get_config() == {'name': 'activation_3', 'trainable': True, 'activation': 'tanh'}
+        assert inner_act.get_config() == {'name': 'activation_1', 'trainable': True, 'activation': 'tanh'}
 
         inner_dense = self.step_in(inner_act)
         assert inner_dense.units == 64
diff --git a/test/test_model_modules/test_model_class.py b/test/test_model_modules/test_model_class.py
index f93f9154..cbff4cec 100644
--- a/test/test_model_modules/test_model_class.py
+++ b/test/test_model_modules/test_model_class.py
@@ -1,205 +1,9 @@
 import keras
 import pytest
 
-from mlair.model_modules import AbstractModelClass
 from mlair.model_modules.model_class import MyPaperModel
 
 
-class Paddings:
-    allowed_paddings = {"pad1": 34, "another_pad": True}
-
-
-class AbstractModelSubClass(AbstractModelClass):
-
-    def __init__(self):
-        super().__init__(input_shape=(12, 1, 2), output_shape=3)
-        self.test_attr = "testAttr"
-
-
-class TestAbstractModelClass:
-
-    @pytest.fixture
-    def amc(self):
-        return AbstractModelClass(input_shape=(14, 1, 2), output_shape=(3,))
-
-    @pytest.fixture
-    def amsc(self):
-        return AbstractModelSubClass()
-
-    def test_init(self, amc):
-        assert amc.model is None
-        # assert amc.loss is None
-        assert amc.model_name == "AbstractModelClass"
-        assert amc.custom_objects == {}
-        assert amc._input_shape == (14, 1, 2)
-        assert amc._output_shape == 3
-
-    def test_model_property(self, amc):
-        amc.model = keras.Model()
-        assert isinstance(amc.model, keras.Model) is True
-
-    # def test_loss_property(self, amc):
-    #     amc.loss = keras.losses.mean_absolute_error
-    #     assert amc.loss == keras.losses.mean_absolute_error
-
-    def test_compile_options_setter_all_empty(self, amc):
-        amc.compile_options = None
-        assert amc.compile_options == {'optimizer': None,
-                                       'loss': None,
-                                       'metrics': None,
-                                       'loss_weights': None,
-                                       'sample_weight_mode': None,
-                                       'weighted_metrics': None,
-                                       'target_tensors': None
-                                       }
-
-    def test_compile_options_setter_as_dict(self, amc):
-        amc.compile_options = {"optimizer": keras.optimizers.SGD(),
-                               "loss": keras.losses.mean_absolute_error,
-                               "metrics": ["mse", "mae"]}
-        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
-        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
-        assert amc.compile_options["metrics"] == ["mse", "mae"]
-        assert amc.compile_options["loss_weights"] is None
-        assert amc.compile_options["sample_weight_mode"] is None
-        assert amc.compile_options["target_tensors"] is None
-        assert amc.compile_options["weighted_metrics"] is None
-
-    def test_compile_options_setter_as_attr(self, amc):
-        amc.optimizer = keras.optimizers.SGD()
-        amc.loss = keras.losses.mean_absolute_error
-        amc.compile_options = None  # This line has to be called!
-        # optimizer check
-        assert isinstance(amc.optimizer, keras.optimizers.SGD)
-        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
-        # loss check
-        assert amc.loss == keras.losses.mean_absolute_error
-        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
-        # check rest (all None as not set)
-        assert amc.compile_options["metrics"] is None
-        assert amc.compile_options["loss_weights"] is None
-        assert amc.compile_options["sample_weight_mode"] is None
-        assert amc.compile_options["target_tensors"] is None
-        assert amc.compile_options["weighted_metrics"] is None
-
-    def test_compile_options_setter_as_mix_attr_dict_no_duplicates(self, amc):
-        amc.optimizer = keras.optimizers.SGD()
-        amc.compile_options = {"loss": keras.losses.mean_absolute_error,
-                               "loss_weights": [0.2, 0.8]}
-        # check setting by attribute
-        assert isinstance(amc.optimizer, keras.optimizers.SGD)
-        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
-        # check setting by dict
-        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
-        assert amc.compile_options["loss_weights"] == [0.2, 0.8]
-        # check rest (all None as not set)
-        assert amc.compile_options["metrics"] is None
-        assert amc.compile_options["sample_weight_mode"] is None
-        assert amc.compile_options["target_tensors"] is None
-        assert amc.compile_options["weighted_metrics"] is None
-
-    def test_compile_options_setter_as_mix_attr_dict_valid_duplicates_optimizer(self, amc):
-        amc.optimizer = keras.optimizers.SGD()
-        amc.metrics = ['mse']
-        amc.compile_options = {"optimizer": keras.optimizers.SGD(),
-                               "loss": keras.losses.mean_absolute_error}
-        # check duplicate (attr and dic)
-        assert isinstance(amc.optimizer, keras.optimizers.SGD)
-        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
-        # check setting by dict
-        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
-        # check setting by attr
-        assert amc.metrics == ['mse']
-        assert amc.compile_options["metrics"] == ['mse']
-        # check rest (all None as not set)
-        assert amc.compile_options["loss_weights"] is None
-        assert amc.compile_options["sample_weight_mode"] is None
-        assert amc.compile_options["target_tensors"] is None
-        assert amc.compile_options["weighted_metrics"] is None
-
-    def test_compile_options_setter_as_mix_attr_dict_valid_duplicates_none_optimizer(self, amc):
-        amc.optimizer = keras.optimizers.SGD()
-        amc.metrics = ['mse']
-        amc.compile_options = {"metrics": ['mse'],
-                               "loss": keras.losses.mean_absolute_error}
-        # check duplicate (attr and dic)
-        assert amc.metrics == ['mse']
-        assert amc.compile_options["metrics"] == ['mse']
-        # check setting by dict
-        assert amc.compile_options["loss"] == keras.losses.mean_absolute_error
-        # check setting by attr
-        assert isinstance(amc.optimizer, keras.optimizers.SGD)
-        assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
-        # check rest (all None as not set)
-        assert amc.compile_options["loss_weights"] is None
-        assert amc.compile_options["sample_weight_mode"] is None
-        assert amc.compile_options["target_tensors"] is None
-        assert amc.compile_options["weighted_metrics"] is None
-
-    def test_compile_options_property_type_error(self, amc):
-        with pytest.raises(TypeError) as einfo:
-            amc.compile_options = 'hello world'
-        assert "`compile_options' must be `dict' or `None', but is <class 'str'>." in str(einfo.value)
-
-    def test_compile_options_setter_as_mix_attr_dict_invalid_duplicates_other_optimizer(self, amc):
-        amc.optimizer = keras.optimizers.SGD()
-        with pytest.raises(ValueError) as einfo:
-            amc.compile_options = {"optimizer": keras.optimizers.Adam()}
-        assert "Got different values or arguments for same argument: self.optimizer=<class" \
-               " 'keras.optimizers.SGD'> and 'optimizer': <class 'keras.optimizers.Adam'>" in str(einfo.value)
-
-    def test_compile_options_setter_as_mix_attr_dict_invalid_duplicates_same_optimizer_other_args(self, amc):
-        amc.optimizer = keras.optimizers.SGD(lr=0.1)
-        with pytest.raises(ValueError) as einfo:
-            amc.compile_options = {"optimizer": keras.optimizers.SGD(lr=0.001)}
-        assert "Got different values or arguments for same argument: self.optimizer=<class" \
-               " 'keras.optimizers.SGD'> and 'optimizer': <class 'keras.optimizers.SGD'>" in str(einfo.value)
-
-    def test_compile_options_setter_as_dict_invalid_keys(self, amc):
-        with pytest.raises(ValueError) as einfo:
-            amc.compile_options = {"optimizer": keras.optimizers.SGD(), "InvalidKeyword": [1, 2, 3]}
-        assert "Got invalid key for compile_options. dict_keys(['optimizer', 'InvalidKeyword'])" in str(einfo.value)
-
-    def test_compare_keras_optimizers_equal(self, amc):
-        assert amc._AbstractModelClass__compare_keras_optimizers(keras.optimizers.SGD(), keras.optimizers.SGD()) is True
-
-    def test_compare_keras_optimizers_no_optimizer(self, amc):
-        assert amc._AbstractModelClass__compare_keras_optimizers('NoOptimizer', keras.optimizers.SGD()) is False
-
-    def test_compare_keras_optimizers_other_parameters_run_sess(self, amc):
-        assert amc._AbstractModelClass__compare_keras_optimizers(keras.optimizers.SGD(lr=0.1),
-                                                                 keras.optimizers.SGD(lr=0.01)) is False
-
-    def test_compare_keras_optimizers_other_parameters_none_sess(self, amc):
-        assert amc._AbstractModelClass__compare_keras_optimizers(keras.optimizers.SGD(decay=1),
-                                                                 keras.optimizers.SGD(decay=0.01)) is False
-
-    def test_getattr(self, amc):
-        amc.model = keras.Model()
-        assert hasattr(amc, "compile") is True
-        assert hasattr(amc.model, "compile") is True
-        assert amc.compile == amc.model.compile
-
-    def test_get_settings(self, amc, amsc):
-        assert amc.get_settings() == {"model_name": "AbstractModelClass", "_input_shape": (14, 1, 2),
-                                      "_output_shape": 3}
-        assert amsc.get_settings() == {"test_attr": "testAttr", "model_name": "AbstractModelSubClass",
-                                       "_input_shape": (12, 1, 2), "_output_shape": 3}
-
-    def test_custom_objects(self, amc):
-        amc.custom_objects = {"Test": 123}
-        assert amc.custom_objects == {"Test": 123}
-
-    def test_set_custom_objects(self, amc):
-        amc.set_custom_objects(Test=22, minor_param="minor")
-        assert amc.custom_objects == {"Test": 22, "minor_param": "minor"}
-        amc.set_custom_objects(Test=2, minor_param1="minor1")
-        assert amc.custom_objects == {"Test": 2, "minor_param1": "minor1"}
-        paddings = Paddings()
-        amc.set_custom_objects(Test=1, Padding2D=paddings)
-        assert amc.custom_objects == {"Test": 1, "Padding2D": paddings, "pad1": 34, "another_pad": True}
-
-
 class TestMyPaperModel:
 
     @pytest.fixture
-- 
GitLab
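
A note on the tests above: they pin down the merge semantics of AbstractModelClass.compile_options, where options may be supplied as attributes, as a dict, or as a mix of both, provided duplicated entries agree. A minimal usage sketch mirroring the fixtures above (not part of the patch):

    import keras
    from mlair.model_modules import AbstractModelClass

    amc = AbstractModelClass(input_shape=(14, 1, 2), output_shape=(3,))
    amc.optimizer = keras.optimizers.SGD()                            # set by attribute
    amc.compile_options = {"loss": keras.losses.mean_absolute_error}  # merged via dict
    assert isinstance(amc.compile_options["optimizer"], keras.optimizers.SGD)
    # a conflicting duplicate such as {"optimizer": keras.optimizers.Adam()}
    # would raise a ValueError, see the invalid_duplicates tests above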


From adff3e91e59ca5b7dcbb374dde3756f7c098f924 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 26 Feb 2021 16:18:26 +0100
Subject: [PATCH 008/175] report val loss, use log scale in loss monitoring plot

---
 ACKNOWLEDGMENTS .md => ACKNOWLEDGMENTS.md | 0
 mlair/plotting/training_monitoring.py     | 2 +-
 mlair/run_modules/training.py             | 6 +++++-
 3 files changed, 6 insertions(+), 2 deletions(-)
 rename ACKNOWLEDGMENTS .md => ACKNOWLEDGMENTS.md (100%)

diff --git a/ACKNOWLEDGMENTS .md b/ACKNOWLEDGMENTS.md
similarity index 100%
rename from ACKNOWLEDGMENTS .md
rename to ACKNOWLEDGMENTS.md
diff --git a/mlair/plotting/training_monitoring.py b/mlair/plotting/training_monitoring.py
index 09f49c84..4b4ebbc3 100644
--- a/mlair/plotting/training_monitoring.py
+++ b/mlair/plotting/training_monitoring.py
@@ -86,7 +86,7 @@ class PlotModelHistory:
         """
         ax = self._data[[self._plot_metric, f"val_{self._plot_metric}"]].plot(linewidth=0.7)
         if len(self._additional_columns) > 0:
-            self._data[self._additional_columns].plot(linewidth=0.7, secondary_y=True, ax=ax)
+            self._data[self._additional_columns].plot(linewidth=0.7, secondary_y=True, ax=ax, logy=True)
         title = f"Model {self._plot_metric}: best = {self._data[[f'val_{self._plot_metric}']].min().values}"
         ax.set(xlabel="epoch", ylabel=self._plot_metric, title=title)
         ax.axhline(y=0, color="gray", linewidth=0.5)
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index 6c993d56..4409b643 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -246,4 +246,8 @@ class Training(RunEnvironment):
         path_config.check_path_and_create(path)
         df.to_latex(os.path.join(path, "training_settings.tex"), na_rep='---', column_format=column_format)
         df.to_markdown(open(os.path.join(path, "training_settings.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
\ No newline at end of file
+                       tablefmt="github")
+
+        val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0, steps=1)
+        for index, item in enumerate(val_score):
+            logging.info(f"{self.model.metrics_names[index]}, {item}")
-- 
GitLab
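
The plotting change above moves the additional history columns (e.g. the learning rate) onto a log-scaled secondary axis. A minimal pandas sketch of the pattern, with hypothetical column names and values:

    import numpy as np
    import pandas as pd

    history = pd.DataFrame({"loss": np.linspace(1.0, 0.1, 10),
                            "val_loss": np.linspace(1.2, 0.2, 10),
                            "lr": np.logspace(-2, -4, 10)})  # hypothetical history
    ax = history[["loss", "val_loss"]].plot(linewidth=0.7)
    history[["lr"]].plot(linewidth=0.7, secondary_y=True, ax=ax, logy=True)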


From 6982e322a27e8b94873653d08da8791d67e07fff Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 26 Feb 2021 16:19:34 +0100
Subject: [PATCH 009/175] add annotation if loss is on val or test

---
 mlair/run_modules/post_processing.py | 2 +-
 mlair/run_modules/training.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 85b272cf..807f32bb 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -391,7 +391,7 @@ class PostProcessing(RunEnvironment):
         path = self.data_store.get("model_path")
         with open(os.path.join(path, "test_scores.txt"), "a") as f:
             for index, item in enumerate(test_score):
-                logging.info(f"{self.model.metrics_names[index]}, {item}")
+                logging.info(f"{self.model.metrics_names[index]} (test), {item}")
                 f.write(f"{self.model.metrics_names[index]}, {item}\n")
 
     def train_ols_model(self):
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index 4409b643..bbb3fabf 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -250,4 +250,4 @@ class Training(RunEnvironment):
 
         val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0, steps=1)
         for index, item in enumerate(val_score):
-            logging.info(f"{self.model.metrics_names[index]}, {item}")
+            logging.info(f"{self.model.metrics_names[index]} (val), {item}")
-- 
GitLab


From 01dc3e2bffa60d443f23ebd8102dc54f5fac8524 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 26 Feb 2021 16:37:51 +0100
Subject: [PATCH 010/175] ensure loss is a list when logging

---
 mlair/run_modules/post_processing.py | 2 +-
 mlair/run_modules/training.py        | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 807f32bb..6f78a03d 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -390,7 +390,7 @@ class PostProcessing(RunEnvironment):
                                                    use_multiprocessing=True, verbose=0, steps=1)
         path = self.data_store.get("model_path")
         with open(os.path.join(path, "test_scores.txt"), "a") as f:
-            for index, item in enumerate(test_score):
+            for index, item in enumerate(to_list(test_score)):
                 logging.info(f"{self.model.metrics_names[index]} (test), {item}")
                 f.write(f"{self.model.metrics_names[index]}, {item}\n")
 
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index bbb3fabf..d4badfe2 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -16,6 +16,7 @@ from mlair.model_modules.keras_extensions import CallbackHandler
 from mlair.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.configuration import path_config
+from mlair.helpers import to_list
 
 
 class Training(RunEnvironment):
@@ -249,5 +250,5 @@ class Training(RunEnvironment):
                        tablefmt="github")
 
         val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0, steps=1)
-        for index, item in enumerate(val_score):
+        for index, item in enumerate(to_list(val_score)):
             logging.info(f"{self.model.metrics_names[index]} (val), {item}")
-- 
GitLab
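
Background for this fix: evaluate_generator returns a bare float when only a single metric is compiled and a list otherwise, so iterating over the raw result can fail. Wrapping it in to_list normalizes both cases; a sketch of the assumed behavior of mlair.helpers.to_list:

    def to_list(obj):
        # assumed behavior: wrap anything that is not already a list
        return obj if isinstance(obj, list) else [obj]

    assert to_list(0.42) == [0.42]                 # single metric -> bare loss
    assert to_list([0.42, 0.13]) == [0.42, 0.13]   # loss plus extra metrics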


From cc14b8b6a1fe322be0cf4c1a2973d5dd5ecfe36f Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 26 Feb 2021 16:49:45 +0100
Subject: [PATCH 011/175] monitoring plot now always uses log scale

---
 mlair/plotting/training_monitoring.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/plotting/training_monitoring.py b/mlair/plotting/training_monitoring.py
index 4b4ebbc3..7cc79a6e 100644
--- a/mlair/plotting/training_monitoring.py
+++ b/mlair/plotting/training_monitoring.py
@@ -84,7 +84,7 @@ class PlotModelHistory:
 
         :param filename: name (including total path) of the plot to save.
         """
-        ax = self._data[[self._plot_metric, f"val_{self._plot_metric}"]].plot(linewidth=0.7)
+        ax = self._data[[self._plot_metric, f"val_{self._plot_metric}"]].plot(linewidth=0.7, logy=True)
         if len(self._additional_columns) > 0:
             self._data[self._additional_columns].plot(linewidth=0.7, secondary_y=True, ax=ax, logy=True)
         title = f"Model {self._plot_metric}: best = {self._data[[f'val_{self._plot_metric}']].min().values}"
-- 
GitLab


From 571be20d7f6cd65b8fc74adf2ed794538edcd567 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 26 Feb 2021 17:07:21 +0100
Subject: [PATCH 012/175] change log plot call

---
 mlair/plotting/training_monitoring.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlair/plotting/training_monitoring.py b/mlair/plotting/training_monitoring.py
index 7cc79a6e..9cad9fd0 100644
--- a/mlair/plotting/training_monitoring.py
+++ b/mlair/plotting/training_monitoring.py
@@ -84,7 +84,8 @@ class PlotModelHistory:
 
         :param filename: name (including total path) of the plot to save.
         """
-        ax = self._data[[self._plot_metric, f"val_{self._plot_metric}"]].plot(linewidth=0.7, logy=True)
+        ax = self._data[[self._plot_metric, f"val_{self._plot_metric}"]].plot(linewidth=0.7)
+        ax.set_yscale('log')
         if len(self._additional_columns) > 0:
             self._data[self._additional_columns].plot(linewidth=0.7, secondary_y=True, ax=ax, logy=True)
         title = f"Model {self._plot_metric}: best = {self._data[[f'val_{self._plot_metric}']].min().values}"
-- 
GitLab


From 363994d40a9c579545123a11b81e25c6d1ca040d Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 3 Mar 2021 17:58:58 +0100
Subject: [PATCH 013/175] new methods to calculate mae and a bunch of error
 metrics

---
 mlair/helpers/statistics.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index ad6a368f..3631597a 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -196,9 +196,23 @@ def log_apply(data: Data, mean: Data, std: Data) -> Data:
     return standardise_apply(np.log1p(data), mean, std)
 
 
-def mean_squared_error(a, b):
+def mean_squared_error(a, b, dim=None):
     """Calculate mean squared error."""
-    return np.square(a - b).mean()
+    return np.square(a - b).mean(dim)
+
+
+def mean_absolute_error(a, b, dim=None):
+    """Calculate mean absolute error."""
+    return np.abs(a - b).mean(dim)
+
+
+def calculate_error_metrics(a, b, dim):
+    """Calculate MSE, RMSE, and MAE. Additionally return number of used values for calculation."""
+    mse = mean_squared_error(a, b, dim)
+    rmse = np.sqrt(mse)
+    mae = mean_absolute_error(a, b, dim)
+    n = (a - b).notnull().sum(dim)
+    return {"mse": mse, "rmse": rmse, "mae": mae, "n": n}
 
 
 class SkillScores:
-- 
GitLab
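
The dim argument and the .notnull() call indicate that these helpers target xarray objects, where NaNs mark missing values and are skipped by mean(). A minimal usage sketch with hypothetical data:

    import numpy as np
    import xarray as xr
    from mlair.helpers.statistics import calculate_error_metrics

    obs = xr.DataArray([1.0, 2.0, np.nan, 4.0], dims="index")
    pred = xr.DataArray([1.5, 1.5, 3.0, 5.0], dims="index")
    metrics = calculate_error_metrics(pred, obs, dim="index")
    # metrics["n"] == 3 (non-NaN pairs); mse == (0.25 + 0.25 + 1.0) / 3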


From e54ef5410152cf9b377f655bd4dd3cbbceea9421 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 3 Mar 2021 18:00:54 +0100
Subject: [PATCH 014/175] moved table creation to new module

---
 mlair/helpers/tables.py             | 24 ++++++++++++++++
 mlair/run_modules/pre_processing.py | 43 ++++++++---------------------
 2 files changed, 35 insertions(+), 32 deletions(-)
 create mode 100644 mlair/helpers/tables.py

diff --git a/mlair/helpers/tables.py b/mlair/helpers/tables.py
new file mode 100644
index 00000000..e7628ba4
--- /dev/null
+++ b/mlair/helpers/tables.py
@@ -0,0 +1,24 @@
+import pandas as pd
+import numpy as np
+import os
+
+
+def create_column_format_for_tex(df: pd.DataFrame) -> str:
+    """
+    Creates column format for latex table based on the shape of a given DataFrame.
+
+    Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
+    """
+    column_format = np.repeat('c', df.shape[1] + 1)
+    column_format[0] = 'l'
+    column_format[-1] = 'r'
+    column_format = ''.join(column_format.tolist())
+    return column_format
+
+
+def save_to_tex(path, filename, column_format, df, na_rep='---'):
+    df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
+
+
+def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
+    df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding), tablefmt=tablefmt)
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index cdf195e7..813873b8 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -14,7 +14,7 @@ import numpy as np
 import pandas as pd
 
 from mlair.data_handler import DataCollection, AbstractDataHandler
-from mlair.helpers import TimeTracking, to_list
+from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
@@ -119,19 +119,20 @@ class PreProcessing(RunEnvironment):
         path_config.check_path_and_create(path)
         names_of_set = ["train", "val", "test"]
         df = self.create_info_df(meta_data, meta_round, names_of_set, precision)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
-        self.save_to_md(path=path, filename="station_sample_size.md", df=df)
+        column_format = tables.create_column_format_for_tex(df)
+        tables.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
+        tables.save_to_md(path=path, filename="station_sample_size.md", df=df)
         df_nometa = df.drop(meta_data, axis=1)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, df=df_nometa)
-        self.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
+        column_format = tables.create_column_format_for_tex(df)
+        tables.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format,
+                           df=df_nometa)
+        tables.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
         # df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
         #                    column_format=column_format)
         df_descr = self.create_describe_df(df_nometa)
-        column_format = self.create_column_format_for_tex(df_descr)
-        self.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
-        self.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
+        column_format = tables.create_column_format_for_tex(df_descr)
+        tables.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
+        tables.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
         # df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
 
     @staticmethod
@@ -147,15 +148,6 @@ class PreProcessing(RunEnvironment):
         df_descr = df_descr[df_descr_colnames]
         return df_descr
 
-    @staticmethod
-    def save_to_tex(path, filename, column_format, df, na_rep='---'):
-        df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
-
-    @staticmethod
-    def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
-        df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding),
-                       tablefmt=tablefmt)
-
     def create_info_df(self, meta_data, meta_round, names_of_set, precision):
         df = pd.DataFrame(columns=meta_data + names_of_set)
         for set_name in names_of_set:
@@ -174,19 +166,6 @@ class PreProcessing(RunEnvironment):
         df.index.name = 'stat. ID'
         return df
 
-    @staticmethod
-    def create_column_format_for_tex(df: pd.DataFrame) -> str:
-        """
-        Creates column format for latex table based on the shape of a given DataFrame.
-
-        Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
-        """
-        column_format = np.repeat('c', df.shape[1] + 1)
-        column_format[0] = 'l'
-        column_format[-1] = 'r'
-        column_format = ''.join(column_format.tolist())
-        return column_format
-
     def split_train_val_test(self) -> None:
         """
         Split data into subsets.
-- 
GitLab
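
The new module bundles the table exports used across the run modules. A short usage sketch with a hypothetical DataFrame and output path:

    import pandas as pd
    from mlair.helpers import tables

    df = pd.DataFrame({"train": [1413, 3002], "val": [698, 715]},
                      index=["DEBW013", "DEBW076"])
    column_format = tables.create_column_format_for_tex(df)  # 'lcr': 2 cols + index
    tables.save_to_tex("/tmp", "sample_size.tex", column_format=column_format, df=df)
    tables.save_to_md("/tmp", "sample_size.md", df=df)  # requires the tabulate package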


From cba186aaecc141619d401cc40c78daff20d07532 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 3 Mar 2021 18:08:31 +0100
Subject: [PATCH 015/175] use table methods in training too

---
 mlair/run_modules/training.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index d4badfe2..5f895b77 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -10,13 +10,15 @@ from typing import Union
 
 import keras
 from keras.callbacks import Callback, History
+import psutil
+import pandas as pd
 
 from mlair.data_handler import KerasIterator
 from mlair.model_modules.keras_extensions import CallbackHandler
 from mlair.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.configuration import path_config
-from mlair.helpers import to_list
+from mlair.helpers import to_list, tables
 
 
 class Training(RunEnvironment):
@@ -141,7 +143,8 @@ class Training(RunEnvironment):
                                                verbose=2,
                                                validation_data=self.val_set,
                                                validation_steps=len(self.val_set),
-                                               callbacks=self.callbacks.get_callbacks(as_dict=False))
+                                               callbacks=self.callbacks.get_callbacks(as_dict=False),
+                                               workers=psutil.cpu_count(logical=False))
         else:
             logging.info("Found locally stored model and checkpoints. Training is resumed from the last checkpoint.")
             self.callbacks.load_callbacks()
@@ -156,7 +159,8 @@ class Training(RunEnvironment):
                                          validation_data=self.val_set,
                                          validation_steps=len(self.val_set),
                                          callbacks=self.callbacks.get_callbacks(as_dict=False),
-                                         initial_epoch=initial_epoch)
+                                         initial_epoch=initial_epoch,
+                                         workers=psutil.cpu_count(logical=False))
             history = hist
         try:
             lr = self.callbacks.get_callback_by_name("lr")
@@ -233,22 +237,26 @@ class Training(RunEnvironment):
             PlotModelLearningRate(filename=os.path.join(path, f"{name}_history_learning_rate.pdf"), lr_sc=lr_sc)
 
     def report_training(self):
+        # create training summary
         data = {"mini batches": len(self.train_set),
                 "upsampling extremes": self.train_set.upsampling,
                 "shuffling": self.train_set.shuffle,
                 "created new model": self._create_new_model,
                 "epochs": self.epochs,
                 "batch size": self.batch_size}
-        import pandas as pd
         df = pd.DataFrame.from_dict(data, orient="index", columns=["training setting"])
         df.sort_index(inplace=True)
-        column_format = "ll"
         path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
         path_config.check_path_and_create(path)
-        df.to_latex(os.path.join(path, "training_settings.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "training_settings.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
 
-        val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0, steps=1)
-        for index, item in enumerate(to_list(val_score)):
-            logging.info(f"{self.model.metrics_names[index]} (val), {item}")
+        # store as .tex and .md
+        tables.save_to_tex(path, "training_settings.tex", column_format="ll", df=df)
+        tables.save_to_md(path, "training_settings.md", df=df)
+
+        # calculate val scores
+        val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0)
+        path = self.data_store.get("model_path")
+        with open(os.path.join(path, "val_scores.txt"), "a") as f:
+            for index, item in enumerate(to_list(val_score)):
+                logging.info(f"{self.model.metrics_names[index]} (val), {item}")
+                f.write(f"{self.model.metrics_names[index]}, {item}\n")
-- 
GitLab
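
This patch also pins the fit_generator workers count to the number of physical cores. A sketch of the pattern, with model and generators assumed to exist:

    import psutil

    # logical=False counts physical cores only, so hyper-threads do not
    # oversubscribe the data-loading workers
    model.fit_generator(generator=train_set,
                        epochs=epochs,
                        validation_data=val_set,
                        workers=psutil.cpu_count(logical=False))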


From 69fb063b3fbd4ce41bd03ec6bedb00daee3871d5 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 3 Mar 2021 18:09:50 +0100
Subject: [PATCH 016/175] postprocessing now reports different error metrics on
 the test set

---
 mlair/run_modules/post_processing.py | 63 ++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 6f78a03d..5216157f 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -13,9 +13,10 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
+from mlair.configuration import path_config
 from mlair.data_handler import BootStraps, KerasIterator
 from mlair.helpers.datastore import NameNotFoundInDataStore
-from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list
+from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list, tables
 from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
 from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \
@@ -102,9 +103,11 @@ class PostProcessing(RunEnvironment):
                 create_new_bootstraps = self.data_store.get("create_new_bootstraps", "postprocessing")
                 self.bootstrap_postprocessing(create_new_bootstraps)
 
-        # skill scores
+        # skill scores and error metrics
         with TimeTracking(name="calculate skill scores"):
-            self.skill_scores = self.calculate_skill_scores()
+            skill_score_competitive, skill_score_climatological, errors = self.calculate_error_metrics()
+            self.skill_scores = (skill_score_competitive, skill_score_climatological)
+        self.report_error_metrics(errors)
 
         # plotting
         self.plot()
@@ -386,8 +389,10 @@ class PostProcessing(RunEnvironment):
 
     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
+
+        # test scores on transformed data
         test_score = self.model.evaluate_generator(generator=self.test_data_distributed,
-                                                   use_multiprocessing=True, verbose=0, steps=1)
+                                                   use_multiprocessing=True, verbose=0)
         path = self.data_store.get("model_path")
         with open(os.path.join(path, "test_scores.txt"), "a") as f:
             for index, item in enumerate(to_list(test_score)):
@@ -656,22 +661,29 @@ class PostProcessing(RunEnvironment):
         except (TypeError, AttributeError):
             return forecast if competitor is None else competitor
 
-    def calculate_skill_scores(self) -> Tuple[Dict, Dict]:
+    def calculate_error_metrics(self) -> Tuple[Dict, Dict, Dict]:
         """
-        Calculate skill scores of NN forecast.
+        Calculate error metrics and skill scores of NN forecast.
 
         The competitive skill score compares the NN prediction with persistence and ordinary least squares forecasts.
         Whereas, the climatological skill scores evaluates the NN prediction in terms of meaningfulness in comparison
         to different climatological references.
 
-        :return: competitive and climatological skill scores
+        :return: competitive and climatological skill scores, error metrics
         """
         path = self.data_store.get("forecast_path")
         all_stations = self.data_store.get("stations")
         skill_score_competitive = {}
         skill_score_climatological = {}
+        errors = {}
         for station in all_stations:
-            external_data = self._get_external_data(station, path)
+            external_data = self._get_external_data(station, path)  # test data
+
+            # test errors
+            errors[station] = statistics.calculate_error_metrics(*map(lambda x: external_data.sel(type=x),
+                                                                      [self.forecast_indicator, "obs"]),
+                                                                 dim="index")
+            # skill score
             competitor = self.load_competitors(station)
             combined = self._combine_forecasts(external_data, competitor, dim="type")
             model_list = remove_items(list(combined.type.values), "obs") if combined is not None else None
@@ -683,4 +695,37 @@ class PostProcessing(RunEnvironment):
             if internal_data is not None:
                 skill_score_climatological[station] = skill_score.climatological_skill_scores(
                     internal_data, self.window_lead_time, forecast_name=self.forecast_indicator)
-        return skill_score_competitive, skill_score_climatological
+
+        errors.update({"total": self.calculate_average_errors(errors)})
+        return skill_score_competitive, skill_score_climatological, errors
+
+    @staticmethod
+    def calculate_average_errors(errors):
+        avg_error = {}
+        n_total = sum([x.get("n", 0) for _, x in errors.items()])
+        for station, station_errors in errors.items():
+            n_station = station_errors.get("n")
+            for error_metric, val in station_errors.items():
+                new_val = avg_error.get(error_metric, 0) + val * n_station / n_total
+                avg_error[error_metric] = new_val
+        return avg_error
+
+    def report_error_metrics(self, errors):
+        report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
+        path_config.check_path_and_create(report_path)
+        metric_collection = {}
+        for station, station_errors in errors.items():
+            for metric, vals in station_errors.items():
+                if metric == "n":
+                    continue
+                pd_vals = pd.DataFrame.from_dict({station: vals}).T
+                pd_vals.columns = [f"{metric}(t+{x})" for x in vals.coords["ahead"].values]
+                mc = metric_collection.get(metric, pd.DataFrame())
+                mc = mc.append(pd_vals)
+                metric_collection[metric] = mc
+        for metric, error_df in metric_collection.items():
+            df = error_df.sort_index()
+            df.reindex(df.index.drop(["total"]).to_list() + ["total"], )
+            column_format = tables.create_column_format_for_tex(df)
+            tables.save_to_tex(report_path, f"error_report_{metric}.tex", column_format=column_format, df=df)
+            tables.save_to_md(report_path, f"error_report_{metric}.md", df=df)
-- 
GitLab
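
calculate_average_errors weights each station's metrics by its sample count n, i.e. it forms a pooled mean over all stations. A worked sketch with two hypothetical stations:

    errors = {"A": {"mse": 2.0, "n": 100},
              "B": {"mse": 4.0, "n": 300}}
    n_total = sum(v["n"] for v in errors.values())                       # 400
    avg_mse = sum(v["mse"] * v["n"] / n_total for v in errors.values())
    assert avg_mse == 3.5                                                # (200 + 1200) / 400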


From dfbbdd31f71d965144d0a42f6333ec7305cc53fe Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 3 Mar 2021 18:18:56 +0100
Subject: [PATCH 017/175] moved table test, /close #286

---
 test/test_helpers/test_tables.py             | 21 ++++++++
 test/test_run_modules/test_pre_processing.py | 56 ++++++++------------
 2 files changed, 43 insertions(+), 34 deletions(-)
 create mode 100644 test/test_helpers/test_tables.py

diff --git a/test/test_helpers/test_tables.py b/test/test_helpers/test_tables.py
new file mode 100644
index 00000000..1b19b19b
--- /dev/null
+++ b/test/test_helpers/test_tables.py
@@ -0,0 +1,21 @@
+import pandas as pd
+import numpy as np
+
+from mlair.helpers import tables
+
+
+class TestTables:
+
+    def test_create_column_format_for_tex(self):
+        df = pd.DataFrame(np.ones((2, 1)))
+        df_col = tables.create_column_format_for_tex(df)  # len: 1+1
+        assert df_col == 'lr'
+        assert len(df_col) == 2
+        df = pd.DataFrame(np.ones((2, 2)))
+        df_col = tables.create_column_format_for_tex(df)  # len: 2+1
+        assert df_col == 'lcr'
+        assert len(df_col) == 3
+        df = pd.DataFrame(np.ones((2, 3)))
+        df_col = tables.create_column_format_for_tex(df)  # len: 3+1
+        assert df_col == 'lccr'
+        assert len(df_col) == 4
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index b5a1914e..5ae64bf3 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -140,40 +140,28 @@ class TestPreProcessing:
         data_preparation = AbstractDataHandler
         stations = ['DEBW107', 'DEBY081']
         assert pre.transformation(data_preparation, stations) is None
+
         class data_preparation_no_trans: pass
+
         assert pre.transformation(data_preparation_no_trans, stations) is None
 
-    @pytest.fixture
-    def dummy_df(self):
-        data_dict = {'station_name': {'DEBW013': 'Stuttgart Bad Cannstatt', 'DEBW076': 'Baden-Baden',
-                                      'DEBW087': 'Schwäbische_Alb', 'DEBW107': 'Tübingen',
-                                      'DEBY081': 'Garmisch-Partenkirchen/Kreuzeckbahnstraße', '# Stations': np.nan,
-                                      '# Samples': np.nan},
-                     'station_lon': {'DEBW013': 9.2297, 'DEBW076': 8.2202, 'DEBW087': 9.2076, 'DEBW107': 9.0512,
-                                     'DEBY081': 11.0631, '# Stations': np.nan, '# Samples': np.nan},
-                     'station_lat': {'DEBW013': 48.8088, 'DEBW076': 48.7731, 'DEBW087': 48.3458, 'DEBW107': 48.5077,
-                                     'DEBY081': 47.4764, '# Stations': np.nan, '# Samples': np.nan},
-                     'station_alt': {'DEBW013': 235.0, 'DEBW076': 148.0, 'DEBW087': 798.0, 'DEBW107': 325.0,
-                                     'DEBY081': 735.0, '# Stations': np.nan, '# Samples': np.nan},
-                     'train': {'DEBW013': 1413, 'DEBW076': 3002, 'DEBW087': 3016, 'DEBW107': 1782, 'DEBY081': 2837,
-                               '# Stations': 6, '# Samples': 12050},
-                     'val': {'DEBW013': 698, 'DEBW076': 715, 'DEBW087': 700, 'DEBW107': 701, 'DEBY081': 456,
-                             '# Stations': 6, '# Samples': 3270},
-                     'test': {'DEBW013': 1066, 'DEBW076': 696, 'DEBW087': 1080, 'DEBW107': 1080, 'DEBY081': 700,
-                              '# Stations': 6, '# Samples': 4622}}
-        df = pd.DataFrame.from_dict(data_dict)
-        return df
-
-    def test_create_column_format_for_tex(self):
-        df = pd.DataFrame(np.ones((2, 1)))
-        df_col = PreProcessing.create_column_format_for_tex(df)  # len: 1+1
-        assert df_col == 'lr'
-        assert len(df_col) == 2
-        df = pd.DataFrame(np.ones((2, 2)))
-        df_col = PreProcessing.create_column_format_for_tex(df)  # len: 2+1
-        assert df_col == 'lcr'
-        assert len(df_col) == 3
-        df = pd.DataFrame(np.ones((2, 3)))
-        df_col = PreProcessing.create_column_format_for_tex(df) # len: 3+1
-        assert df_col == 'lccr'
-        assert len(df_col) == 4
+    # @pytest.fixture
+    # def dummy_df(self):
+    #     data_dict = {'station_name': {'DEBW013': 'Stuttgart Bad Cannstatt', 'DEBW076': 'Baden-Baden',
+    #                                   'DEBW087': 'Schwäbische_Alb', 'DEBW107': 'Tübingen',
+    #                                   'DEBY081': 'Garmisch-Partenkirchen/Kreuzeckbahnstraße', '# Stations': np.nan,
+    #                                   '# Samples': np.nan},
+    #                  'station_lon': {'DEBW013': 9.2297, 'DEBW076': 8.2202, 'DEBW087': 9.2076, 'DEBW107': 9.0512,
+    #                                  'DEBY081': 11.0631, '# Stations': np.nan, '# Samples': np.nan},
+    #                  'station_lat': {'DEBW013': 48.8088, 'DEBW076': 48.7731, 'DEBW087': 48.3458, 'DEBW107': 48.5077,
+    #                                  'DEBY081': 47.4764, '# Stations': np.nan, '# Samples': np.nan},
+    #                  'station_alt': {'DEBW013': 235.0, 'DEBW076': 148.0, 'DEBW087': 798.0, 'DEBW107': 325.0,
+    #                                  'DEBY081': 735.0, '# Stations': np.nan, '# Samples': np.nan},
+    #                  'train': {'DEBW013': 1413, 'DEBW076': 3002, 'DEBW087': 3016, 'DEBW107': 1782, 'DEBY081': 2837,
+    #                            '# Stations': 6, '# Samples': 12050},
+    #                  'val': {'DEBW013': 698, 'DEBW076': 715, 'DEBW087': 700, 'DEBW107': 701, 'DEBY081': 456,
+    #                          '# Stations': 6, '# Samples': 3270},
+    #                  'test': {'DEBW013': 1066, 'DEBW076': 696, 'DEBW087': 1080, 'DEBW107': 1080, 'DEBY081': 700,
+    #                           '# Stations': 6, '# Samples': 4622}}
+    #     df = pd.DataFrame.from_dict(data_dict)
+    #     return df
-- 
GitLab


From 782f111e76b2ab656e499e38a001eec3ac3df537 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 4 Mar 2021 10:46:12 +0100
Subject: [PATCH 018/175] addendum to FCN class: enable linear activations and
 add activation_output parameter

---
 mlair/model_modules/fully_connected_networks.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 940c9846..dbcd3a9f 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -63,12 +63,13 @@ class FCN(AbstractModelClass):
     """
 
     _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
-                   "sigmoid": partial(keras.layers.Activation, "sigmoid")}
+                   "sigmoid": partial(keras.layers.Activation, "sigmoid"),
+                   "linear": partial(keras.layers.Activation, "linear")}
     _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov"]
 
-    def __init__(self, input_shape: list, output_shape: list, activation="relu", optimizer="adam",
-                 n_layer=1, n_hidden=10, **kwargs):
+    def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
+                 optimizer="adam", n_layer=1, n_hidden=10, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -82,6 +83,7 @@ class FCN(AbstractModelClass):
 
         # settings
         self.activation = self._set_activation(activation)
+        self.activation_output = self._set_activation(activation_output)
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
         self.layer_configuration = (n_layer, n_hidden)
         self._update_model_name()
@@ -127,7 +129,7 @@ class FCN(AbstractModelClass):
             x_in = keras.layers.Dense(n_hidden)(x_in)
             x_in = self.activation()(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
-        out = self.activation()(x_in)
+        out = self.activation_output()(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
 
     def set_compile_options(self):
-- 
GitLab
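
With this addendum, hidden layers and the output layer use separate activations; for an unbounded regression target a linear output is the natural default. A minimal keras sketch of the resulting graph (shapes hypothetical):

    import keras

    x_input = keras.layers.Input(shape=(14, 1, 2))
    x_in = keras.layers.Flatten()(x_input)
    x_in = keras.layers.Dense(10)(x_in)
    x_in = keras.layers.Activation("relu")(x_in)     # hidden activation
    x_in = keras.layers.Dense(3)(x_in)
    out = keras.layers.Activation("linear")(x_in)    # activation_output
    model = keras.Model(inputs=x_input, outputs=[out])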


From a18730525f189e9edd6c9210a79b072fb3f1fdb8 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 4 Mar 2021 11:20:54 +0100
Subject: [PATCH 019/175] minor fix to skip calculating error metrics if external
 data is None

---
 mlair/run_modules/post_processing.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 5216157f..b16028fa 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -680,9 +680,10 @@ class PostProcessing(RunEnvironment):
             external_data = self._get_external_data(station, path)  # test data
 
             # test errors
-            errors[station] = statistics.calculate_error_metrics(*map(lambda x: external_data.sel(type=x),
-                                                                      [self.forecast_indicator, "obs"]),
-                                                                 dim="index")
+            if external_data is not None:
+                errors[station] = statistics.calculate_error_metrics(*map(lambda x: external_data.sel(type=x),
+                                                                          [self.forecast_indicator, "obs"]),
+                                                                     dim="index")
             # skill score
             competitor = self.load_competitors(station)
             combined = self._combine_forecasts(external_data, competitor, dim="type")
-- 
GitLab


From 4b6e0f72ca795aeac64d30873f0c11c6d5ab52b8 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 4 Mar 2021 12:00:55 +0100
Subject: [PATCH 020/175] log clim skill scores too

---
 mlair/run_modules/post_processing.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index b16028fa..3b9b5634 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -108,6 +108,7 @@ class PostProcessing(RunEnvironment):
             skill_score_competitive, skill_score_climatological, errors = self.calculate_error_metrics()
             self.skill_scores = (skill_score_competitive, skill_score_climatological)
         self.report_error_metrics(errors)
+        self.report_error_metrics(skill_score_climatological)
 
         # plotting
         self.plot()
@@ -716,6 +717,10 @@ class PostProcessing(RunEnvironment):
         path_config.check_path_and_create(report_path)
         metric_collection = {}
         for station, station_errors in errors.items():
+            if isinstance(station_errors, xr.DataArray):
+                dim = station_errors.dims[0]
+                sel_index = [sel for sel in station_errors.coords[dim] if "CASE" in str(sel)]
+                station_errors = {str(i.values): station_errors.sel(**{dim: i}) for i in sel_index}
             for metric, vals in station_errors.items():
                 if metric == "n":
                     continue
@@ -726,7 +731,9 @@ class PostProcessing(RunEnvironment):
                 metric_collection[metric] = mc
         for metric, error_df in metric_collection.items():
             df = error_df.sort_index()
-            df.reindex(df.index.drop(["total"]).to_list() + ["total"], )
+            if "total" in df.index:
+                df.reindex(df.index.drop(["total"]).to_list() + ["total"], )
             column_format = tables.create_column_format_for_tex(df)
-            tables.save_to_tex(report_path, f"error_report_{metric}.tex", column_format=column_format, df=df)
-            tables.save_to_md(report_path, f"error_report_{metric}.md", df=df)
+            file_name = f"error_report_{metric}.%s".replace(' ', '_')
+            tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df)
+            tables.save_to_md(report_path, file_name % "md", df=df)
-- 
GitLab


From eec379fcb9f274c60b64dc088f0c9d955cd5eb0c Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Fri, 5 Mar 2021 13:41:12 +0100
Subject: [PATCH 021/175] set dask.compute

---
 mlair/data_handler/default_data_handler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 52835975..e4367184 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -13,6 +13,7 @@ from functools import reduce
 from typing import Tuple, Union, List
 import multiprocessing
 import psutil
+import dask
 
 import numpy as np
 import xarray as xr
@@ -84,7 +85,7 @@ class DefaultDataHandler(AbstractDataHandler):
             self._cleanup() if fresh_store is True else None
             data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
             with open(self._save_file, "wb") as f:
-                pickle.dump(data, f)
+                pickle.dump(dask.compute(data), f)
             logging.debug(f"save pickle data to {self._save_file}")
             self._reset_data()
 
-- 
GitLab


From fc6625d210e59925194f2d0f239bc78b0f7f653a Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Fri, 5 Mar 2021 13:53:09 +0100
Subject: [PATCH 022/175] remove dask from defaultdh

---
 mlair/data_handler/default_data_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index e4367184..8553aec7 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -85,7 +85,7 @@ class DefaultDataHandler(AbstractDataHandler):
             self._cleanup() if fresh_store is True else None
             data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
             with open(self._save_file, "wb") as f:
-                pickle.dump(dask.compute(data), f)
+                pickle.dump(data, f)
             logging.debug(f"save pickle data to {self._save_file}")
             self._reset_data()
 
-- 
GitLab


From e8e8aa69bd28828af6a5c8e018efb8dad013bd49 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Fri, 5 Mar 2021 14:02:37 +0100
Subject: [PATCH 023/175] force dask.compute in try except

---
 mlair/data_handler/default_data_handler.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 8553aec7..ddf276cf 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -84,11 +84,20 @@ class DefaultDataHandler(AbstractDataHandler):
         if store_processed_data is True:
             self._cleanup() if fresh_store is True else None
             data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
+            data = self._force_dask_computation(data)
             with open(self._save_file, "wb") as f:
                 pickle.dump(data, f)
             logging.debug(f"save pickle data to {self._save_file}")
             self._reset_data()
 
+    @staticmethod
+    def _force_dask_computation(data):
+        try:
+            data = dask.compute(data)[0]
+        except:
+            pass
+        return data
+
     def _load(self):
         try:
             with open(self._save_file, "rb") as f:
-- 
GitLab
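
The helper materializes any dask-backed arrays before pickling, so the file stores concrete values instead of lazy task graphs; the try/except leaves data without dask collections untouched. A sketch under these assumptions:

    import dask
    import dask.array as da
    import numpy as np

    data = {"X": da.ones((4, 2), chunks=2), "Y": np.zeros(4)}
    computed = dask.compute(data)[0]   # compute returns a tuple, [0] unwraps the dict
    assert isinstance(computed["X"], np.ndarray)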


From f033d85fb9568c8e08ab2a4af5b20c1a1eeb47ab Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 8 Mar 2021 13:16:29 +0100
Subject: [PATCH 024/175] new var loss and custom loss methods

---
 .../model_modules/fully_connected_networks.py | 11 +++--
 mlair/model_modules/loss.py                   | 18 ++++++++
 test/test_model_modules/test_loss.py          | 46 +++++++++++++++++--
 3 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index dbcd3a9f..45b8eb63 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -5,6 +5,7 @@ from functools import reduce, partial
 
 from mlair.model_modules import AbstractModelClass
 from mlair.helpers import select_from_dict
+from mlair.model_modules.loss import var_loss, custom_loss
 
 import keras
 
@@ -64,7 +65,9 @@ class FCN(AbstractModelClass):
 
     _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
                    "sigmoid": partial(keras.layers.Activation, "sigmoid"),
-                   "linear": partial(keras.layers.Activation, "linear")}
+                   "linear": partial(keras.layers.Activation, "linear"),
+                   "selu": partial(keras.layers.Activation, "selu")}
+    _initializer = {"selu": keras.initializers.lecun_normal()}
     _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov"]
 
@@ -87,6 +90,7 @@ class FCN(AbstractModelClass):
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
         self.layer_configuration = (n_layer, n_hidden)
         self._update_model_name()
+        self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
 
         # apply to model
         self.set_model()
@@ -126,11 +130,12 @@ class FCN(AbstractModelClass):
         x_in = keras.layers.Flatten()(x_input)
         n_layer, n_hidden = self.layer_configuration
         for layer in range(n_layer):
-            x_in = keras.layers.Dense(n_hidden)(x_in)
+            x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer)(x_in)
             x_in = self.activation()(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output()(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
 
     def set_compile_options(self):
-        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
+        self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss])],
+                                "metrics": ["mse", "mae", var_loss]}
diff --git a/mlair/model_modules/loss.py b/mlair/model_modules/loss.py
index bcb85282..ba871e98 100644
--- a/mlair/model_modules/loss.py
+++ b/mlair/model_modules/loss.py
@@ -20,3 +20,21 @@ def l_p_loss(power: int) -> Callable:
         return K.mean(K.pow(K.abs(y_pred - y_true), power), axis=-1)
 
     return loss
+
+
+def var_loss(y_true, y_pred) -> Callable:
+    return K.mean(K.square(K.var(y_true) - K.var(y_pred)))
+
+
+def custom_loss(loss_list, loss_weights=None) -> Callable:
+    n = len(loss_list)
+    if loss_weights is None:
+        loss_weights = [1. / n for _ in range(n)]
+    else:
+        assert len(loss_weights) == n
+        loss_weights = [w / sum(loss_weights) for w in loss_weights]
+
+    def loss(y_true, y_pred):
+        return sum([loss_weights[i] * loss_list[i](y_true, y_pred) for i in range(n)])
+
+    return loss
diff --git a/test/test_model_modules/test_loss.py b/test/test_model_modules/test_loss.py
index e54e0b00..c993830c 100644
--- a/test/test_model_modules/test_loss.py
+++ b/test/test_model_modules/test_loss.py
@@ -1,10 +1,12 @@
 import keras
 import numpy as np
 
-from mlair.model_modules.loss import l_p_loss
+from mlair.model_modules.loss import l_p_loss, var_loss, custom_loss
 
+import pytest
 
-class TestLoss:
+
+class TestLPLoss:
 
     def test_l_p_loss(self):
         model = keras.Sequential()
@@ -14,4 +16,42 @@ class TestLoss:
         assert hist.history['loss'][0] == 1.25
         model.compile(optimizer=keras.optimizers.Adam(), loss=l_p_loss(3))
         hist = model.fit(np.array([1, 0, -2, 0.5]), np.array([1, 1, 0, 0.5]), epochs=1)
-        assert hist.history['loss'][0] == 2.25
\ No newline at end of file
+        assert hist.history['loss'][0] == 2.25
+
+
+class TestVarLoss:
+
+    def test_var_loss(self):
+        model = keras.Sequential()
+        model.add(keras.layers.Lambda(lambda x: x, input_shape=(None,)))
+        model.compile(optimizer=keras.optimizers.Adam(), loss=var_loss)
+        hist = model.fit(np.array([1, 0, 2, 0.5]), np.array([1, 1, 0, 0.5]), epochs=1)
+        assert hist.history['loss'][0] == 0.140625
+
+
+class TestCustomLoss:
+
+    def test_custom_loss_no_weights(self):
+        cust_loss = custom_loss([l_p_loss(2), var_loss])
+        model = keras.Sequential()
+        model.add(keras.layers.Lambda(lambda x: x, input_shape=(None,)))
+        model.compile(optimizer=keras.optimizers.Adam(), loss=cust_loss)
+        hist = model.fit(np.array([1, 0, 2, 0.5]), np.array([1, 1, 0, 0.5]), epochs=1)
+        assert hist.history['loss'][0] == (0.5 * 0.140625 + 0.5 * 1.25)
+
+    @pytest.mark.parametrize("weights", [[0.3, 0.7], [0.5, 0.5], [1, 1], [4, 1]])
+    def test_custom_loss_with_weights(self, weights):
+        cust_loss = custom_loss([l_p_loss(2), var_loss], weights)
+        model = keras.Sequential()
+        model.add(keras.layers.Lambda(lambda x: x, input_shape=(None,)))
+        model.compile(optimizer=keras.optimizers.Adam(), loss=cust_loss)
+        hist = model.fit(np.array([1, 0, 2, 0.5]), np.array([1, 1, 0, 0.5]), epochs=1)
+        weights_adjusted = list(map(lambda x: x / sum(weights), weights))
+        expected = (weights_adjusted[0] * 1.25 + weights_adjusted[1] * 0.140625)
+        assert np.testing.assert_almost_equal(hist.history['loss'][0], expected, decimal=6) is None
+
+    def test_custom_loss_invalid_weights(self):
+        with pytest.raises(AssertionError):
+            custom_loss([l_p_loss(2), var_loss], [0.3])
+        with pytest.raises(AssertionError):
+            custom_loss([l_p_loss(2), var_loss], [0.4, 3, 1])
-- 
GitLab
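
A note on custom_loss above: the weights are normalized to sum to one before being applied, and the returned closure is what keras receives as the loss. With the component values used in the tests (l_p_loss(2) -> 1.25, var_loss -> 0.140625), weights [4, 1] give:

    # Weight normalization and the combined value checked by the tests above.
    weights = [4, 1]
    normalized = [w / sum(weights) for w in weights]       # [0.8, 0.2]
    combined = normalized[0] * 1.25 + normalized[1] * 0.140625
    print(combined)                                        # 1.028125

(Incidentally, the return annotation of var_loss says Callable although it returns a tensor; custom_loss is the function that actually returns a callable.)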


From 29c65d634d9107b6251ad4d83fc72e4b391f08bc Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 8 Mar 2021 15:02:20 +0100
Subject: [PATCH 025/175] custom objects were missing

---
 mlair/model_modules/fully_connected_networks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 45b8eb63..9e3657c3 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -95,7 +95,7 @@ class FCN(AbstractModelClass):
         # apply to model
         self.set_model()
         self.set_compile_options()
-        # self.set_custom_objects(loss=self.compile_options['loss'])
+        self.set_custom_objects(loss=custom_loss([keras.losses.mean_squared_error, var_loss]), var_loss=var_loss)
 
     def _set_activation(self, activation):
         try:
-- 
GitLab
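
Registering the loss via set_custom_objects matters for deserialization: keras cannot resolve a custom loss stored in a saved model unless it is handed back in. A sketch of the equivalent manual call (the file name is purely illustrative):

    import keras
    from mlair.model_modules.loss import var_loss, custom_loss

    # Without custom_objects, load_model raises on the unknown "loss" symbol.
    model = keras.models.load_model(
        "model.h5",  # illustrative path
        custom_objects={"loss": custom_loss([keras.losses.mean_squared_error, var_loss]),
                        "var_loss": var_loss})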


From b7e2b29a040c812d34fcb70e623a5a7b01b263f7 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 9 Mar 2021 10:36:48 +0100
Subject: [PATCH 026/175] select from dict can remove none entries, FCN
 supports dropout and l1/l2 regularization

---
 mlair/helpers/helpers.py                      |  3 +-
 .../model_modules/fully_connected_networks.py | 36 +++++++++++++++++--
 test/test_helpers/test_helpers.py             |  6 +++-
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py
index ee727ef5..b57b733b 100644
--- a/mlair/helpers/helpers.py
+++ b/mlair/helpers/helpers.py
@@ -103,7 +103,7 @@ def remove_items(obj: Union[List, Dict], items: Any):
         raise TypeError(f"{inspect.stack()[0][3]} does not support type {type(obj)}.")
 
 
-def select_from_dict(dict_obj: dict, sel_list: Any):
+def select_from_dict(dict_obj: dict, sel_list: Any, remove_none=False):
     """
     Extract all key values pairs whose key is contained in the sel_list.
 
@@ -113,6 +113,7 @@ def select_from_dict(dict_obj: dict, sel_list: Any):
     sel_list = to_list(sel_list)
     assert isinstance(dict_obj, dict)
     sel_dict = {k: v for k, v in dict_obj.items() if k in sel_list}
+    sel_dict = sel_dict if not remove_none else {k: v for k, v in sel_dict.items() if v is not None}
     return sel_dict
 
 
diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 9e3657c3..948d2b06 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -69,10 +69,11 @@ class FCN(AbstractModelClass):
                    "selu": partial(keras.layers.Activation, "selu")}
     _initializer = {"selu": keras.initializers.lecun_normal()}
     _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
-    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov"]
+    _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
-                 optimizer="adam", n_layer=1, n_hidden=10, **kwargs):
+                 optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -91,6 +92,8 @@ class FCN(AbstractModelClass):
         self.layer_configuration = (n_layer, n_hidden)
         self._update_model_name()
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
+        self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
+        self.dropout = self._set_dropout(dropout)
 
         # apply to model
         self.set_model()
@@ -116,6 +119,30 @@ class FCN(AbstractModelClass):
         except KeyError:
             raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
 
+    def _set_regularizer(self, regularizer, **kwargs):
+        if regularizer is None:
+            return regularizer
+        try:
+            reg_name = regularizer.lower()
+            reg = self._regularizer.get(reg_name)
+            reg_kwargs = {}
+            if reg_name in ["l1", "l2"]:
+                reg_kwargs = select_from_dict(kwargs, reg_name, remove_none=True)
+                if reg_name in reg_kwargs:
+                    reg_kwargs["l"] = reg_kwargs.pop(reg_name)
+            elif reg_name == "l1_l2":
+                reg_kwargs = select_from_dict(kwargs, ["l1", "l2"], remove_none=True)
+            return reg(**reg_kwargs)
+        except KeyError:
+            raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
+
+    @staticmethod
+    def _set_dropout(dropout):
+        if dropout is None:
+            return dropout
+        assert 0 <= dropout < 1
+        return dropout
+
     def _update_model_name(self):
         n_layer, n_hidden = self.layer_configuration
         n_input = str(reduce(lambda x, y: x * y, self._input_shape))
@@ -130,8 +157,11 @@ class FCN(AbstractModelClass):
         x_in = keras.layers.Flatten()(x_input)
         n_layer, n_hidden = self.layer_configuration
         for layer in range(n_layer):
-            x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer)(x_in)
+            x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
+                                      kernel_regularizer=self.kernel_regularizer)(x_in)
             x_in = self.activation()(x_in)
+            if self.dropout is not None:
+                x_in = keras.layers.Dropout(self.dropout)(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output()(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
diff --git a/test/test_helpers/test_helpers.py b/test/test_helpers/test_helpers.py
index f2e2b341..91f2278a 100644
--- a/test/test_helpers/test_helpers.py
+++ b/test/test_helpers/test_helpers.py
@@ -175,7 +175,7 @@ class TestSelectFromDict:
 
     @pytest.fixture
     def dictionary(self):
-        return {"a": 1, "b": 23, "c": "last"}
+        return {"a": 1, "b": 23, "c": "last", "e": None}
 
     def test_select(self, dictionary):
         assert select_from_dict(dictionary, "c") == {"c": "last"}
@@ -186,6 +186,10 @@ class TestSelectFromDict:
         with pytest.raises(AssertionError):
             select_from_dict(["we"], "now")
 
+    def test_select_remove_none(self, dictionary):
+        assert select_from_dict(dictionary, ["a", "e"]) == {"a": 1, "e": None}
+        assert select_from_dict(dictionary, ["a", "e"], remove_none=True) == {"a": 1}
+
 
 class TestRemoveItems:
 
-- 
GitLab
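
The remove_none flag introduced above keeps unset hyperparameters from leaking into downstream calls, e.g. so that _set_regularizer does not pass l1=None to keras.regularizers.l1_l2 when only l2 is provided:

    from mlair.helpers import select_from_dict

    kwargs = {"lr": 1e-3, "l1": None, "l2": 0.01}
    select_from_dict(kwargs, ["l1", "l2"])                    # {'l1': None, 'l2': 0.01}
    select_from_dict(kwargs, ["l1", "l2"], remove_none=True)  # {'l2': 0.01}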


From 17e37aa85b22b615155cd0938123ad4a451a93b9 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 9 Mar 2021 19:16:05 +0100
Subject: [PATCH 027/175] catch case where regularizer is None but given as a string

---
 mlair/model_modules/fully_connected_networks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 948d2b06..1f965f3c 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -120,8 +120,8 @@ class FCN(AbstractModelClass):
             raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
 
     def _set_regularizer(self, regularizer, **kwargs):
-        if regularizer is None:
-            return regularizer
+        if regularizer is None or (isinstance(regularizer, str) and regularizer.lower() == "none"):
+            return None
         try:
             reg_name = regularizer.lower()
             reg = self._regularizer.get(reg_name)
-- 
GitLab


From 920fbcb80c20462b440355233bd2aeb230bd4bca Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 10 Mar 2021 16:33:41 +0100
Subject: [PATCH 028/175] FCN can use an explicit layer configuration,
 activation layers are now named according to the activation function

---
 .../model_modules/fully_connected_networks.py | 37 +++++++++++++------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 1f965f3c..fb2ee26e 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -73,7 +73,8 @@ class FCN(AbstractModelClass):
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
-                 optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, **kwargs):
+                 optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, explicite_layers=None,
+                 **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -89,7 +90,7 @@ class FCN(AbstractModelClass):
         self.activation = self._set_activation(activation)
         self.activation_output = self._set_activation(activation_output)
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
-        self.layer_configuration = (n_layer, n_hidden)
+        self.layer_configuration = (n_layer, n_hidden) if explicite_layers is None else explicite_layers
         self._update_model_name()
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
@@ -144,10 +145,13 @@ class FCN(AbstractModelClass):
         return dropout
 
     def _update_model_name(self):
-        n_layer, n_hidden = self.layer_configuration
         n_input = str(reduce(lambda x, y: x * y, self._input_shape))
         n_output = str(self._output_shape)
-        self.model_name += "_".join(["", n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
+        if isinstance(self.layer_configuration, tuple) and len(self.layer_configuration) == 2:
+            n_layer, n_hidden = self.layer_configuration
+            self.model_name += "_".join(["", n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
+        else:
+            self.model_name += "_".join(["", n_input, *[f"{n}" for n in self.layer_configuration], n_output])
 
     def set_model(self):
         """
@@ -155,15 +159,24 @@ class FCN(AbstractModelClass):
         """
         x_input = keras.layers.Input(shape=self._input_shape)
         x_in = keras.layers.Flatten()(x_input)
-        n_layer, n_hidden = self.layer_configuration
-        for layer in range(n_layer):
-            x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
-                                      kernel_regularizer=self.kernel_regularizer)(x_in)
-            x_in = self.activation()(x_in)
-            if self.dropout is not None:
-                x_in = keras.layers.Dropout(self.dropout)(x_in)
+        if isinstance(self.layer_configuration, tuple) is True:
+            n_layer, n_hidden = self.layer_configuration
+            for layer in range(n_layer):
+                x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
+                                          kernel_regularizer=self.kernel_regularizer)(x_in)
+                x_in = self.activation(name=f"{self.activation.args[0]}_{layer + 1}")(x_in)
+                if self.dropout is not None:
+                    x_in = keras.layers.Dropout(self.dropout)(x_in)
+        else:
+            assert isinstance(self.layer_configuration, list) is True
+            for layer, n_hidden in enumerate(self.layer_configuration):
+                x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
+                                          kernel_regularizer=self.kernel_regularizer)(x_in)
+                x_in = self.activation(name=f"{self.activation.args[0]}_{layer + 1}")(x_in)
+                if self.dropout is not None:
+                    x_in = keras.layers.Dropout(self.dropout)(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
-        out = self.activation_output()(x_in)
+        out = self.activation_output(name=f"{self.activation_output.args[0]}_output")(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
 
     def set_compile_options(self):
-- 
GitLab


From 584668a20b42e431a61c3286f80500c094eb6732 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 10 Mar 2021 16:46:15 +0100
Subject: [PATCH 029/175] renamed explicite_layers to layer_configuration

---
 mlair/model_modules/fully_connected_networks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index fb2ee26e..1fd61d98 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -73,7 +73,7 @@ class FCN(AbstractModelClass):
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
-                 optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, explicite_layers=None,
+                 optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
                  **kwargs):
         """
         Sets model and loss depending on the given arguments.
@@ -90,7 +90,7 @@ class FCN(AbstractModelClass):
         self.activation = self._set_activation(activation)
         self.activation_output = self._set_activation(activation_output)
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
-        self.layer_configuration = (n_layer, n_hidden) if explicite_layers is None else explicite_layers
+        self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
         self._update_model_name()
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
-- 
GitLab
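
With the rename in place, FCN accepts either the uniform (n_layer, n_hidden) pair or an explicit per-layer list. The shapes below are illustrative only; both calls build the same three-layer network with 32 units each:

    from mlair.model_modules.fully_connected_networks import FCN

    model_a = FCN(input_shape=[(13, 1, 5)], output_shape=[(4,)],
                  n_layer=3, n_hidden=32)
    model_b = FCN(input_shape=[(13, 1, 5)], output_shape=[(4,)],
                  layer_configuration=[32, 32, 32])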


From 8b5f1346ce54c7c7d0e0d43b8983877fbeb12df4 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 10 Mar 2021 17:24:16 +0100
Subject: [PATCH 030/175] FCN activation name now works with relu

---
 mlair/model_modules/fully_connected_networks.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 1fd61d98..007b8f0d 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -88,7 +88,9 @@ class FCN(AbstractModelClass):
 
         # settings
         self.activation = self._set_activation(activation)
+        self.activation_name = activation
         self.activation_output = self._set_activation(activation_output)
+        self.activation_output_name = activation_output
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
         self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
         self._update_model_name()
@@ -164,7 +166,7 @@ class FCN(AbstractModelClass):
             for layer in range(n_layer):
                 x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
                                           kernel_regularizer=self.kernel_regularizer)(x_in)
-                x_in = self.activation(name=f"{self.activation.args[0]}_{layer + 1}")(x_in)
+                x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
                 if self.dropout is not None:
                     x_in = keras.layers.Dropout(self.dropout)(x_in)
         else:
@@ -172,11 +174,11 @@ class FCN(AbstractModelClass):
             for layer, n_hidden in enumerate(self.layer_configuration):
                 x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
                                           kernel_regularizer=self.kernel_regularizer)(x_in)
-                x_in = self.activation(name=f"{self.activation.args[0]}_{layer + 1}")(x_in)
+                x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
                 if self.dropout is not None:
                     x_in = keras.layers.Dropout(self.dropout)(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
-        out = self.activation_output(name=f"{self.activation_output.args[0]}_output")(x_in)
+        out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
 
     def set_compile_options(self):
-- 
GitLab
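
Background for this fix: in the _activation map, relu is the plain keras.layers.ReLU class while the other entries are functools.partial objects, and only partials carry an .args tuple. Hence the earlier f"{self.activation.args[0]}_..." failed for relu:

    from functools import partial
    import keras

    tanh = partial(keras.layers.Activation, "tanh")
    print(tanh.args)                              # ('tanh',)
    print(hasattr(keras.layers.ReLU, "args"))     # False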


From 33940965f7812decca45b5e23ebafcaaff243d10 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 12:02:20 +0100
Subject: [PATCH 031/175] first CNN class try

---
 mlair/model_modules/convolutional_networks.py | 113 ++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 mlair/model_modules/convolutional_networks.py

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
new file mode 100644
index 00000000..f9acdb72
--- /dev/null
+++ b/mlair/model_modules/convolutional_networks.py
@@ -0,0 +1,113 @@
+__author__ = "Lukas Leufen"
+__date__ = '2021-02-'
+
+from functools import reduce, partial
+
+from mlair.model_modules import AbstractModelClass
+from mlair.helpers import select_from_dict
+from mlair.model_modules.loss import var_loss, custom_loss
+from mlair.model_modules.advanced_paddings import PadUtils, Padding2D, SymmetricPadding2D
+
+import keras
+
+
+class CNN(AbstractModelClass):
+    _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
+                   "sigmoid": partial(keras.layers.Activation, "sigmoid"),
+                   "linear": partial(keras.layers.Activation, "linear"),
+                   "selu": partial(keras.layers.Activation, "selu")}
+    _initializer = {"selu": keras.initializers.lecun_normal()}
+    _optimizer = {"adam": keras.optimizers.adam}
+    _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"]
+
+    def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
+                 optimizer="adam", regularizer=None, **kwargs):
+
+        assert len(input_shape) == 1
+        assert len(output_shape) == 1
+        super().__init__(input_shape[0], output_shape[0])
+
+        # settings
+        self.activation = self._set_activation(activation)
+        self.activation_name = activation
+        self.activation_output = self._set_activation(activation_output)
+        self.activation_output_name = activation_output
+        self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
+        self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
+        self.optimizer = self._set_optimizer(optimizer, **kwargs)
+
+        # apply to model
+        self.set_model()
+        self.set_compile_options()
+        self.set_custom_objects(loss=custom_loss([keras.losses.mean_squared_error, var_loss]), var_loss=var_loss)
+
+    def _set_activation(self, activation):
+        try:
+            return self._activation.get(activation.lower())
+        except KeyError:
+            raise AttributeError(f"Given activation {activation} is not supported in this model class.")
+
+    def _set_optimizer(self, optimizer, **kwargs):
+        try:
+            opt_name = optimizer.lower()
+            opt = self._optimizer.get(opt_name)
+            opt_kwargs = {}
+            if opt_name == "adam":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"])
+            return opt(**opt_kwargs)
+        except KeyError:
+            raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
+
+    def _set_regularizer(self, regularizer, **kwargs):
+        if regularizer is None or (isinstance(regularizer, str) and regularizer.lower() == "none"):
+            return None
+        try:
+            reg_name = regularizer.lower()
+            reg = self._regularizer.get(reg_name)
+            reg_kwargs = {}
+            if reg_name in ["l1", "l2"]:
+                reg_kwargs = select_from_dict(kwargs, reg_name, remove_none=True)
+                if reg_name in reg_kwargs:
+                    reg_kwargs["l"] = reg_kwargs.pop(reg_name)
+            elif reg_name == "l1_l2":
+                reg_kwargs = select_from_dict(kwargs, ["l1", "l2"], remove_none=True)
+            return reg(**reg_kwargs)
+        except KeyError:
+            raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
+
+    def set_model(self):
+        """
+        Build the model.
+        """
+        x_input = keras.layers.Input(shape=self._input_shape)
+        kernel = (1, 1)
+        pad_size = PadUtils.get_padding_for_same(kernel)
+        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad")(x_input)
+        x_in = keras.layers.Conv2D(filters=16, kernel_size=kernel,
+                                   kernel_initializer=self.kernel_initializer,
+                                   kernel_regularizer=self.kernel_regularizer)(x_in)
+        x_in = self.activation()(x_in)
+        x_in = keras.layers.Conv2D(filters=32, kernel_size=kernel,
+                                   kernel_initializer=self.kernel_initializer,
+                                   kernel_regularizer=self.kernel_regularizer)(x_in)
+        x_in = self.activation()(x_in)
+        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad")(x_in)
+        x_in = keras.layers.Conv2D(filters=64, kernel_size=kernel,
+                                   kernel_initializer=self.kernel_initializer,
+                                   kernel_regularizer=self.kernel_regularizer)(x_in)
+        x_in = self.activation()(x_in)
+        x_in = keras.layers.Flatten()(x_in)
+        x_in = keras.layers.Dense(64, kernel_initializer=self.kernel_initializer,
+                                  kernel_regularizer=self.kernel_regularizer)(x_in)
+        x_in = self.activation()(x_in)
+        x_in = keras.layers.Dense(16, kernel_initializer=self.kernel_initializer,
+                                  kernel_regularizer=self.kernel_regularizer)(x_in)
+        x_in = self.activation()(x_in)
+        x_in = keras.layers.Dense(self._output_shape)(x_in)
+        out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
+        self.model = keras.Model(inputs=x_input, outputs=[out])
+
+    def set_compile_options(self):
+        self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss])],
+                                "metrics": ["mse", "mae", var_loss]}
-- 
GitLab
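
A usage sketch for the new class (shapes and hyperparameters are illustrative; MLAir passes input_shape and output_shape as single-item lists of tuples):

    from mlair.model_modules.convolutional_networks import CNN

    cnn = CNN(input_shape=[(73, 1, 9)], output_shape=[(4,)],
              activation="relu", regularizer="l2", l2=0.01)
    cnn.model.summary()   # model is exposed via AbstractModelClass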


From 30c27e99c0daf6ce0620745c23258c5da18450f8 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 12:15:23 +0100
Subject: [PATCH 032/175] new pad layer names

---
 mlair/model_modules/convolutional_networks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index f9acdb72..e7d1da23 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -83,7 +83,7 @@ class CNN(AbstractModelClass):
         x_input = keras.layers.Input(shape=self._input_shape)
         kernel = (1, 1)
         pad_size = PadUtils.get_padding_for_same(kernel)
-        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad")(x_input)
+        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad1")(x_input)
         x_in = keras.layers.Conv2D(filters=16, kernel_size=kernel,
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
@@ -92,7 +92,7 @@ class CNN(AbstractModelClass):
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
-        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad")(x_in)
+        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad2")(x_in)
         x_in = keras.layers.Conv2D(filters=64, kernel_size=kernel,
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
-- 
GitLab


From 7e529068a8f5c7c0010a2410e9e8389d667e4cd9 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 12:43:19 +0100
Subject: [PATCH 033/175] bigger kernel

---
 mlair/model_modules/convolutional_networks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index e7d1da23..0a16be7c 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -81,7 +81,7 @@ class CNN(AbstractModelClass):
         Build the model.
         """
         x_input = keras.layers.Input(shape=self._input_shape)
-        kernel = (1, 1)
+        kernel = (5, 1)
         pad_size = PadUtils.get_padding_for_same(kernel)
         x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad1")(x_input)
         x_in = keras.layers.Conv2D(filters=16, kernel_size=kernel,
@@ -91,8 +91,8 @@ class CNN(AbstractModelClass):
         x_in = keras.layers.Conv2D(filters=32, kernel_size=kernel,
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
+        x_in = keras.layers.MaxPooling2D(kernel, strides=(1, 1), padding='valid')(x_in)
         x_in = self.activation()(x_in)
-        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad2")(x_in)
         x_in = keras.layers.Conv2D(filters=64, kernel_size=kernel,
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
-- 
GitLab


From 7673e830a77c78f499c82909ca51d52e282a3609 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 12:44:41 +0100
Subject: [PATCH 034/175] kernel size can be set from outside

---
 mlair/model_modules/convolutional_networks.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index 0a16be7c..5146fe52 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -22,7 +22,7 @@ class CNN(AbstractModelClass):
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"]
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
-                 optimizer="adam", regularizer=None, **kwargs):
+                 optimizer="adam", regularizer=None, kernel_size=1, **kwargs):
 
         assert len(input_shape) == 1
         assert len(output_shape) == 1
@@ -35,6 +35,7 @@ class CNN(AbstractModelClass):
         self.activation_output_name = activation_output
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
+        self.kernel_size = kernel_size
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
 
         # apply to model
@@ -81,7 +82,7 @@ class CNN(AbstractModelClass):
         Build the model.
         """
         x_input = keras.layers.Input(shape=self._input_shape)
-        kernel = (5, 1)
+        kernel = (self.kernel_size, 1)
         pad_size = PadUtils.get_padding_for_same(kernel)
         x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad1")(x_input)
         x_in = keras.layers.Conv2D(filters=16, kernel_size=kernel,
-- 
GitLab


From 452b590c4b36008a70d1831da01874b9d4d90ac8 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 14:47:18 +0100
Subject: [PATCH 035/175] no sympad for CNN

---
 mlair/model_modules/convolutional_networks.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index 5146fe52..2d8fd9e2 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -83,17 +83,15 @@ class CNN(AbstractModelClass):
         """
         x_input = keras.layers.Input(shape=self._input_shape)
         kernel = (self.kernel_size, 1)
-        pad_size = PadUtils.get_padding_for_same(kernel)
-        x_in = Padding2D("SymPad2D")(padding=pad_size, name="SymPad1")(x_input)
         x_in = keras.layers.Conv2D(filters=16, kernel_size=kernel,
                                    kernel_initializer=self.kernel_initializer,
-                                   kernel_regularizer=self.kernel_regularizer)(x_in)
+                                   kernel_regularizer=self.kernel_regularizer)(x_input)
         x_in = self.activation()(x_in)
         x_in = keras.layers.Conv2D(filters=32, kernel_size=kernel,
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
-        x_in = keras.layers.MaxPooling2D(kernel, strides=(1, 1), padding='valid')(x_in)
         x_in = self.activation()(x_in)
+        x_in = keras.layers.MaxPooling2D(kernel, strides=(1, 1), padding='valid')(x_in)
         x_in = keras.layers.Conv2D(filters=64, kernel_size=kernel,
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
-- 
GitLab


From a9640da66f529382b584ea9c2dabf3ec420d7e34 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 15:07:38 +0100
Subject: [PATCH 036/175] fix kernel size for now

---
 mlair/model_modules/convolutional_networks.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index 2d8fd9e2..329d1952 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -83,16 +83,16 @@ class CNN(AbstractModelClass):
         """
         x_input = keras.layers.Input(shape=self._input_shape)
         kernel = (self.kernel_size, 1)
-        x_in = keras.layers.Conv2D(filters=16, kernel_size=kernel,
+        x_in = keras.layers.Conv2D(filters=16, kernel_size=(73, 1),
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_input)
         x_in = self.activation()(x_in)
-        x_in = keras.layers.Conv2D(filters=32, kernel_size=kernel,
+        x_in = keras.layers.Conv2D(filters=32, kernel_size=(49, 1),
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
-        x_in = keras.layers.MaxPooling2D(kernel, strides=(1, 1), padding='valid')(x_in)
-        x_in = keras.layers.Conv2D(filters=64, kernel_size=kernel,
+        x_in = keras.layers.MaxPooling2D((25, 1), strides=(1, 1), padding='valid')(x_in)
+        x_in = keras.layers.Conv2D(filters=64, kernel_size=(13, 1),
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
-- 
GitLab
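
With 'valid' padding and stride 1, every convolution or pooling step above shrinks the time axis by kernel - 1, i.e. by 72 + 48 + 24 + 12 = 156 steps in total, so the input window must be longer than 156 steps. A quick check with an illustrative window length:

    # Each valid conv/pool with stride 1 shortens the time axis by (k - 1).
    L = 365                      # illustrative input window length
    for k in (73, 49, 25, 13):   # conv, conv, max-pool, conv kernel heights
        L -= k - 1
    print(L)                     # 365 - 156 = 209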


From 56f3657c1c07eaa2b617ebf5b2d7435d7f97faa7 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 15:24:49 +0100
Subject: [PATCH 037/175] changed dense layer

---
 mlair/model_modules/convolutional_networks.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index 329d1952..c4a10990 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -82,7 +82,6 @@ class CNN(AbstractModelClass):
         Build the model.
         """
         x_input = keras.layers.Input(shape=self._input_shape)
-        kernel = (self.kernel_size, 1)
         x_in = keras.layers.Conv2D(filters=16, kernel_size=(73, 1),
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_input)
@@ -97,10 +96,10 @@ class CNN(AbstractModelClass):
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
         x_in = keras.layers.Flatten()(x_in)
-        x_in = keras.layers.Dense(64, kernel_initializer=self.kernel_initializer,
+        x_in = keras.layers.Dense(128, kernel_initializer=self.kernel_initializer,
                                   kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(16, kernel_initializer=self.kernel_initializer,
+        x_in = keras.layers.Dense(32, kernel_initializer=self.kernel_initializer,
                                   kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
-- 
GitLab


From 2ba4fe1a3edb7db4479f5fa083ffc7312f36e99d Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 16:45:02 +0100
Subject: [PATCH 038/175] first fix to be able to run again

---
 mlair/data_handler/data_handler_mixed_sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index caaa7a62..8159abda 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -204,7 +204,7 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
         time_deltas = np.round(self.time_delta(self.cutoff_period)).astype(int)
         start, end = window, 1
         res = []
-        window_array = self.create_index_array(self.window_dim.range(start, end), squeeze_dim=self.target_dim)
+        window_array = self.create_index_array(self.window_dim, range(start, end), squeeze_dim=self.target_dim)
         for delta, filter_name in zip(np.append(time_deltas, 1), data.coords["filter"]):
             res_filter = []
             data_filter = data.sel({"filter": filter_name})
-- 
GitLab


From de191bedaa41dec1555a492627b06d969a52f172 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 12 Mar 2021 17:09:09 +0100
Subject: [PATCH 039/175] new requirement bottleneck improves speed for KZ filter

---
 HPC_setup/requirements_HDFML_additionals.txt  | 1 +
 HPC_setup/requirements_JUWELS_additionals.txt | 1 +
 mlair/helpers/statistics.py                   | 4 +++-
 requirements.txt                              | 1 +
 requirements_gpu.txt                          | 1 +
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/HPC_setup/requirements_HDFML_additionals.txt b/HPC_setup/requirements_HDFML_additionals.txt
index 12e09ccd..26e335d5 100644
--- a/HPC_setup/requirements_HDFML_additionals.txt
+++ b/HPC_setup/requirements_HDFML_additionals.txt
@@ -2,6 +2,7 @@ absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
 attrs==20.3.0
+bottleneck==1.3.2
 cached-property==1.5.2
 certifi==2020.12.5
 cftime==1.4.1
diff --git a/HPC_setup/requirements_JUWELS_additionals.txt b/HPC_setup/requirements_JUWELS_additionals.txt
index 12e09ccd..26e335d5 100644
--- a/HPC_setup/requirements_JUWELS_additionals.txt
+++ b/HPC_setup/requirements_JUWELS_additionals.txt
@@ -2,6 +2,7 @@ absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
 attrs==20.3.0
+bottleneck==1.3.2
 cached-property==1.5.2
 certifi==2020.12.5
 cftime==1.4.1
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 3631597a..57d7802e 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -616,9 +616,11 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
              wl(int): a window length
              itr(int): a number of iteration
         """
+        import warnings
+        warnings.filterwarnings("ignore")
         df_itr = df.__deepcopy__()
         try:
-            kwargs = {"min_periods": 1,
+            kwargs = {"min_periods": int(0.7 * wl),
                       "center": True,
                       self.filter_dim: wl}
             iter_vars = df_itr.coords["variables"].values
diff --git a/requirements.txt b/requirements.txt
index b0a6e7f5..51d6e023 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,7 @@ absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
 attrs==20.3.0
+bottleneck==1.3.2
 cached-property==1.5.2
 certifi==2020.12.5
 cftime==1.4.1
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
index 35fe0d5e..11a5c8ae 100644
--- a/requirements_gpu.txt
+++ b/requirements_gpu.txt
@@ -2,6 +2,7 @@ absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
 attrs==20.3.0
+bottleneck==1.3.2
 cached-property==1.5.2
 certifi==2020.12.5
 cftime==1.4.1
-- 
GitLab
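
bottleneck speeds this up because xarray dispatches rolling reductions such as mean to it when it is installed, with no code change required. The min_periods change also alters edge behavior: windows with fewer than 70 % valid points now yield NaN instead of an average over very few values. A sketch (the dimension name "datetime" is an assumption):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(10.0), dims="datetime")
    wl = 5
    # At least int(0.7 * 5) = 3 valid points are required per window;
    # shorter edge windows become NaN rather than a biased mean.
    smoothed = da.rolling(datetime=wl, min_periods=int(0.7 * wl), center=True).mean()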


From 7376d0c9d2fadaaa926e23f6120b34315a826d4f Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 15 Mar 2021 12:52:16 +0100
Subject: [PATCH 040/175] use He initialization when using relu activations

---
 mlair/model_modules/fully_connected_networks.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 007b8f0d..7108d9a3 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -67,7 +67,8 @@ class FCN(AbstractModelClass):
                    "sigmoid": partial(keras.layers.Activation, "sigmoid"),
                    "linear": partial(keras.layers.Activation, "linear"),
                    "selu": partial(keras.layers.Activation, "selu")}
-    _initializer = {"selu": keras.initializers.lecun_normal()}
+    _initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
+                    "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal()}
     _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
     _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
-- 
GitLab


From 1f13155f000f62c5bc8ab1e2f62c658bf6714bea Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 15 Mar 2021 14:03:50 +0100
Subject: [PATCH 041/175] added prelu activation, alpha dropout is used for
 selu activation, FCN_64_32_16 now inherits from the FCN class

---
 mlair/model_modules/abstract_model_class.py   |   4 +-
 .../model_modules/fully_connected_networks.py | 100 +++++++-----------
 2 files changed, 43 insertions(+), 61 deletions(-)

diff --git a/mlair/model_modules/abstract_model_class.py b/mlair/model_modules/abstract_model_class.py
index 894ff7ac..989f4578 100644
--- a/mlair/model_modules/abstract_model_class.py
+++ b/mlair/model_modules/abstract_model_class.py
@@ -82,7 +82,7 @@ class AbstractModelClass(ABC):
         self.__custom_objects = value
 
     @property
-    def compile_options(self) -> Callable:
+    def compile_options(self) -> Dict:
         """
         The compile options property allows the user to use all keras.compile() arguments. They can ether be passed as
         dictionary (1), as attribute, without setting compile_options (2) or as mixture (partly defined as instance
@@ -116,7 +116,7 @@ class AbstractModelClass(ABC):
             def set_compile_options(self):
                 self.optimizer = keras.optimizers.SGD()
                 self.loss = keras.losses.mean_squared_error
-                self.compile_options = {"optimizer" = keras.optimizers.Adam(), "metrics": ["mse", "mae"]}
+                self.compile_options = {"optimizer": keras.optimizers.Adam(), "metrics": ["mse", "mae"]}
 
         Note:
         * As long as the attribute and the dict value have exactly the same values, the setter method will not raise
diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 7108d9a3..9fb08cdf 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -10,53 +10,6 @@ from mlair.model_modules.loss import var_loss, custom_loss
 import keras
 
 
-class FCN_64_32_16(AbstractModelClass):
-    """
-    A customised model 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
-    on the window_lead_time parameter.
-    """
-
-    def __init__(self, input_shape: list, output_shape: list):
-        """
-        Sets model and loss depending on the given arguments.
-
-        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
-        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
-        """
-
-        assert len(input_shape) == 1
-        assert len(output_shape) == 1
-        super().__init__(input_shape[0], output_shape[0])
-
-        # settings
-        self.activation = keras.layers.PReLU
-
-        # apply to model
-        self.set_model()
-        self.set_compile_options()
-        self.set_custom_objects(loss=self.compile_options['loss'])
-
-    def set_model(self):
-        """
-        Build the model.
-        """
-        x_input = keras.layers.Input(shape=self._input_shape)
-        x_in = keras.layers.Flatten()(x_input)
-        x_in = keras.layers.Dense(64, name="Dense_64")(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(32, name="Dense_32")(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(16, name="Dense_16")(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(self._output_shape, name="Dense_output")(x_in)
-        out_main = self.activation()(x_in)
-        self.model = keras.Model(inputs=x_input, outputs=[out_main])
-
-    def set_compile_options(self):
-        self.optimizer = keras.optimizers.adam(lr=1e-2)
-        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
-
-
 class FCN(AbstractModelClass):
     """
     A customisable fully connected network (64, 32, 16, window_lead_time), where the last layer is the output layer depending
@@ -66,12 +19,15 @@ class FCN(AbstractModelClass):
     _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
                    "sigmoid": partial(keras.layers.Activation, "sigmoid"),
                    "linear": partial(keras.layers.Activation, "linear"),
-                   "selu": partial(keras.layers.Activation, "selu")}
+                   "selu": partial(keras.layers.Activation, "selu"),
+                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25))}
     _initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
-                    "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal()}
+                    "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal(),
+                    "prelu": keras.initializers.he_normal()}
     _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
     _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
+    _dropout = {"selu": keras.layers.AlphaDropout}
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
@@ -97,12 +53,12 @@ class FCN(AbstractModelClass):
         self._update_model_name()
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
-        self.dropout = self._set_dropout(dropout)
+        self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
 
         # apply to model
         self.set_model()
         self.set_compile_options()
-        self.set_custom_objects(loss=custom_loss([keras.losses.mean_squared_error, var_loss]), var_loss=var_loss)
+        self.set_custom_objects(loss=self.compile_options["loss"][0], var_loss=var_loss)
 
     def _set_activation(self, activation):
         try:
@@ -140,12 +96,11 @@ class FCN(AbstractModelClass):
         except KeyError:
             raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
 
-    @staticmethod
-    def _set_dropout(dropout):
-        if dropout is None:
-            return dropout
-        assert 0 <= dropout < 1
-        return dropout
+    def _set_dropout(self, activation, dropout_rate):
+        if dropout_rate is None:
+            return None, None
+        assert 0 <= dropout_rate < 1
+        return self._dropout.get(activation, keras.layers.Dropout), dropout_rate
 
     def _update_model_name(self):
         n_input = str(reduce(lambda x, y: x * y, self._input_shape))
@@ -169,7 +124,7 @@ class FCN(AbstractModelClass):
                                           kernel_regularizer=self.kernel_regularizer)(x_in)
                 x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
                 if self.dropout is not None:
-                    x_in = keras.layers.Dropout(self.dropout)(x_in)
+                    x_in = self.dropout(self.dropout_rate)(x_in)
         else:
             assert isinstance(self.layer_configuration, list) is True
             for layer, n_hidden in enumerate(self.layer_configuration):
@@ -177,7 +132,7 @@ class FCN(AbstractModelClass):
                                           kernel_regularizer=self.kernel_regularizer)(x_in)
                 x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
                 if self.dropout is not None:
-                    x_in = keras.layers.Dropout(self.dropout)(x_in)
+                    x_in = self.dropout(self.dropout_rate)(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
@@ -185,3 +140,30 @@ class FCN(AbstractModelClass):
     def set_compile_options(self):
         self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss])],
                                 "metrics": ["mse", "mae", var_loss]}
+
+
+class FCN_64_32_16(FCN):
+    """
+    A customised model 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
+    on the window_lead_time parameter.
+    """
+
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"]
+
+    def __init__(self, input_shape: list, output_shape: list, **kwargs):
+        """
+        Sets model and loss depending on the given arguments.
+
+        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
+        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+        """
+        lr = kwargs.pop("lr", 1e-2)
+        super().__init__(input_shape, output_shape, activation="prelu", activation_output="linear",
+                         layer_configuration=[64, 32, 16], optimizer="adam", lr=lr, **kwargs)
+
+    def set_compile_options(self):
+        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
+
+    def _update_model_name(self):
+        self.model_name = "FCN"
+        super()._update_model_name()
-- 
GitLab
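
On the AlphaDropout choice above: selu relies on activations keeping zero mean and unit variance, which standard Dropout destroys; AlphaDropout is designed to preserve these moments. The lookup falls back to plain Dropout for all other activations:

    import keras

    _dropout = {"selu": keras.layers.AlphaDropout}
    print(_dropout.get("selu", keras.layers.Dropout))   # AlphaDropout
    print(_dropout.get("relu", keras.layers.Dropout))   # Dropout (fallback)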


From 96942e6e3ac9875ce8b78ab8cb999be85e1e4918 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Tue, 16 Mar 2021 09:41:42 +0000
Subject: [PATCH 042/175] try to fix six error

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b0a6e7f5..c4b281bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -51,7 +51,7 @@ PyYAML==5.4.1
 requests==2.25.1
 scipy==1.5.4
 seaborn==0.11.1
-six==1.15.0
+--ignore-installed six==1.15.0
 statsmodels==0.12.2
 tabulate==0.8.8
 tensorboard==1.13.1
-- 
GitLab


From 7aefc11fbe46e5807693ebdf45dfbc6fea96e569 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Tue, 16 Mar 2021 09:46:14 +0000
Subject: [PATCH 043/175] change six installation

---
 .gitlab-ci.yml   | 2 +-
 requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f4d042f0..eacbe3e2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -42,7 +42,7 @@ tests (from scratch):
     - ./CI/update_badge.sh > /dev/null
   script:
     - pip install --upgrade pip
-    - pip install numpy wheel six
+    - pip install numpy wheel six==1.15.0
     - zypper --non-interactive install binutils libproj-devel gdal-devel
     - zypper --non-interactive install proj geos-devel
     #    - cat requirements.txt | cut -f1 -d"#" | sed '/^\s*$/d' | xargs -L 1 pip install
diff --git a/requirements.txt b/requirements.txt
index c4b281bb..b0a6e7f5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -51,7 +51,7 @@ PyYAML==5.4.1
 requests==2.25.1
 scipy==1.5.4
 seaborn==0.11.1
---ignore-installed six==1.15.0
+six==1.15.0
 statsmodels==0.12.2
 tabulate==0.8.8
 tensorboard==1.13.1
-- 
GitLab


From 614866546c57b9ac85eb9b173066dd703ba711e1 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 16 Mar 2021 14:44:24 +0100
Subject: [PATCH 044/175] kz filter per variable seems to be faster than over
 all variables, check on HPC

---
 .../data_handler_mixed_sampling.py            |  6 +++
 mlair/helpers/statistics.py                   | 48 ++++++++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 8159abda..c56499dc 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -114,6 +114,12 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
         self._data = list(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()
         self.apply_kz_filter()
+        # lazy data loading on first time if possible
+        # * store the kz data locally in data path under different folder /e.g. kzf_data
+        # * create a checksum for the name and reuse this data whenever the checksum matches (this will replace all
+        #   previous steps and save a lot of computation time).
+        # lazy create of subsets by reusing as much as possible
+        # * start here when using preprocessed data, select new start and end
         if self.do_transformation is True:
             self.call_transform()
         self.make_samples()
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 57d7802e..0b73bc27 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -11,8 +11,10 @@ import pandas as pd
 from typing import Union, Tuple, Dict, List
 from matplotlib import pyplot as plt
 import itertools
+import gc
+import warnings
 
-from mlair.helpers import to_list
+from mlair.helpers import to_list, TimeTracking, TimeTrackingWrapper
 
 Data = Union[xr.DataArray, pd.DataFrame]
 
@@ -608,6 +610,48 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
         else:
             return None
 
+    @TimeTrackingWrapper
+    def kz_filter_new(self, df, wl, itr):
+        """
+        Apply the Kolmogorov-Zurbenko filter and return the low-frequency component of the time series.
+
+        If the filter method is mean, max, or min, this method calls construct and rechunk before the actual
+        calculation to improve performance. If the filter method is median or percentile, this approach is not
+        applicable and, depending on the data and window size, this method can become slow.
+
+        Args:
+             wl(int): the window length
+             itr(int): the number of iterations
+        """
+        warnings.filterwarnings("ignore")
+        df_itr = df.__deepcopy__()
+        try:
+            kwargs = {"min_periods": int(0.7 * wl),
+                      "center": True,
+                      self.filter_dim: wl}
+            for i in np.arange(0, itr):
+                print(i)
+                rolling = df_itr.chunk().rolling(**kwargs)
+                if self.method not in ["percentile", "median"]:
+                    rolling = rolling.construct("construct").chunk("auto")
+                if self.method == "median":
+                    df_mv_avg_tmp = rolling.median()
+                elif self.method == "percentile":
+                    df_mv_avg_tmp = rolling.quantile(self.percentile)
+                elif self.method == "max":
+                    df_mv_avg_tmp = rolling.max("construct")
+                elif self.method == "min":
+                    df_mv_avg_tmp = rolling.min("construct")
+                else:
+                    df_mv_avg_tmp = rolling.mean("construct")
+                df_itr = df_mv_avg_tmp.compute()
+                del df_mv_avg_tmp, rolling
+                gc.collect()
+            return df_itr
+        except ValueError:
+            raise
+
+    @TimeTrackingWrapper
     def kz_filter(self, df, wl, itr):
         """
         It passes the low frequency time series.
@@ -639,7 +683,7 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
                     else:
                         df_mv_avg_tmp = rolling.mean()
                     df_itr_var = df_mv_avg_tmp.compute()
-                df_itr = df_itr.drop_sel(variables=var).combine_first(df_itr_var)
+                df_itr.loc[{"variables": [var]}] = df_itr_var
             return df_itr
         except ValueError:
             raise ValueError
-- 
GitLab
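
Patch 044 adds kz_filter_new, which smooths all variables at once with a centred rolling window and, for the
mean/max/min methods, goes through rolling(...).construct(...) so that dask can rechunk the window dimension. A
rough standalone sketch of such a filter pass, assuming dask is installed and using an illustrative hourly toy
series in place of the handler's data:

    import numpy as np
    import pandas as pd
    import xarray as xr

    # toy hourly series; "datetime" plays the role of self.filter_dim
    da = xr.DataArray(np.random.rand(1000), dims="datetime",
                      coords={"datetime": pd.date_range("2020-01-01", periods=1000, freq="1H")})

    wl, itr = 25, 3  # window length and number of KZ iterations
    for _ in range(itr):
        rolling = da.chunk().rolling(datetime=wl, min_periods=int(0.7 * wl), center=True)
        # construct() exposes the window as an extra dimension so dask can
        # rechunk it; mean() then reduces over exactly that dimension
        da = rolling.construct("window").chunk("auto").mean("window").compute()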


From faaa3388db7ba50ab7627de6537d4c23efad7e6d Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 16 Mar 2021 17:39:00 +0100
Subject: [PATCH 045/175] single compute call was missing

---
 mlair/data_handler/data_handler_mixed_sampling.py | 2 +-
 mlair/helpers/statistics.py                       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index c56499dc..c62e18f2 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -218,7 +218,7 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
                 res_filter.append(data_filter.shift({dim: -w * delta}))
             res_filter = xr.concat(res_filter, dim=window_array).chunk()
             res.append(res_filter)
-        res = xr.concat(res, dim="filter")
+        res = xr.concat(res, dim="filter").compute()
         return res
 
     def estimate_filter_width(self):
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 0b73bc27..a8ba9795 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -669,8 +669,9 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
                       self.filter_dim: wl}
             iter_vars = df_itr.coords["variables"].values
             for var in iter_vars:
-                df_itr_var = df_itr.sel(variables=[var]).chunk()
+                df_itr_var = df_itr.sel(variables=[var])
                 for _ in np.arange(0, itr):
+                    df_itr_var = df_itr_var.chunk()
                     rolling = df_itr_var.rolling(**kwargs)
                     if self.method == "median":
                         df_mv_avg_tmp = rolling.median()
-- 
GitLab
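
Patch 045 adds the compute() and chunk() calls that were missing: without them, each iteration only extends the dask
task graph, so graph size and memory use grow with the number of iterations. Condensed into a standalone sketch with
an illustrative toy series:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(np.random.rand(1000), dims="datetime",
                      coords={"datetime": pd.date_range("2020-01-01", periods=1000, freq="1H")})

    result, wl = da, 25
    for _ in range(3):
        # chunk() re-wraps the computed result for dask; compute() materializes
        # each pass so the task graph does not grow across iterations
        result = result.chunk().rolling(datetime=wl, center=True).mean().compute()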


From 891e208f7b0d56da314e3cc7a3ef275dcb0ac2ae Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 16 Mar 2021 20:10:00 +0100
Subject: [PATCH 046/175] ensure that concat data has no NaNs

---
 mlair/helpers/statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index a8ba9795..3e99357c 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -440,7 +440,7 @@ class SkillScores:
         """Calculate CASE IV."""
         AI, BI, CI, data, suffix = self.skill_score_pre_calculations(internal_data, observation_name, forecast_name)
         monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, index=data.index)
-        data = xr.concat([data, monthly_mean_external], dim="type")
+        data = xr.concat([data, monthly_mean_external], dim="type").dropna(dim="index")
         mean, sigma = suffix["mean"], suffix["sigma"]
         mean_external = monthly_mean_external.mean()
         sigma_external = np.sqrt(monthly_mean_external.var())
-- 
GitLab
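
Patch 046 guards skill score CASE IV against NaNs: concatenating arrays with non-identical indices along a new
dimension pads the gaps with NaN, which would poison the subsequent statistics. A tiny demonstration with
illustrative values:

    import xarray as xr

    a = xr.DataArray([1.0, 2.0], dims="index", coords={"index": [0, 1]})
    b = xr.DataArray([3.0], dims="index", coords={"index": [0]})

    stacked = xr.concat([a, b], dim="type")   # position index=1 of b is padded with NaN
    clean = stacked.dropna(dim="index")       # drops index=1, keeping only complete rows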


From b10bca2b5a4db56d1e5f9b36a9cfbe7fb094ece6 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 17 Mar 2021 10:07:19 +0100
Subject: [PATCH 047/175] data handler single station now has a submethod
 make_input_target

---
 mlair/data_handler/data_handler_kz_filter.py  |  9 ++----
 .../data_handler_mixed_sampling.py            | 15 ++--------
 .../data_handler_single_station.py            | 30 ++++++++++++++++---
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py
index 78638a13..face8f3c 100644
--- a/mlair/data_handler/data_handler_kz_filter.py
+++ b/mlair/data_handler/data_handler_kz_filter.py
@@ -38,10 +38,7 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
     def _check_sampling(self, **kwargs):
         assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution
 
-    def setup_samples(self):
-        """
-        Setup samples. This method prepares and creates samples X, and labels Y.
-        """
+    def make_input_target(self):
         data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
                                          self.station_type, self.network, self.store_data_locally, self.data_origin)
         self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
@@ -54,9 +51,6 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
         # import matplotlib.pyplot as plt
         # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot()
         # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
-        if self.do_transformation is True:
-            self.call_transform()
-        self.make_samples()
 
     @TimeTrackingWrapper
     def apply_kz_filter(self):
@@ -88,6 +82,7 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
         return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
                                       self.filter_dim).copy()
 
+
 class DataHandlerKzFilter(DefaultDataHandler):
     """Data handler using kz filtered data."""
 
diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index caaa7a62..ebcfbb42 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -54,15 +54,9 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation):
         assert len(parameter) == 2  # (inputs, targets)
         kwargs.update({parameter_name: parameter})
 
-    def setup_samples(self):
-        """
-        Setup samples. This method prepares and creates samples X, and labels Y.
-        """
+    def make_input_target(self):
         self._data = list(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()
-        if self.do_transformation is True:
-            self.call_transform()
-        self.make_samples()
 
     def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]:
         vars = [self.variables, self.target_var]
@@ -104,19 +98,14 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
     def _check_sampling(self, **kwargs):
         assert kwargs.get("sampling") == ("hourly", "daily")
 
-    def setup_samples(self):
+    def make_input_target(self):
         """
-        Setup samples. This method prepares and creates samples X, and labels Y.
-
         A KZ filter is applied to the input data that has hourly resolution. Labels Y are provided as aggregated values
         with daily resolution.
         """
         self._data = list(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()
         self.apply_kz_filter()
-        if self.do_transformation is True:
-            self.call_transform()
-        self.make_samples()
 
     def estimate_filter_width(self):
         """
diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index a894c635..820e601f 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -5,6 +5,7 @@ __date__ = '2020-07-20'
 
 import copy
 import datetime as dt
+import hashlib
 import logging
 import os
 from functools import reduce
@@ -54,10 +55,16 @@ class DataHandlerSingleStation(AbstractDataHandler):
                  interpolation_limit: Union[int, Tuple[int]] = DEFAULT_INTERPOLATION_LIMIT,
                  interpolation_method: Union[str, Tuple[str]] = DEFAULT_INTERPOLATION_METHOD,
                  overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True,
-                 min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, **kwargs):
+                 min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None,
+                 lazy_loading: bool = False, **kwargs):
         super().__init__()
         self.station = helpers.to_list(station)
         self.path = self.setup_data_path(data_path, sampling)
+        self.lazy = lazy_loading
+        self.lazy_path = None
+        if self.lazy is True:
+            self.lazy_path = os.path.join(data_path, "lazy_data", self.__class__.__name__)
+            check_path_and_create(self.lazy_path)
         self.statistics_per_var = statistics_per_var
         self.data_origin = data_origin
         self.do_transformation = transformation is not None
@@ -94,6 +101,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
         self.observation = None
 
         # create samples
+        # self.hash()
         self.setup_samples()
 
     def __str__(self):
@@ -215,15 +223,18 @@ class DataHandlerSingleStation(AbstractDataHandler):
         """
         Setup samples. This method prepares and creates samples X, and labels Y.
         """
+        self.make_input_target()
+        if self.do_transformation is True:
+            self.call_transform()
+        self.make_samples()
+
+    def make_input_target(self):
         data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
                                          self.station_type, self.network, self.store_data_locally, self.data_origin,
                                          self.start, self.end)
         self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
                                       limit=self.interpolation_limit)
         self.set_inputs_and_targets()
-        if self.do_transformation is True:
-            self.call_transform()
-        self.make_samples()
 
     def set_inputs_and_targets(self):
         inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables)})
@@ -658,6 +669,17 @@ class DataHandlerSingleStation(AbstractDataHandler):
         return self.transform(data, dim=dim, opts=self._transformation[pos], inverse=inverse,
                               transformation_dim=self.target_dim)
 
+    def _get_hash(self):
+        hash_list = [self.station, self.statistics_per_var, self.data_origin, self.station_type, self.network,
+                     self.sampling, self.target_dim, self.target_var, self.time_dim, self.iter_dim, self.window_dim,
+                     self.window_history_size, self.window_history_offset, self.window_lead_time,
+                     self.interpolation_limit, self.interpolation_method, self.min_length, self.start, self.end]
+
+        hash = "".join([str(e) for e in hash_list]).encode("utf-8")
+        m = hashlib.sha256()
+        m.update(hash)
+        return m.hexdigest()
+
 
 if __name__ == "__main__":
     # dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
-- 
GitLab
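
Patch 047 turns setup_samples into a template method: transformation and sample creation stay in
DataHandlerSingleStation, while subclasses now override only make_input_target; in addition, _get_hash condenses all
identity-defining parameters into one digest. A compact sketch of the pattern (attribute names are illustrative, and
the class-level _hash list anticipates the mechanism completed in the next patch):

    import hashlib

    class Handler:
        """Skeleton of the template-method refactor (illustrative only)."""

        _hash = ["station", "sampling", "start", "end"]

        def __init__(self, station, sampling, start, end):
            self.station, self.sampling = station, sampling
            self.start, self.end = start, end

        def setup_samples(self):
            self.make_input_target()  # the only step specialized handlers override
            # ... transformation and sample creation stay identical ...

        def make_input_target(self):
            print("load and interpolate raw data")

        def _get_hash(self):
            parts = "".join(str(getattr(self, e)) for e in sorted(set(self._hash)))
            return hashlib.sha256(parts.encode("utf-8")).hexdigest()

    class FilteredHandler(Handler):
        _hash = Handler._hash + ["filter_dim"]  # subclasses extend the hash list

        def __init__(self, *args, filter_dim="filter"):
            super().__init__(*args)
            self.filter_dim = filter_dim

        def make_input_target(self):
            super().make_input_target()
            print("apply kz filter on top")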


From 3a3cc762fa0e8616aaf879e24532895304f34298 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 17 Mar 2021 15:35:09 +0100
Subject: [PATCH 048/175] can create a hash from all important parameters, lazy
 loading works for all data handlers

---
 HPC_setup/requirements_HDFML_additionals.txt  |  1 +
 HPC_setup/requirements_JUWELS_additionals.txt |  1 +
 mlair/data_handler/abstract_data_handler.py   |  3 ++
 mlair/data_handler/data_handler_kz_filter.py  |  1 +
 .../data_handler_mixed_sampling.py            | 37 +++++++++++--
 .../data_handler_single_station.py            | 54 ++++++++++++++-----
 requirements.txt                              |  1 +
 requirements_gpu.txt                          |  1 +
 .../test_data_handler_mixed_sampling.py       |  2 +-
 9 files changed, 85 insertions(+), 16 deletions(-)

diff --git a/HPC_setup/requirements_HDFML_additionals.txt b/HPC_setup/requirements_HDFML_additionals.txt
index 12e09ccd..7d6163a6 100644
--- a/HPC_setup/requirements_HDFML_additionals.txt
+++ b/HPC_setup/requirements_HDFML_additionals.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
diff --git a/HPC_setup/requirements_JUWELS_additionals.txt b/HPC_setup/requirements_JUWELS_additionals.txt
index 12e09ccd..7d6163a6 100644
--- a/HPC_setup/requirements_JUWELS_additionals.txt
+++ b/HPC_setup/requirements_JUWELS_additionals.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
diff --git a/mlair/data_handler/abstract_data_handler.py b/mlair/data_handler/abstract_data_handler.py
index f085d18b..419db059 100644
--- a/mlair/data_handler/abstract_data_handler.py
+++ b/mlair/data_handler/abstract_data_handler.py
@@ -55,3 +55,6 @@ class AbstractDataHandler:
     def get_coordinates(self) -> Union[None, Dict]:
         """Return coordinates as dictionary with keys `lon` and `lat`."""
         return None
+
+    def _hash_list(self):
+        return []
diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py
index face8f3c..1ff1a36f 100644
--- a/mlair/data_handler/data_handler_kz_filter.py
+++ b/mlair/data_handler/data_handler_kz_filter.py
@@ -22,6 +22,7 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
     """Data handler for a single station to be used by a superior data handler. Inputs are kz filtered."""
 
     _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
+    _hash = DataHandlerSingleStation._hash + ["kz_filter_length", "kz_filter_iter", "filter_dim"]
 
     DEFAULT_FILTER_DIM = "filter"
 
diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index ebcfbb42..acb62df9 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -12,6 +12,10 @@ import inspect
 from typing import Callable
 import datetime as dt
 from typing import Any
+import os
+import dill
+import logging
+from functools import partial
 
 import numpy as np
 import pandas as pd
@@ -77,6 +81,12 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation):
         assert len(sampling) == 2
         return list(map(lambda x: super(__class__, self).setup_data_path(data_path, x), sampling))
 
+    def _extract_lazy(self, lazy_data):
+        _data, self.meta, _input_data, _target_data = lazy_data
+        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
+        self._data = f_prep(_data[0]), f_prep(_data[1])
+        self.input_data, self.target_data = list(map(f_prep, [_input_data, _target_data]))
+
 
 class DataHandlerMixedSampling(DefaultDataHandler):
     """Data handler using mixed sampling for input and target."""
@@ -119,14 +129,24 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
         new_date = dt.datetime.strptime(date, "%Y-%m-%d") + dt.timedelta(hours=delta)
         return new_date.strftime("%Y-%m-%d")
 
-    def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]:
-
+    def update_start_end(self, ind):
         if ind == 0:  # for inputs
             estimated_filter_width = self.estimate_filter_width()
             start = self._add_time_delta(self.start, -estimated_filter_width)
             end = self._add_time_delta(self.end, estimated_filter_width)
         else:  # target
             start, end = self.start, self.end
+        return start, end
+
+    def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]:
+
+        start, end = self.update_start_end(ind)
+        # if ind == 0:  # for inputs
+        #     estimated_filter_width = self.estimate_filter_width()
+        #     start = self._add_time_delta(self.start, -estimated_filter_width)
+        #     end = self._add_time_delta(self.end, estimated_filter_width)
+        # else:  # target
+        #     start, end = self.start, self.end
 
         vars = [self.variables, self.target_var]
         stats_per_var = helpers.select_from_dict(self.statistics_per_var, vars[ind])
@@ -138,6 +158,16 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
                                 limit=self.interpolation_limit[ind])
         return data
 
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+
+    def _extract_lazy(self, lazy_data):
+        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        start_inp, end_inp = self.update_start_end(0)
+        self._data = list(map(self._slice_prep, _data, [start_inp, self.start], [end_inp, self.end]))
+        self.input_data = self._slice_prep(_input_data, start_inp, end_inp)
+        self.target_data = self._slice_prep(_target_data, self.start, self.end)
+
 
 class DataHandlerMixedSamplingWithFilter(DefaultDataHandler):
     """Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
@@ -158,6 +188,7 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
     """
 
     _requirements = DataHandlerMixedSamplingWithFilterSingleStation.requirements()
+    _hash = DataHandlerMixedSamplingWithFilterSingleStation._hash + ["time_delta"]
 
     def __init__(self, *args, time_delta=np.sqrt, **kwargs):
         assert isinstance(time_delta, Callable)
@@ -193,7 +224,7 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
         time_deltas = np.round(self.time_delta(self.cutoff_period)).astype(int)
         start, end = window, 1
         res = []
-        window_array = self.create_index_array(self.window_dim.range(start, end), squeeze_dim=self.target_dim)
+        window_array = self.create_index_array(self.window_dim, range(start, end), squeeze_dim=self.target_dim)
         for delta, filter_name in zip(np.append(time_deltas, 1), data.coords["filter"]):
             res_filter = []
             data_filter = data.sel({"filter": filter_name})
diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 820e601f..a8c6ea2e 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -5,10 +5,11 @@ __date__ = '2020-07-20'
 
 import copy
 import datetime as dt
+import dill
 import hashlib
 import logging
 import os
-from functools import reduce
+from functools import reduce, partial
 from typing import Union, List, Iterable, Tuple, Dict, Optional
 
 import numpy as np
@@ -46,6 +47,10 @@ class DataHandlerSingleStation(AbstractDataHandler):
     DEFAULT_INTERPOLATION_LIMIT = 0
     DEFAULT_INTERPOLATION_METHOD = "linear"
 
+    _hash = ["station", "statistics_per_var", "data_origin", "station_type", "network", "sampling", "target_dim",
+             "target_var", "time_dim", "iter_dim", "window_dim", "window_history_size", "window_history_offset",
+             "window_lead_time", "interpolation_limit", "interpolation_method"]
+
     def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE,
                  network=DEFAULT_NETWORK, sampling: Union[str, Tuple[str]] = DEFAULT_SAMPLING,
                  target_dim=DEFAULT_TARGET_DIM, target_var=DEFAULT_TARGET_VAR, time_dim=DEFAULT_TIME_DIM,
@@ -101,7 +106,6 @@ class DataHandlerSingleStation(AbstractDataHandler):
         self.observation = None
 
         # create samples
-        # self.hash()
         self.setup_samples()
 
     def __str__(self):
@@ -223,11 +227,41 @@ class DataHandlerSingleStation(AbstractDataHandler):
         """
         Setup samples. This method prepares and creates samples X, and labels Y.
         """
-        self.make_input_target()
+        if self.lazy is False:
+            self.make_input_target()
+        else:
+            self.load_lazy()
+            self.store_lazy()
         if self.do_transformation is True:
             self.call_transform()
         self.make_samples()
 
+    def store_lazy(self):
+        hash = self._get_hash()
+        filename = os.path.join(self.lazy_path, hash + ".pickle")
+        if not os.path.exists(filename):
+            dill.dump(self._create_lazy_data(), file=open(filename, "wb"))
+
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data]
+
+    def load_lazy(self):
+        hash = self._get_hash()
+        filename = os.path.join(self.lazy_path, hash + ".pickle")
+        try:
+            with open(filename, "rb") as pickle_file:
+                lazy_data = dill.load(pickle_file)
+            self._extract_lazy(lazy_data)
+            logging.info("<<<loaded lazy file")
+        except FileNotFoundError:
+            logging.info(">>>could not load lazy file")
+            self.make_input_target()
+
+    def _extract_lazy(self, lazy_data):
+        _data, self.meta, _input_data, _target_data = lazy_data
+        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
+        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
+
     def make_input_target(self):
         data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
                                          self.station_type, self.network, self.store_data_locally, self.data_origin,
@@ -669,16 +703,12 @@ class DataHandlerSingleStation(AbstractDataHandler):
         return self.transform(data, dim=dim, opts=self._transformation[pos], inverse=inverse,
                               transformation_dim=self.target_dim)
 
+    def _hash_list(self):
+        return sorted(list(set(self._hash)))
+
     def _get_hash(self):
-        hash_list = [self.station, self.statistics_per_var, self.data_origin, self.station_type, self.network,
-                     self.sampling, self.target_dim, self.target_var, self.time_dim, self.iter_dim, self.window_dim,
-                     self.window_history_size, self.window_history_offset, self.window_lead_time,
-                     self.interpolation_limit, self.interpolation_method, self.min_length, self.start, self.end]
-
-        hash = "".join([str(e) for e in hash_list]).encode("utf-8")
-        m = hashlib.sha256()
-        m.update(hash)
-        return m.hexdigest()
+        hash = "".join([str(self.__getattribute__(e)) for e in self._hash_list()]).encode()
+        return hashlib.md5(hash).hexdigest()
 
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index b0a6e7f5..af742fde 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
index 35fe0d5e..7dd443a4 100644
--- a/requirements_gpu.txt
+++ b/requirements_gpu.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
diff --git a/test/test_data_handler/test_data_handler_mixed_sampling.py b/test/test_data_handler/test_data_handler_mixed_sampling.py
index d2f9ce00..2a6553b7 100644
--- a/test/test_data_handler/test_data_handler_mixed_sampling.py
+++ b/test/test_data_handler/test_data_handler_mixed_sampling.py
@@ -37,7 +37,7 @@ class TestDataHandlerMixedSamplingSingleStation:
         req = object.__new__(DataHandlerSingleStation)
         assert sorted(obj._requirements) == sorted(remove_items(req.requirements(), "station"))
 
-    @mock.patch("mlair.data_handler.data_handler_mixed_sampling.DataHandlerMixedSamplingSingleStation.setup_samples")
+    @mock.patch("mlair.data_handler.data_handler_single_station.DataHandlerSingleStation.setup_samples")
     def test_init(self, mock_super_init):
         obj = DataHandlerMixedSamplingSingleStation("first_arg", "second", {}, test=23, sampling="hourly",
                                                     interpolation_limit=(1, 10))
-- 
GitLab
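
Patch 048 generalizes this into a lazy preprocessing cache: the fully prepared handler state is dill-dumped under a
digest of the class-specific _hash attributes and reloaded on the next run with identical parameters. A reduced
sketch of the round trip, assuming dill is installed and using illustrative helper names:

    import os
    import dill
    import hashlib

    def _cache_file(path, params):
        digest = hashlib.md5("".join(map(str, params)).encode()).hexdigest()
        return os.path.join(path, digest + ".pickle")

    def store_lazy(path, params, data):
        filename = _cache_file(path, params)
        if not os.path.exists(filename):
            with open(filename, "wb") as f:
                dill.dump(data, f)

    def load_lazy(path, params, fallback):
        try:
            with open(_cache_file(path, params), "rb") as f:
                return dill.load(f)
        except FileNotFoundError:
            return fallback()  # recompute when no cached state matches the digest

Because the digest covers every identity-defining parameter, any change to station, sampling, windows, or
interpolation settings automatically invalidates the cached state.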


From b5f3f9ec817085473a0f65c30d22c75ff2fc30f8 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 17 Mar 2021 15:58:38 +0100
Subject: [PATCH 049/175] renamed lazy_loading to lazy_preprocessing

---
 mlair/data_handler/data_handler_kz_filter.py      | 9 +++++++++
 mlair/data_handler/data_handler_mixed_sampling.py | 6 ------
 mlair/data_handler/data_handler_single_station.py | 6 ++----
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py
index 1ff1a36f..1f2c63e5 100644
--- a/mlair/data_handler/data_handler_kz_filter.py
+++ b/mlair/data_handler/data_handler_kz_filter.py
@@ -8,6 +8,7 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 from typing import List, Union
+from functools import partial
 
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.data_handler import DefaultDataHandler
@@ -83,6 +84,14 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
         return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
                                       self.filter_dim).copy()
 
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+
+    def _extract_lazy(self, lazy_data):
+        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
+        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
+
 
 class DataHandlerKzFilter(DefaultDataHandler):
     """Data handler using kz filtered data."""
diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index acb62df9..b359a26d 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -12,9 +12,6 @@ import inspect
 from typing import Callable
 import datetime as dt
 from typing import Any
-import os
-import dill
-import logging
 from functools import partial
 
 import numpy as np
@@ -158,9 +155,6 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
                                 limit=self.interpolation_limit[ind])
         return data
 
-    def _create_lazy_data(self):
-        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
-
     def _extract_lazy(self, lazy_data):
         _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
         start_inp, end_inp = self.update_start_end(0)
diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index a8c6ea2e..0497bee0 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -61,11 +61,11 @@ class DataHandlerSingleStation(AbstractDataHandler):
                  interpolation_method: Union[str, Tuple[str]] = DEFAULT_INTERPOLATION_METHOD,
                  overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True,
                  min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None,
-                 lazy_loading: bool = False, **kwargs):
+                 lazy_preprocessing: bool = False, **kwargs):
         super().__init__()
         self.station = helpers.to_list(station)
         self.path = self.setup_data_path(data_path, sampling)
-        self.lazy = lazy_loading
+        self.lazy = lazy_preprocessing
         self.lazy_path = None
         if self.lazy is True:
             self.lazy_path = os.path.join(data_path, "lazy_data", self.__class__.__name__)
@@ -252,9 +252,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
             with open(filename, "rb") as pickle_file:
                 lazy_data = dill.load(pickle_file)
             self._extract_lazy(lazy_data)
-            logging.info("<<<loaded lazy file")
         except FileNotFoundError:
-            logging.info(">>>could not load lazy file")
             self.make_input_target()
 
     def _extract_lazy(self, lazy_data):
-- 
GitLab


From f9c10fe3065a696dcdca91ac8193afe8640b53ad Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 17 Mar 2021 16:06:24 +0100
Subject: [PATCH 050/175] replace all pickle calls with dill calls

---
 mlair/data_handler/default_data_handler.py | 5 +++--
 mlair/data_handler/iterator.py             | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index ddf276cf..07a866ae 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -8,6 +8,7 @@ import gc
 import logging
 import os
 import pickle
+import dill
 import shutil
 from functools import reduce
 from typing import Tuple, Union, List
@@ -86,7 +87,7 @@ class DefaultDataHandler(AbstractDataHandler):
             data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
             data = self._force_dask_computation(data)
             with open(self._save_file, "wb") as f:
-                pickle.dump(data, f)
+                dill.dump(data, f)
             logging.debug(f"save pickle data to {self._save_file}")
             self._reset_data()
 
@@ -101,7 +102,7 @@ class DefaultDataHandler(AbstractDataHandler):
     def _load(self):
         try:
             with open(self._save_file, "rb") as f:
-                data = pickle.load(f)
+                data = dill.load(f)
             logging.debug(f"load pickle data from {self._save_file}")
             self._X, self._Y = data["X"], data["Y"]
             self._X_extreme, self._Y_extreme = data["X_extreme"], data["Y_extreme"]
diff --git a/mlair/data_handler/iterator.py b/mlair/data_handler/iterator.py
index 30c45417..564bf3bf 100644
--- a/mlair/data_handler/iterator.py
+++ b/mlair/data_handler/iterator.py
@@ -9,6 +9,7 @@ import math
 import os
 import shutil
 import pickle
+import dill
 from typing import Tuple, List
 
 
@@ -109,7 +110,7 @@ class KerasIterator(keras.utils.Sequence):
         """Load pickle data from disk."""
         file = self._path % index
         with open(file, "rb") as f:
-            data = pickle.load(f)
+            data = dill.load(f)
         return data["X"], data["Y"]
 
     @staticmethod
@@ -167,7 +168,7 @@ class KerasIterator(keras.utils.Sequence):
         data = {"X": X, "Y": Y}
         file = self._path % index
         with open(file, "wb") as f:
-            pickle.dump(data, f)
+            dill.dump(data, f)
 
     def _get_number_of_mini_batches(self, number_of_samples: int) -> int:
         """Return number of mini batches as the floored ration of number of samples to batch size."""
-- 
GitLab
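
Patch 050 swaps pickle for dill in the batch writer and loader. dill can read plain pickle files but additionally
serializes objects pickle rejects, such as lambdas or locally defined functions that may be referenced from the
stored handler state. A short comparison:

    import pickle
    import dill

    f = lambda x: x + 1

    try:
        pickle.dumps(f)
    except Exception as e:  # pickle cannot serialize lambdas
        print(f"pickle failed: {e}")

    restored = dill.loads(dill.dumps(f))
    assert restored(1) == 2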


From 32f3ff2203d47fea8ad1c2df7328506a7e5cd058 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 17 Mar 2021 18:55:22 +0100
Subject: [PATCH 051/175] data handlers with filters will create negative
 values, which is incompatible with log transformation. standardization will
 be used in these cases

---
 mlair/data_handler/data_handler_kz_filter.py      | 15 ++++++++++++++-
 mlair/data_handler/data_handler_mixed_sampling.py |  2 +-
 mlair/data_handler/data_handler_single_station.py |  5 +++--
 mlair/data_handler/default_data_handler.py        |  4 +++-
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py
index 1f2c63e5..539712b3 100644
--- a/mlair/data_handler/data_handler_kz_filter.py
+++ b/mlair/data_handler/data_handler_kz_filter.py
@@ -7,7 +7,7 @@ import inspect
 import numpy as np
 import pandas as pd
 import xarray as xr
-from typing import List, Union
+from typing import List, Union, Tuple, Optional
 from functools import partial
 
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
@@ -37,6 +37,19 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
         self.cutoff_period_days = None
         super().__init__(*args, **kwargs)
 
+    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
+        """
+        Adjust the transformation setup because kz filtered data will have negative values, which are not compatible
+        with the log transformation. Therefore, replace all log transformation methods by a default standardization.
+        This is only applied on the input side.
+        """
+        transformation = super(__class__, self).setup_transformation(transformation)
+        if transformation[0] is not None:
+            for k, v in transformation[0].items():
+                if v["method"] == "log":
+                    transformation[0][k]["method"] = "standardise"
+        return transformation
+
     def _check_sampling(self, **kwargs):
         assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution
 
diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 86e6f856..75e9e645 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -158,7 +158,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
     def _extract_lazy(self, lazy_data):
         _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
         start_inp, end_inp = self.update_start_end(0)
-        self._data = list(map(self._slice_prep, _data, [start_inp, self.start], [end_inp, self.end]))
+        self._data = list(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1]))
         self.input_data = self._slice_prep(_input_data, start_inp, end_inp)
         self.target_data = self._slice_prep(_target_data, self.start, self.end)
 
diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 0497bee0..19ff6fa1 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -252,7 +252,9 @@ class DataHandlerSingleStation(AbstractDataHandler):
             with open(filename, "rb") as pickle_file:
                 lazy_data = dill.load(pickle_file)
             self._extract_lazy(lazy_data)
+            logging.info(f"{self.station}: used lazy data")
         except FileNotFoundError:
+            logging.info(f"{self.station}: could not use lazy data")
             self.make_input_target()
 
     def _extract_lazy(self, lazy_data):
@@ -594,8 +596,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
         """
         return data.loc[{coord: slice(str(start), str(end))}]
 
-    @staticmethod
-    def setup_transformation(transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
+    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
         """
         Set up transformation by extracting all relevant information.
 
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 07a866ae..5eb6fd02 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -273,7 +273,9 @@ class DefaultDataHandler(AbstractDataHandler):
                         if var not in transformation_dict[i].keys():
                             transformation_dict[i][var] = {}
                         opts = transformation[var]
-                        assert transformation_dict[i][var].get("method", opts["method"]) == opts["method"]
+                        if not transformation_dict[i][var].get("method", opts["method"]) == opts["method"]:
+                            # data handlers with filters are allowed to change transformation method to standardise
+                            assert hasattr(dh, "filter_dim") and opts["method"] == "standardise"
                         transformation_dict[i][var]["method"] = opts["method"]
                         for k in ["mean", "std", "min", "max"]:
                             old = transformation_dict[i][var].get(k, None)
-- 
GitLab
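
Patch 051 works around an incompatibility: filtered series oscillate around zero, and the log transform is undefined
for non-positive values, so any requested "log" method is rewritten to "standardise" on the input side only. The
substitution in isolation, with a hypothetical (input, target) transformation tuple:

    transformation = ({"o3": {"method": "log"}}, {"o3": {"method": "log"}})

    inputs, targets = transformation
    for var, opts in inputs.items():
        if opts["method"] == "log":
            # kz-filtered inputs contain negative values, where log is undefined
            opts["method"] = "standardise"

    assert inputs["o3"]["method"] == "standardise"
    assert targets["o3"]["method"] == "log"  # the target side is left untouched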


From a72c04dc95e4532d5217914d494453544aeeb7f7 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 17 Mar 2021 19:51:43 +0100
Subject: [PATCH 052/175] corrected get statement, plot still not working

---
 mlair/run_modules/post_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 3b9b5634..73aebb00 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -306,7 +306,7 @@ class PostProcessing(RunEnvironment):
         try:
             if ("filter" in self.test_data[0].get_X(as_numpy=False)[0].coords) and (
                     "PlotSeparationOfScales" in plot_list):
-                filter_dim = self.data_store.get("filter_dim", None)
+                filter_dim = self.data_store.get_default("filter_dim", None)
                 PlotSeparationOfScales(self.test_data, plot_folder=self.plot_path, time_dim=time_dim,
                                        window_dim=window_dim, target_dim=target_dim, **{"filter_dim": filter_dim})
         except Exception as e:
-- 
GitLab


From 145b3d7dce710d583a9ffe7078630b681d6f0e64 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 18 Mar 2021 09:43:51 +0100
Subject: [PATCH 053/175] load lazy now just logs a debug message, /close #290

---
 mlair/data_handler/data_handler_single_station.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 19ff6fa1..0c83e625 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -252,9 +252,9 @@ class DataHandlerSingleStation(AbstractDataHandler):
             with open(filename, "rb") as pickle_file:
                 lazy_data = dill.load(pickle_file)
             self._extract_lazy(lazy_data)
-            logging.info(f"{self.station}: used lazy data")
+            logging.debug(f"{self.station[0]}: used lazy data")
         except FileNotFoundError:
-            logging.info(f"{self.station}: could not use lazy data")
+            logging.debug(f"{self.station[0]}: could not use lazy data")
             self.make_input_target()
 
     def _extract_lazy(self, lazy_data):
-- 
GitLab


From 3f2b9806feec3d40a92f07ea9e980450a8e21ca6 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Thu, 18 Mar 2021 10:20:14 +0000
Subject: [PATCH 054/175] include new logging

---
 mlair/run_modules/pre_processing.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 813873b8..7f3ce51e 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -9,6 +9,7 @@ from typing import Tuple
 import multiprocessing
 import requests
 import psutil
+import traceback
 
 import numpy as np
 import pandas as pd
@@ -336,6 +337,7 @@ def f_proc(data_handler, station, name_affix, store, **kwargs):
     try:
         res = data_handler.build(station, name_affix=name_affix, store_processed_data=store, **kwargs)
     except (AttributeError, EmptyQueryResult, KeyError, requests.ConnectionError, ValueError) as e:
-        logging.info(f"remove station {station} because it raised an error: {e}")
+        formatted_lines = traceback.format_exc().splitlines()
+        logging.info(f"remove station {station} because it raised an error: {e} (from {' | '.join(formatted_lines[-3:])})")
         res = None
     return res, station
-- 
GitLab


From d1d75c5f4f4dc2facc88b5b30c7b8dbe3167e606 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 18 Mar 2021 15:10:54 +0100
Subject: [PATCH 055/175] different logging message

---
 mlair/run_modules/pre_processing.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 7f3ce51e..4d90f87b 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -5,11 +5,11 @@ __date__ = '2019-11-25'
 
 import logging
 import os
+import traceback
 from typing import Tuple
 import multiprocessing
 import requests
 import psutil
-import traceback
 
 import numpy as np
 import pandas as pd
@@ -338,6 +338,14 @@ def f_proc(data_handler, station, name_affix, store, **kwargs):
         res = data_handler.build(station, name_affix=name_affix, store_processed_data=store, **kwargs)
     except (AttributeError, EmptyQueryResult, KeyError, requests.ConnectionError, ValueError) as e:
         formatted_lines = traceback.format_exc().splitlines()
-        logging.info(f"remove station {station} because it raised an error: {e} (from {' | '.join(formatted_lines[-3:])})")
+        logging.info(
+            f"remove station {station} because it raised an error: {e} -> {' | '.join(f_inspect_error(formatted_lines))}")
         res = None
     return res, station
+
+
+def f_inspect_error(formatted):
+    for i in range(len(formatted) - 1, -1, -1):
+        if "mlair/mlair" not in formatted[i]:
+            return formatted[i - 3:i]
+    return formatted[-3:]
-- 
GitLab
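
Patch 055 makes the removal message point at the offending call site: f_inspect_error walks the formatted traceback
backwards to the last frame outside mlair itself. How such a formatted traceback is obtained and condensed, in
isolation:

    import traceback

    try:
        {}["missing"]
    except KeyError:
        formatted = traceback.format_exc().splitlines()
        # the last three lines hold the failing frame and the exception message
        print(" | ".join(formatted[-3:]))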


From 0f53dffaa0b7c8eb08fb7040b1375423732cb883 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Tue, 23 Mar 2021 17:07:17 +0100
Subject: [PATCH 056/175] update transform methods to properly work when
 external transformation parameters are provided

---
 mlair/data_handler/data_handler_single_station.py | 12 +++++++++++-
 mlair/data_handler/default_data_handler.py        | 12 ++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 0c83e625..e9db27a9 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -195,7 +195,17 @@ class DataHandlerSingleStation(AbstractDataHandler):
             else:
                 raise NotImplementedError
 
-        def f_apply(data, method, mean=None, std=None, min=None, max=None):
+        def f_apply(data, method, **kwargs):
+            for k, v in kwargs.items():
+                if not (isinstance(v, xr.DataArray) or v is None):
+                    _, opts = statistics.min_max(data, dim)
+                    helper = xr.ones_like(opts['min'])
+                    kwargs[k] = helper * v
+            mean = kwargs.pop('mean', None)
+            std = kwargs.pop('std', None)
+            min = kwargs.pop('min', None)
+            max = kwargs.pop('max', None)
+
             if method == "standardise":
                 return statistics.standardise_apply(data, mean, std), {"mean": mean, "std": std, "method": method}
             elif method == "centre":
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 5eb6fd02..2eceff32 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -241,6 +241,8 @@ class DefaultDataHandler(AbstractDataHandler):
 
         * standardise (default, if method is not given)
         * centre
+        * min_max
+        * log
 
         ### mean and std estimation
 
@@ -256,14 +258,16 @@ class DefaultDataHandler(AbstractDataHandler):
 
         If mean and std are not None, the default data handler expects these parameters to match the data and applies
         these values to the data. Make sure that all dimensions and/or coordinates are in agreement.
+
+        ### min and max given
+        If min and max are not None, the default data handler expects these parameters to match the data and applies
+        these values to the data. Make sure that all dimensions and/or coordinates are in agreement.
         """
 
         sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
-        transformation_dict = sp_keys.get("transformation", None)
-        if transformation_dict is None:
+        if "transformation" not in sp_keys.keys():
             return
-        if isinstance(transformation_dict, dict):  # tuple for (input, target) transformation
-            transformation_dict = copy.deepcopy(transformation_dict), copy.deepcopy(transformation_dict)
+        transformation_dict = ({}, {})
 
         def _inner():
             """Inner method that is performed in both serial and parallel approach."""
-- 
GitLab
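
Patch 056 lets f_apply accept scalar transformation parameters from outside: every value that is not already a
DataArray is broadcast to a per-variable array via xr.ones_like before the statistics helpers run. The broadcast
step alone, with illustrative data:

    import numpy as np
    import xarray as xr

    data = xr.DataArray(np.random.rand(10, 2), dims=("datetime", "variables"),
                        coords={"variables": ["o3", "temp"]})

    mean = 0.5                             # scalar provided from outside
    template = data.min("datetime")        # any per-variable array works as template
    mean = xr.ones_like(template) * mean   # broadcast the scalar per variable

    standardised = (data - mean) / 1.0     # now aligns like a proper DataArray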


From 01dc6fb2e6c26bbb03ee2c3d1827c77f24109743 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 26 Mar 2021 12:45:38 +0100
Subject: [PATCH 057/175] log more model information during model setup stage

---
 HPC_setup/requirements_HDFML_additionals.txt  |  1 +
 HPC_setup/requirements_JUWELS_additionals.txt |  1 +
 mlair/run_modules/model_setup.py              | 26 ++++++++++++-------
 requirements.txt                              |  1 +
 requirements_gpu.txt                          |  1 +
 5 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/HPC_setup/requirements_HDFML_additionals.txt b/HPC_setup/requirements_HDFML_additionals.txt
index 12e09ccd..7d6163a6 100644
--- a/HPC_setup/requirements_HDFML_additionals.txt
+++ b/HPC_setup/requirements_HDFML_additionals.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
diff --git a/HPC_setup/requirements_JUWELS_additionals.txt b/HPC_setup/requirements_JUWELS_additionals.txt
index 12e09ccd..7d6163a6 100644
--- a/HPC_setup/requirements_JUWELS_additionals.txt
+++ b/HPC_setup/requirements_JUWELS_additionals.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
diff --git a/mlair/run_modules/model_setup.py b/mlair/run_modules/model_setup.py
index 5dd73d50..8fae430f 100644
--- a/mlair/run_modules/model_setup.py
+++ b/mlair/run_modules/model_setup.py
@@ -6,6 +6,7 @@ __date__ = '2019-12-02'
 import logging
 import os
 import re
+from dill.source import getsource
 
 import keras
 import pandas as pd
@@ -57,12 +58,12 @@ class ModelSetup(RunEnvironment):
         super().__init__()
         self.model = None
         exp_name = self.data_store.get("experiment_name")
-        path = self.data_store.get("model_path")
+        self.path = self.data_store.get("model_path")
         self.scope = "model"
-        self.path = os.path.join(path, f"{exp_name}_%s")
-        self.model_name = self.path % "%s.h5"
-        self.checkpoint_name = self.path % "model-best.h5"
-        self.callbacks_name = self.path % "model-best-callbacks-%s.pickle"
+        path = os.path.join(self.path, f"{exp_name}_%s")
+        self.model_name = path % "%s.h5"
+        self.checkpoint_name = path % "model-best.h5"
+        self.callbacks_name = path % "model-best-callbacks-%s.pickle"
         self._train_model = self.data_store.get("train_model")
         self._create_new_model = self.data_store.get("create_new_model")
         self._run()
@@ -167,6 +168,7 @@ class ModelSetup(RunEnvironment):
             keras.utils.plot_model(self.model, to_file=file_name, show_shapes=True, show_layer_names=True)
 
     def report_model(self):
+        # report model settings
         model_settings = self.model.get_settings()
         model_settings.update(self.model.compile_options)
         model_settings.update(self.model.optimizer.get_config())
@@ -179,17 +181,23 @@ class ModelSetup(RunEnvironment):
             if "<" in str(v):
                 v = self._clean_name(str(v))
             df.loc[k] = str(v)
+        df.loc["count params"] = str(self.model.count_params())
         df.sort_index(inplace=True)
         column_format = "ll"
         path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
         path_config.check_path_and_create(path)
-        df.to_latex(os.path.join(path, "model_settings.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "model_settings.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
+        for p in [path, self.path]:  # log to `latex_report` and `model`
+            df.to_latex(os.path.join(p, "model_settings.tex"), na_rep='---', column_format=column_format)
+            df.to_markdown(open(os.path.join(p, "model_settings.md"), mode="w", encoding='utf-8'), tablefmt="github")
+        # report model summary to file
+        with open(os.path.join(self.path, "model_summary.txt"), "w") as fh:
+            self.model.summary(print_fn=lambda x: fh.write(x + "\n"))
+        # print model code to file
+        with open(os.path.join(self.path, "model_code.txt"), "w") as fh:
+            fh.write(getsource(self.data_store.get("model_class")))
 
     @staticmethod
     def _clean_name(orig_name: str):
         mod_name = re.sub(r'^{0}'.format(re.escape("<")), '', orig_name).replace("'", "").split(" ")
         mod_name = mod_name[1] if any(map(lambda x: x in mod_name[0], ["class", "function", "method"])) else mod_name[0]
         return mod_name[:-1] if mod_name[-1] == ">" else mod_name
-
diff --git a/requirements.txt b/requirements.txt
index b0a6e7f5..af742fde 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
index 35fe0d5e..7dd443a4 100644
--- a/requirements_gpu.txt
+++ b/requirements_gpu.txt
@@ -9,6 +9,7 @@ chardet==4.0.0
 coverage==5.4
 cycler==0.10.0
 dask==2021.2.0
+dill==0.3.3
 fsspec==0.8.5
 gast==0.4.0
 grpcio==1.35.0
-- 
GitLab
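
Patch 057 writes, beside the settings tables, the Keras layer summary and the model source code into the model
directory. The two calls in isolation, with a toy model standing in for the configured model class:

    import keras
    from dill.source import getsource

    model = keras.models.Sequential([keras.layers.Dense(4, input_shape=(8,))])

    # capture model.summary(), which normally prints to stdout, in a file
    with open("model_summary.txt", "w") as fh:
        model.summary(print_fn=lambda line: fh.write(line + "\n"))

    # dump the source code of the model class next to it
    with open("model_code.txt", "w") as fh:
        fh.write(getsource(type(model)))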


From ef3ced197933ea2e7c022b96f41254cbfbdd09e0 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 29 Mar 2021 12:08:51 +0200
Subject: [PATCH 058/175] added dropout to CNN

---
 mlair/model_modules/convolutional_networks.py | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index c4a10990..d4955d3d 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -12,17 +12,22 @@ import keras
 
 
 class CNN(AbstractModelClass):
+
     _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
                    "sigmoid": partial(keras.layers.Activation, "sigmoid"),
                    "linear": partial(keras.layers.Activation, "linear"),
-                   "selu": partial(keras.layers.Activation, "selu")}
-    _initializer = {"selu": keras.initializers.lecun_normal()}
-    _optimizer = {"adam": keras.optimizers.adam}
+                   "selu": partial(keras.layers.Activation, "selu"),
+                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25))}
+    _initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
+                    "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal(),
+                    "prelu": keras.initializers.he_normal()}
+    _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
     _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
-    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"]
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
+    _dropout = {"selu": keras.layers.AlphaDropout}
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
-                 optimizer="adam", regularizer=None, kernel_size=1, **kwargs):
+                 optimizer="adam", regularizer=None, kernel_size=1, dropout=None, **kwargs):
 
         assert len(input_shape) == 1
         assert len(output_shape) == 1
@@ -37,6 +42,7 @@ class CNN(AbstractModelClass):
         self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
         self.kernel_size = kernel_size
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
+        self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
 
         # apply to model
         self.set_model()
@@ -56,6 +62,8 @@ class CNN(AbstractModelClass):
             opt_kwargs = {}
             if opt_name == "adam":
                 opt_kwargs = select_from_dict(kwargs, ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"])
+            elif opt_name == "sgd":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "momentum", "decay", "nesterov"])
             return opt(**opt_kwargs)
         except KeyError:
             raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
@@ -77,6 +85,12 @@ class CNN(AbstractModelClass):
         except KeyError:
             raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
 
+    def _set_dropout(self, activation, dropout_rate):
+        if dropout_rate is None:
+            return None, None
+        assert 0 <= dropout_rate < 1
+        return self._dropout.get(activation, keras.layers.Dropout), dropout_rate
+
     def set_model(self):
         """
         Build the model.
-- 
GitLab
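
The `_dropout` lookup pairs the `selu` activation with `AlphaDropout`, which preserves the self-normalising property
of selu networks, and falls back to plain `Dropout` for every other activation. A hedged sketch of the selection
logic in isolation (`select_dropout` is an illustrative stand-in for the `_set_dropout` method):

.. code-block:: python

    import keras

    _dropout = {"selu": keras.layers.AlphaDropout}

    def select_dropout(activation, dropout_rate):
        # illustrative stand-in for CNN._set_dropout
        if dropout_rate is None:  # no dropout requested
            return None, None
        assert 0 <= dropout_rate < 1
        # AlphaDropout for selu, standard Dropout otherwise
        return _dropout.get(activation, keras.layers.Dropout), dropout_rate

    layer_cls, rate = select_dropout("selu", 0.2)  # -> AlphaDropout, 0.2
    layer_cls, rate = select_dropout("relu", 0.2)  # -> Dropout, 0.2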


From 1ea8b24f2b5af03d7d08a5c6f92738cd8f69135c Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 29 Mar 2021 12:28:15 +0200
Subject: [PATCH 059/175] join module now uses a retry strategy, /close #296 on
 test success

---
 mlair/helpers/join.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mlair/helpers/join.py b/mlair/helpers/join.py
index 8a8ca0b8..e0b28660 100644
--- a/mlair/helpers/join.py
+++ b/mlair/helpers/join.py
@@ -8,6 +8,8 @@ from typing import Iterator, Union, List, Dict
 
 import pandas as pd
 import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
 
 from mlair import helpers
 from mlair.configuration.join_settings import join_settings
@@ -129,13 +131,24 @@ def get_data(opts: Dict, headers: Dict) -> Union[Dict, List]:
     :return: requested data (either as list or dictionary)
     """
     url = create_url(**opts)
-    response = requests.get(url, headers=headers)
+    response = retries_session().get(url, headers=headers)
     if response.status_code == 200:
         return response.json()
     else:
         raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}")
 
 
+def retries_session(max_retries=5):
+    retry_strategy = Retry(total=max_retries,
+                           status_forcelist=[429, 500, 502, 503, 504],
+                           method_whitelist=["HEAD", "GET", "OPTIONS"])
+    adapter = HTTPAdapter(max_retries=retry_strategy)
+    http = requests.Session()
+    http.mount("https://", adapter)
+    http.mount("http://", adapter)
+    return http
+
+
 def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none,
                             join_url_base: str, headers: Dict, data_origin: Dict = None) -> [Dict, Dict]:
     """
-- 
GitLab
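
Mounting the adapter for both schemes means every request issued through the session is retried transparently on the
listed status codes. A usage sketch against the urllib3 API pinned in 2021 (`method_whitelist` was later renamed to
`allowed_methods`; the URL is a placeholder):

.. code-block:: python

    import requests
    from requests.adapters import HTTPAdapter
    from requests.packages.urllib3.util.retry import Retry

    retry_strategy = Retry(total=5,
                           status_forcelist=[429, 500, 502, 503, 504],
                           method_whitelist=["HEAD", "GET", "OPTIONS"])
    adapter = HTTPAdapter(max_retries=retry_strategy)
    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

    # a transient 502/503 from the server is now retried up to 5 times
    response = http.get("https://example.com/api")  # placeholder URL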


From 4dae57e381de56f823625f4b48269de6cdbe8f28 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 29 Mar 2021 14:31:08 +0200
Subject: [PATCH 060/175] use dropout in CNNs

---
 mlair/model_modules/convolutional_networks.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mlair/model_modules/convolutional_networks.py b/mlair/model_modules/convolutional_networks.py
index d4955d3d..624cfa09 100644
--- a/mlair/model_modules/convolutional_networks.py
+++ b/mlair/model_modules/convolutional_networks.py
@@ -104,11 +104,15 @@ class CNN(AbstractModelClass):
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
+        if self.dropout is not None:
+            x_in = self.dropout(self.dropout_rate)(x_in)
         x_in = keras.layers.MaxPooling2D((25, 1), strides=(1, 1), padding='valid')(x_in)
         x_in = keras.layers.Conv2D(filters=64, kernel_size=(13, 1),
                                    kernel_initializer=self.kernel_initializer,
                                    kernel_regularizer=self.kernel_regularizer)(x_in)
         x_in = self.activation()(x_in)
+        if self.dropout is not None:
+            x_in = self.dropout(self.dropout_rate)(x_in)
         x_in = keras.layers.Flatten()(x_in)
         x_in = keras.layers.Dense(128, kernel_initializer=self.kernel_initializer,
                                   kernel_regularizer=self.kernel_regularizer)(x_in)
-- 
GitLab
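
The guard leaves the graph untouched when no dropout was configured; otherwise the layer class chosen in
`_set_dropout` is instantiated with the stored rate after each activation. A condensed sketch of the pattern
(`add_block` is an illustrative helper, not part of the patch):

.. code-block:: python

    import keras

    def add_block(x_in, dropout_cls=None, dropout_rate=None):
        # illustrative helper: convolution plus activation, as in CNN.set_model
        x_in = keras.layers.Conv2D(filters=16, kernel_size=(3, 1))(x_in)
        x_in = keras.layers.ReLU()(x_in)
        # dropout is only woven in when a layer class was selected
        if dropout_cls is not None:
            x_in = dropout_cls(dropout_rate)(x_in)
        return x_in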


From 4a1d7679686026da0755dbdf6d2f492a57f13c52 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 29 Mar 2021 14:57:11 +0200
Subject: [PATCH 061/175] fix for #296 to reduce waiting if no internet
 connection can be established

---
 mlair/helpers/join.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlair/helpers/join.py b/mlair/helpers/join.py
index e0b28660..93cb0e7b 100644
--- a/mlair/helpers/join.py
+++ b/mlair/helpers/join.py
@@ -131,15 +131,16 @@ def get_data(opts: Dict, headers: Dict) -> Union[Dict, List]:
     :return: requested data (either as list or dictionary)
     """
     url = create_url(**opts)
-    response = retries_session().get(url, headers=headers)
+    response = retries_session().get(url, headers=headers, timeout=(5, None))  # timeout=(connect, read)
     if response.status_code == 200:
         return response.json()
     else:
         raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}")
 
 
-def retries_session(max_retries=5):
+def retries_session(max_retries=3):
     retry_strategy = Retry(total=max_retries,
+                           backoff_factor=0.1,
                            status_forcelist=[429, 500, 502, 503, 504],
                            method_whitelist=["HEAD", "GET", "OPTIONS"])
     adapter = HTTPAdapter(max_retries=retry_strategy)
-- 
GitLab
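
With `backoff_factor=0.1`, urllib3 sleeps roughly `backoff_factor * 2 ** (retry - 1)` seconds between attempts, so
three retries add well under a second of waiting on top of the 5 s connect timeout. A sketch of the expected delays
(a simplification; some urllib3 versions skip the backoff before the first retry):

.. code-block:: python

    # exponential backoff schedule for backoff_factor=0.1 and total=3
    backoff_factor = 0.1
    for retry in range(1, 4):
        print(backoff_factor * 2 ** (retry - 1))  # 0.1, 0.2, 0.4 seconds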


From 9806b13636a70fbb464581c496f24e30177ad703 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 30 Mar 2021 14:57:29 +0200
Subject: [PATCH 062/175] default data handler and preprocessing support the
 parameter use_multiprocessing, which is always set to False in debug mode.
 /close #297 on pipeline success

---
 mlair/configuration/defaults.py            |  1 +
 mlair/data_handler/default_data_handler.py |  6 ++++--
 mlair/run_modules/experiment_setup.py      | 13 +++++++++++--
 mlair/run_modules/pre_processing.py        |  3 ++-
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 04e441fe..8805acfc 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -53,6 +53,7 @@ DEFAULT_SAMPLING = "daily"
 DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA",
                        "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
                        "pm10": "", "so2": ""}
+DEFAULT_USE_MULTIPROCESSING = True
 
 
 def get_defaults():
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 2eceff32..87fc83b0 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -39,7 +39,8 @@ class DefaultDataHandler(AbstractDataHandler):
 
     def __init__(self, id_class: data_handler, experiment_path: str, min_length: int = 0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None,
-                 store_processed_data=True, iter_dim=DEFAULT_ITER_DIM, time_dim=DEFAULT_TIME_DIM):
+                 store_processed_data=True, iter_dim=DEFAULT_ITER_DIM, time_dim=DEFAULT_TIME_DIM,
+                 use_multiprocessing=True):
         super().__init__()
         self.id_class = id_class
         self.time_dim = time_dim
@@ -49,6 +50,7 @@ class DefaultDataHandler(AbstractDataHandler):
         self._Y = None
         self._X_extreme = None
         self._Y_extreme = None
+        self._use_multiprocessing = use_multiprocessing
         _name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self))
         self._save_file = os.path.join(experiment_path, "data", f"{_name_affix}.pickle")
         self._collection = self._create_collection()
@@ -286,7 +288,7 @@ class DefaultDataHandler(AbstractDataHandler):
                             new = opts.get(k)
                             transformation_dict[i][var][k] = new if old is None else old.combine_first(new)
 
-        if multiprocessing.cpu_count() > 1:  # parallel solution
+        if multiprocessing.cpu_count() > 1 and kwargs.get("use_multiprocessing", True) is True:  # parallel solution
             logging.info("use parallel transformation approach")
             pool = multiprocessing.Pool(
                 min([psutil.cpu_count(logical=False), len(set_stations), 16]))  # use only physical cpus
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 30672ecc..f51cee8a 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -4,6 +4,7 @@ __date__ = '2019-11-15'
 import argparse
 import logging
 import os
+import sys
 from typing import Union, Dict, Any, List, Callable
 
 from mlair.configuration import path_config
@@ -17,7 +18,8 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
-    DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM
+    DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \
+    DEFAULT_USE_MULTIPROCESSING
 from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
@@ -228,7 +230,8 @@ class ExperimentSetup(RunEnvironment):
                  number_of_bootstraps=None,
                  create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
                  hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
-                 data_origin: Dict = None, competitors: list = None, competitor_path: str = None, **kwargs):
+                 data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
+                 use_multiprocessing: bool = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -265,6 +268,12 @@ class ExperimentSetup(RunEnvironment):
         logging.info(f"Experiment path is: {experiment_path}")
         path_config.check_path_and_create(self.data_store.get("experiment_path"))
 
+        # host system setup
+        debug_mode = sys.gettrace() is not None
+        self._set_param("debug_mode", debug_mode)
+        use_multiprocessing = False if debug_mode is True else use_multiprocessing
+        self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING)
+
         # batch path (temporary)
         self._set_param("batch_path", batch_path, default=os.path.join(experiment_path, "batch_data"))
 
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 813873b8..f59a4e89 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -241,8 +241,9 @@ class PreProcessing(RunEnvironment):
         collection = DataCollection(name=set_name)
         valid_stations = []
         kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope=set_name)
+        use_multiprocessing = self.data_store.get("use_multiprocessing")
 
-        if multiprocessing.cpu_count() > 1:  # parallel solution
+        if multiprocessing.cpu_count() > 1 and use_multiprocessing:  # parallel solution
             logging.info("use parallel validate station approach")
             pool = multiprocessing.Pool(
                 min([psutil.cpu_count(logical=False), len(set_stations), 16]))  # use only physical cpus
-- 
GitLab
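
The same guard now appears in the data handler and in the preprocessing module: a pool is only created when more
than one CPU is available and the `use_multiprocessing` flag allows it. A reduced sketch of the branching (the
`validate` worker and `run` wrapper are illustrative stand-ins for the real per-station work):

.. code-block:: python

    import multiprocessing

    import psutil

    def validate(station):
        return station, True  # illustrative stand-in for the real check

    def run(stations, use_multiprocessing=True):
        if multiprocessing.cpu_count() > 1 and use_multiprocessing:  # parallel solution
            pool = multiprocessing.Pool(min([psutil.cpu_count(logical=False), len(stations), 16]))
            results = pool.map(validate, stations)
            pool.close()
        else:  # serial solution
            results = [validate(station) for station in stations]
        return results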


From a1815b4921c20f9fa885c8900cdf937a17704ae5 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 31 Mar 2021 11:11:27 +0200
Subject: [PATCH 063/175] updated docs

---
 docs/_source/defaults.rst             |  2 +
 mlair/run_modules/experiment_setup.py | 72 +++++++++++----------------
 2 files changed, 30 insertions(+), 44 deletions(-)

diff --git a/docs/_source/defaults.rst b/docs/_source/defaults.rst
index 775134f5..e95cf10e 100644
--- a/docs/_source/defaults.rst
+++ b/docs/_source/defaults.rst
@@ -17,6 +17,7 @@ create_new_model
 data_handler
 data_origin
 data_path
+debug                             -               MLAir checks if it is running in debug mode and stores this
 dimensions
 end
 epochs
@@ -57,6 +58,7 @@ train_start
 transformation                    :py:`{}`        implement all further transformation functionality
                                                   inside your custom data handler
 use_all_stations_on_all_data_sets
+use_multiprocessing               :py:`True`      is set to False if MLAir is running in debug mode
 upsampling
 val_end
 val_min_length
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index f51cee8a..c777bcc4 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -64,48 +64,6 @@ class ExperimentSetup(RunEnvironment):
         * `target_dim` [.]
         * `window_lead_time` [.]
 
-        # interpolation
-        self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']})
-        self._set_param("time_dim", time_dim, default='datetime')
-        self._set_param("interpolation_method", interpolation_method, default='linear')
-        self._set_param("limit_nan_fill", limit_nan_fill, default=1)
-
-        # train set parameters
-        self._set_param("start", train_start, default="1997-01-01", scope="train")
-        self._set_param("end", train_end, default="2007-12-31", scope="train")
-        self._set_param("min_length", train_min_length, default=90, scope="train")
-
-        # validation set parameters
-        self._set_param("start", val_start, default="2008-01-01", scope="val")
-        self._set_param("end", val_end, default="2009-12-31", scope="val")
-        self._set_param("min_length", val_min_length, default=90, scope="val")
-
-        # test set parameters
-        self._set_param("start", test_start, default="2010-01-01", scope="test")
-        self._set_param("end", test_end, default="2017-12-31", scope="test")
-        self._set_param("min_length", test_min_length, default=90, scope="test")
-
-        # train_val set parameters
-        self._set_param("start", self.data_store.get("start", "train"), scope="train_val")
-        self._set_param("end", self.data_store.get("end", "val"), scope="train_val")
-        train_val_min_length = sum([self.data_store.get("min_length", s) for s in ["train", "val"]])
-        self._set_param("min_length", train_val_min_length, default=180, scope="train_val")
-
-        # use all stations on all data sets (train, val, test)
-        self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True)
-
-        # set post-processing instructions
-        self._set_param("evaluate_bootstraps", evaluate_bootstraps, scope="general.postprocessing")
-        create_new_bootstraps = max([self.data_store.get("train_model", "general"), create_new_bootstraps or False])
-        self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing")
-        self._set_param("number_of_bootstraps", number_of_bootstraps, default=20, scope="general.postprocessing")
-        self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
-
-        # check variables, statistics and target variable
-        self._check_target_var()
-        self._compare_variables_and_statistics()
-
-
     Creates
         * plot of model architecture in `<model_name>.pdf`
 
@@ -137,8 +95,11 @@ class ExperimentSetup(RunEnvironment):
         predicted.
     :param dimensions:
     :param time_dim:
-    :param interpolation_method:
-    :param limit_nan_fill:
+    :param interpolation_method: The method to use for interpolation.
+    :param interpolation_limit: The maximum number of consecutive time steps in a gap that is filled by interpolation.
+        If a gap exceeds this number, it is not filled by interpolation at all. The limit is given in time steps and
+        interpreted according to the `sampling` frequency: a limit of 2 allows gaps of up to either 2 hours (hourly
+        sampling) or 2 days (daily sampling) to be interpolated.
     :param train_start:
     :param train_end:
     :param val_start:
@@ -199,6 +160,29 @@ class ExperimentSetup(RunEnvironment):
     :param data_path: path to find and store meteorological and environmental / air quality data. Leave this parameter
         empty, if your host system is known and a suitable path was already hardcoded in the program (see
         :py:func:`prepare host <src.configuration.path_config.prepare_host>`).
+    :param experiment_date:
+    :param window_dim: "Temporal" dimension of the input and target data, that is provided for each sample. The number
+        of samples provided in this dimension can be set using `window_history_size` for inputs and `window_lead_time`
+        on target site.
+    :param iter_dim:
+    :param batch_path:
+    :param login_nodes:
+    :param hpc_hosts:
+    :param model:
+    :param batch_size:
+    :param epochs: Number of epochs used in training. If a training is resumed and the number of epochs of the already
+        (partly) trained model is lower than this parameter, training is continued. If that number is higher than the
+        given epochs parameter, no training is resumed. Epochs is set to 20 by default, but this value is just a
+        placeholder that should be adjusted for a meaningful training.
+    :param data_handler:
+    :param data_origin:
+    :param competitors: Provide names of reference models trained by MLAir that can be found in the `competitor_path`.
+        These models will be used in the postprocessing for comparison.
+    :param competitor_path: The path where MLAir can find competing models. If not provided, this path is assumed to be
+        in the `data_path` directory as a subdirectory called `competitors` (default).
+    :param use_multiprocessing: Enable parallel preprocessing (postprocessing not implemented yet) by setting this
+        parameter to `True` (default). If set to `False`, the computation is performed serially. Multiprocessing is
+        disabled when running in debug mode and cannot be switched on.
 
     """
 
-- 
GitLab
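
The reworked `interpolation_limit` description can be made concrete with pandas. Note the simplification: pandas'
`limit` argument fills the first N values of an oversized gap, whereas the docstring describes skipping such gaps
entirely; the series below is made up:

.. code-block:: python

    import numpy as np
    import pandas as pd

    # made-up series with a one-step gap and a three-step gap
    s = pd.Series([1.0, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0])
    # limit=1 fills the single-step gap completely but only the first
    # value of the three-step gap
    print(s.interpolate(method="linear", limit=1))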


From 88c70c578a4f7c59d90147a70acf9c9e73ea3841 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 7 Apr 2021 10:34:32 +0200
Subject: [PATCH 064/175] multiprocessing can now be enabled in debug mode

---
 docs/_source/defaults.rst                     | 67 ++++++++++++-------
 mlair/configuration/defaults.py               |  1 +
 mlair/run_modules/experiment_setup.py         | 11 +--
 .../test_run_modules/test_experiment_setup.py | 18 +++++
 4 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/docs/_source/defaults.rst b/docs/_source/defaults.rst
index e95cf10e..3a25ae61 100644
--- a/docs/_source/defaults.rst
+++ b/docs/_source/defaults.rst
@@ -4,34 +4,34 @@ Defaults
 In this section, we explain which parameters are set by MLAir during the :py:`ExperimentSetup` if not specified by the
 user. This is important information for example if a new :ref:`Custom Data Handler` is implemented.
 
-================================= =============== ============================================================
-parameter                         default         comment
-================================= =============== ============================================================
+================================= ======================== ============================================================
+parameter                         default                  comment
+================================= ======================== ============================================================
 batch_path
-batch_size
+batch_size                        :py:`512`
 bootstrap_path
 competitor_path
-competitors
-create_new_bootstraps
-create_new_model
-data_handler
+competitors                       :py:`[]`
+create_new_bootstraps             :py:`False`
+create_new_model                  :py:`True`
+data_handler                      :py:`DefaultDataHandler`
 data_origin
 data_path
-debug                             -               MLAir checks if it is running in debug mode and stores this
+debug                             ``-``                    MLAir checks if it is running in debug mode and stores this
 dimensions
-end
-epochs
-evaluate_bootstraps
+end                               :py:`"2017-12-31"`
+epochs                            :py:`20`                 This is just a placeholder to prevent unintended longish training
+evaluate_bootstraps               :py:`True`           Bootstrapping can take some time.
 experiment_name
 experiment_path
-extreme_values
-extremes_on_right_tail_only
+extreme_values                    :py:`None`
+extremes_on_right_tail_only       :py:`False`          Could be used for skew distributions
 forecast_path
 fraction_of_training
 hostname
 hpc_hosts
-interpolation_limit
-interpolation_method
+interpolation_limit               :py:`1`
+interpolation_method              :py:`"linear"`
 logging_path
 login_nodes
 model_class
@@ -39,10 +39,10 @@ model_path
 neighbors
 number_of_bootstraps
 overwrite_local_data
-permute_data
+permute_data                      :py:`False`
 plot_list
 plot_path
-start
+start                             ``"1997-01-01"``
 stations
 statistics_per_var
 target_dim
@@ -55,15 +55,34 @@ train_model
 train_end
 train_min_length
 train_start
-transformation                    :py:`{}`        implement all further transformation functionality
-                                                  inside your custom data handler
+transformation                    :py:`{}`             implement all further transformation functionality
+                                                       inside your custom data handler
 use_all_stations_on_all_data_sets
-use_multiprocessing               :py:`True`      is set to False if MLAir is running in debug mode
+use_multiprocessing               :py:`True`           is used if MLAir is not running in debug mode
+use_multiprocessing_on_debug      :py:`False`          is used if MLAir is running in debug mode
 upsampling
 val_end
 val_min_length
 val_start
 variables
-window_history_size
-window_lead_time
-================================= =============== ============================================================
+window_history_size               :py:`13`
+window_lead_time                  :py:`3`
+================================= ==================== ============================================================
+
+
+
+Test Table
+
+
++---------+---------+---------+
+| parameter | default | comment |
++---------+---------+---------+
+| batch_path | |
+| batch_size | :py:`512` | |
+| bootstrap_path | |
+| competitor_path | |
+| debug | ``-`` | MLAir checks if it is running in debug mode and stores this |
+| dimensions | |
+| end | :py:`"2017-12-31"` | |
+| epochs | :py:`20` | This is just a placeholder to prevent unintended longish training |
++---------+---------+---------+
\ No newline at end of file
diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 8805acfc..a874611a 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -54,6 +54,7 @@ DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA
                        "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
                        "pm10": "", "so2": ""}
 DEFAULT_USE_MULTIPROCESSING = True
+DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False
 
 
 def get_defaults():
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index c777bcc4..24fedaa8 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -19,7 +19,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \
-    DEFAULT_USE_MULTIPROCESSING
+    DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG
 from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
@@ -215,7 +215,7 @@ class ExperimentSetup(RunEnvironment):
                  create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
                  hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
                  data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
-                 use_multiprocessing: bool = None, **kwargs):
+                 use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -255,8 +255,11 @@ class ExperimentSetup(RunEnvironment):
         # host system setup
         debug_mode = sys.gettrace() is not None
         self._set_param("debug_mode", debug_mode)
-        use_multiprocessing = False if debug_mode is True else use_multiprocessing
-        self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING)
+        if debug_mode is True:
+            self._set_param("use_multiprocessing", use_multiprocessing_on_debug,
+                            default=DEFAULT_USE_MULTIPROCESSING_ON_DEBUG)
+        else:
+            self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING)
 
         # batch path (temporary)
         self._set_param("batch_path", batch_path, default=os.path.join(experiment_path, "batch_data"))
diff --git a/test/test_run_modules/test_experiment_setup.py b/test/test_run_modules/test_experiment_setup.py
index 7c63d3d1..d7cd0887 100644
--- a/test/test_run_modules/test_experiment_setup.py
+++ b/test/test_run_modules/test_experiment_setup.py
@@ -3,6 +3,7 @@ import logging
 import os
 
 import pytest
+import mock
 
 from mlair.helpers import TimeTracking, to_list
 from mlair.configuration.path_config import prepare_host
@@ -185,3 +186,20 @@ class TestExperimentSetup:
 
         kwargs["variables"] = ["o3", "temp"]
         assert ExperimentSetup(**kwargs) is not None
+
+    def test_multiprocessing_no_debug(self):
+        # no debug mode, parallel
+        exp_setup = ExperimentSetup(use_multiprocessing_on_debug=False)
+        assert exp_setup.data_store.get("use_multiprocessing") is True
+        # no debug mode, serial
+        exp_setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True)
+        assert exp_setup.data_store.get("use_multiprocessing") is False
+
+    @mock.patch("sys.gettrace", return_value="dummy_not_null")
+    def test_multiprocessing_debug(self, mock_gettrace):
+        # debug mode, parallel
+        exp_setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True)
+        assert exp_setup.data_store.get("use_multiprocessing") is True
+        # debug mode, serial
+        exp_setup = ExperimentSetup(use_multiprocessing=True)
+        assert exp_setup.data_store.get("use_multiprocessing") is False
-- 
GitLab
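
`sys.gettrace()` returns the trace function a debugger has installed and `None` otherwise, which is what the
`debug_mode` flag is based on. A minimal sketch of the resolution order introduced by this patch (the function name
is illustrative; the defaults mirror the patch):

.. code-block:: python

    import sys

    DEFAULT_USE_MULTIPROCESSING = True
    DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False

    def resolve_multiprocessing(use_multiprocessing=None, use_multiprocessing_on_debug=None):
        # illustrative helper mirroring the logic in ExperimentSetup
        debug_mode = sys.gettrace() is not None
        if debug_mode:
            value, default = use_multiprocessing_on_debug, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG
        else:
            value, default = use_multiprocessing, DEFAULT_USE_MULTIPROCESSING
        return value if value is not None else default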


From 1044611454595c79828e6ee7bf520ec3a0982fed Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 7 Apr 2021 10:50:33 +0200
Subject: [PATCH 065/175] try another table format

---
 docs/_source/defaults.rst | 43 ++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/docs/_source/defaults.rst b/docs/_source/defaults.rst
index 3a25ae61..89a113f1 100644
--- a/docs/_source/defaults.rst
+++ b/docs/_source/defaults.rst
@@ -21,11 +21,11 @@ debug                             ``-``                    MLAir checks if it is
 dimensions
 end                               :py:`"2017-12-31"`
 epochs                            :py:`20`                 This is just a placeholder to prevent unintended longish training
-evaluate_bootstraps               :py:`True`           Bootstrapping can take some time.
+evaluate_bootstraps               :py:`True`               Bootstrapping can take some time.
 experiment_name
 experiment_path
 extreme_values                    :py:`None`
-extremes_on_right_tail_only       :py:`False`          Could be used for skew distributions
+extremes_on_right_tail_only       :py:`False`              Could be used for skew distributions
 forecast_path
 fraction_of_training
 hostname
@@ -55,11 +55,11 @@ train_model
 train_end
 train_min_length
 train_start
-transformation                    :py:`{}`             implement all further transformation functionality
-                                                       inside your custom data handler
+transformation                    :py:`{}`                 implement all further transformation functionality
+                                                           inside your custom data handler
 use_all_stations_on_all_data_sets
-use_multiprocessing               :py:`True`           is used if MLAir is not running in debug mode
-use_multiprocessing_on_debug      :py:`False`          is used if MLAir is running in debug mode
+use_multiprocessing               :py:`True`               is used if MLAir is not running in debug mode
+use_multiprocessing_on_debug      :py:`False`              is used if MLAir is running in debug mode
 upsampling
 val_end
 val_min_length
@@ -67,22 +67,23 @@ val_start
 variables
 window_history_size               :py:`13`
 window_lead_time                  :py:`3`
-================================= ==================== ============================================================
-
+================================= ======================== ============================================================
 
 
-Test Table
 
+.. list-table:: Title
+   :widths: 25 25 50
+   :header-rows: 1
 
-+---------+---------+---------+
-| parameter | default | comment |
-+---------+---------+---------+
-| batch_path | |
-| batch_size | :py:`512` | |
-| bootstrap_path | |
-| competitor_path | |
-| debug | ``-`` | MLAir checks if it is running in debug mode and stores this |
-| dimensions | |
-| end | :py:`"2017-12-31"` | |
-| epochs | :py:`20` | This is just a placeholder to prevent unintended longish training |
-+---------+---------+---------+
\ No newline at end of file
+   * - parameter
+     - default
+     - comment
+   * - batch_path
+     -
+     -
+   * - batch_size
+     - :py:`512`
+     -
+   * - debug
+     - ``-``
+     - MLAir checks if it is running in debug mode and stores this
-- 
GitLab


From 4fa6840267ba56a0cf6ccecef90210b5d06872fe Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 7 Apr 2021 12:20:21 +0200
Subject: [PATCH 066/175] use new table format for defaults

---
 docs/_source/defaults.rst | 235 +++++++++++++++++++++++++++-----------
 1 file changed, 167 insertions(+), 68 deletions(-)

diff --git a/docs/_source/defaults.rst b/docs/_source/defaults.rst
index 89a113f1..360c9ad1 100644
--- a/docs/_source/defaults.rst
+++ b/docs/_source/defaults.rst
@@ -4,74 +4,8 @@ Defaults
 In this section, we explain which parameters are set by MLAir during the :py:`ExperimentSetup` if not specified by the
 user. This is important information for example if a new :ref:`Custom Data Handler` is implemented.
 
-================================= ======================== ============================================================
-parameter                         default                  comment
-================================= ======================== ============================================================
-batch_path
-batch_size                        :py:`512`
-bootstrap_path
-competitor_path
-competitors                       :py:`[]`
-create_new_bootstraps             :py:`False`
-create_new_model                  :py:`True`
-data_handler                      :py:`DefaultDataHandler`
-data_origin
-data_path
-debug                             ``-``                    MLAir checks if it is running in debug mode and stores this
-dimensions
-end                               :py:`"2017-12-31"`
-epochs                            :py:`20`                 This is just a placeholder to prevent unintended longish training
-evaluate_bootstraps               :py:`True`               Bootstrapping can take some time.
-experiment_name
-experiment_path
-extreme_values                    :py:`None`
-extremes_on_right_tail_only       :py:`False`              Could be used for skew distributions
-forecast_path
-fraction_of_training
-hostname
-hpc_hosts
-interpolation_limit               :py:`1`
-interpolation_method              :py:`"linear"`
-logging_path
-login_nodes
-model_class
-model_path
-neighbors
-number_of_bootstraps
-overwrite_local_data
-permute_data                      :py:`False`
-plot_list
-plot_path
-start                             ``"1997-01-01"``
-stations
-statistics_per_var
-target_dim
-target_var
-test_start
-test_end
-test_min_length
-time_dim
-train_model
-train_end
-train_min_length
-train_start
-transformation                    :py:`{}`                 implement all further transformation functionality
-                                                           inside your custom data handler
-use_all_stations_on_all_data_sets
-use_multiprocessing               :py:`True`               is used if MLAir is not running in debug mode
-use_multiprocessing_on_debug      :py:`False`              is used if MLAir is running in debug mode
-upsampling
-val_end
-val_min_length
-val_start
-variables
-window_history_size               :py:`13`
-window_lead_time                  :py:`3`
-================================= ======================== ============================================================
 
-
-
-.. list-table:: Title
+.. list-table:: Defaults Overview
    :widths: 25 25 50
    :header-rows: 1
 
@@ -84,6 +18,171 @@ window_lead_time                  :py:`3`
    * - batch_size
      - :py:`512`
      -
+   * - bootstrap_path
+     -
+     -
+   * - competitor_path
+     -
+     -
+   * - competitors
+     - :py:`[]`
+     -
+   * - create_new_bootstraps
+     - :py:`False`
+     -
+   * - create_new_model
+     - :py:`True`
+     -
+   * - data_handler
+     - :py:`DefaultDataHandler`
+     -
+   * - data_origin
+     -
+     -
+   * - data_path
+     -
+     -
    * - debug
      - ``-``
-     - MLAir checks if it is running in debug mode and stores this
+     - MLAir checks if it is running in debug mode and stores this information
+   * - dimensions
+     -
+     -
+   * - end
+     - :py:`"2017-12-31"`
+     -
+   * - epochs
+     - :py:`20`
+     - This is just a placeholder to prevent unintentionally long training
+   * - evaluate_bootstraps
+     - :py:`True`
+     - Bootstrapping may take some time.
+   * - experiment_name
+     -
+     -
+   * - experiment_path
+     -
+     -
+   * - extreme_values
+     - :py:`None`
+     -
+   * - extremes_on_right_tail_only
+     - :py:`False`
+     - Could be used for skewed distributions
+   * - forecast_path
+     -
+     -
+   * - fraction_of_training
+     -
+     -
+   * - hostname
+     -
+     -
+   * - hpc_hosts
+     -
+     -
+   * - interpolation_limit
+     - :py:`1`
+     -
+   * - interpolation_method
+     - :py:`"linear"`
+     -
+   * - logging_path
+     -
+     -
+   * - login_nodes
+     -
+     -
+   * - model_class
+     -
+     -
+   * - model_path
+     -
+     -
+   * - neighbors
+     -
+     -
+   * - number_of_bootstraps
+     -
+     -
+   * - overwrite_local_data
+     -
+     -
+   * - permute_data
+     - :py:`False`
+     -
+   * - plot_list
+     -
+     -
+   * - plot_path
+     -
+     -
+   * - start
+     - :py:`"1997-01-01"`
+     -
+   * - stations
+     -
+     -
+   * - statistics_per_var
+     -
+     -
+   * - target_dim
+     -
+     -
+   * - target_var
+     -
+     -
+   * - test_start
+     -
+     -
+   * - test_end
+     -
+     -
+   * - test_min_length
+     -
+     -
+   * - time_dim
+     -
+     -
+   * - train_model
+     -
+     -
+   * - train_end
+     -
+     -
+   * - train_min_length
+     -
+     -
+   * - train_start
+     -
+     -
+   * - transformation
+     - :py:`{}`
+     - implement all further transformation functionality inside your custom data handler
+   * - use_all_stations_on_all_data_sets
+     -
+     -
+   * - use_multiprocessing
+     - :py:`True`
+     - is used if MLAir is not running in debug mode
+   * - use_multiprocessing_on_debug
+     - :py:`False`
+     - is used if MLAir is running in debug mode
+   * - upsampling
+     -
+     -
+   * - val_end
+     -
+     -
+   * - val_min_length
+     -
+     -
+   * - val_start
+     -
+     -
+   * - variables
+     -
+     -
+   * - window_history_size
+     - :py:`13`
+     -
+   * - window_lead_time
+     - :py:`3`
+     -
-- 
GitLab


From 0d3326677ac4f0b13eef9d43d07b77e2a79927ef Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 9 Apr 2021 13:29:42 +0200
Subject: [PATCH 067/175] added limit for x or y axis

---
 mlair/plotting/postprocessing_plotting.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 1cb6181a..c4f18bfd 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -659,7 +659,7 @@ class PlotClimatologicalSkillScore(AbstractPlotClass):
         """
         return "" if score_only else "terms and "
 
-    def _plot(self, score_only):
+    def _plot(self, score_only, xlim=5):
         """
         Plot climatological skill score.
 
@@ -672,6 +672,8 @@ class PlotClimatologicalSkillScore(AbstractPlotClass):
                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
         ax.axhline(y=0, color="grey", linewidth=.5)
         ax.set(ylabel=f"{self._label_add(score_only)}skill score", xlabel="", title="summary of all stations")
+        x_min, x_max = ax.get_xlim()
+        ax.set_xlim([max(x_min, -xlim), min(x_max, xlim)])
         handles, _ = ax.get_legend_handles_labels()
         ax.legend(handles, self._labels)
         plt.tight_layout()
@@ -737,7 +739,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         data = data.stack(level=0).reset_index(level=2, drop=True).reset_index(name="data")
         return data.astype({"comparison": str, "ahead": int, "data": float})
 
-    def _plot(self, single_model_comparison=False):
+    def _plot(self, single_model_comparison=False, xlim=5):
         """Plot skill scores of the comparisons."""
         size = max([len(np.unique(self._data.comparison)), 6])
         fig, ax = plt.subplots(figsize=(size, size * 0.8))
@@ -749,12 +751,14 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         ax.axhline(y=0, color="grey", linewidth=.5)
 
         ax.set(ylabel="skill score", xlabel="competing models", title="summary of all stations", ylim=self._lim())
+        x_min, x_max = ax.get_xlim()
+        ax.set_xlim([max(x_min, -xlim), min(x_max, xlim)])
         handles, _ = ax.get_legend_handles_labels()
         plt.xticks(rotation=90)
         ax.legend(handles, self._labels)
         plt.tight_layout()
 
-    def _plot_vertical(self, single_model_comparison=False):
+    def _plot_vertical(self, single_model_comparison=False, ylim=5):
         """Plot skill scores of the comparisons, but vertically aligned."""
         fig, ax = plt.subplots()
         data = self._filter_comparisons(self._data) if single_model_comparison is True else self._data
@@ -765,6 +769,8 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         # ax.axhline(x=0, color="grey", linewidth=.5)
         ax.axvline(x=0, color="grey", linewidth=.5)
         ax.set(xlabel="skill score", ylabel="competing models", title="summary of all stations", xlim=self._lim())
+        y_min, y_max = ax.get_ylim()
+        ax.set_ylim([max(y_min, -ylim), min(y_max, ylim)])
         handles, _ = ax.get_legend_handles_labels()
         ax.legend(handles, self._labels)
         plt.tight_layout()
-- 
GitLab
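
The clamping pattern reads the autoscaled limits first and then only ever shrinks them, so a few extreme outliers
cannot blow up the plot while well-behaved data keeps its natural range. A standalone sketch with made-up skill
scores:

.. code-block:: python

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot([0, 1, 2], [-12.0, 0.5, 9.0])  # made-up skill scores

    limit = 5
    y_min, y_max = ax.get_ylim()
    # never widen the autoscaled range, only cut it down to [-limit, limit]
    ax.set_ylim([max(y_min, -limit), min(y_max, limit)])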


From 330c10686a942f22fb7bde5729df0cb0506c97e1 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 9 Apr 2021 13:42:13 +0200
Subject: [PATCH 068/175] added tmp prints because limits are still exceeded

---
 mlair/plotting/postprocessing_plotting.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index c4f18bfd..68cab25b 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -752,11 +752,15 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
 
         ax.set(ylabel="skill score", xlabel="competing models", title="summary of all stations", ylim=self._lim())
         x_min, x_max = ax.get_xlim()
+        print("-----------------")
+        print(ax.get_xlim())
         ax.set_xlim([max(x_min, -xlim), min(x_max, xlim)])
         handles, _ = ax.get_legend_handles_labels()
         plt.xticks(rotation=90)
+        print(ax.get_xlim())
         ax.legend(handles, self._labels)
         plt.tight_layout()
+        print(ax.get_xlim())
 
     def _plot_vertical(self, single_model_comparison=False, ylim=5):
         """Plot skill scores of the comparisons, but vertically aligned."""
-- 
GitLab


From 70f7963948547c4908bf313e5dac090a616f25b1 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 9 Apr 2021 14:31:45 +0200
Subject: [PATCH 069/175] error is fixed, another try on HPC, on success /close
 #301

---
 mlair/plotting/postprocessing_plotting.py | 36 +++++++++++++----------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 68cab25b..4855f16c 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -671,13 +671,26 @@ class PlotClimatologicalSkillScore(AbstractPlotClass):
         sns.boxplot(x="terms", y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
         ax.axhline(y=0, color="grey", linewidth=.5)
-        ax.set(ylabel=f"{self._label_add(score_only)}skill score", xlabel="", title="summary of all stations")
-        x_min, x_max = ax.get_xlim()
-        ax.set_xlim([max(x_min, -xlim), min(x_max, xlim)])
+        ax.set(ylabel=f"{self._label_add(score_only)}skill score", xlabel="", title="summary of all stations",
+               ylim=self._lim())
         handles, _ = ax.get_legend_handles_labels()
         ax.legend(handles, self._labels)
         plt.tight_layout()
 
+    def _lim(self) -> Tuple[float, float]:
+        """
+        Calculate axis limits from data (can be used to set the axis extent).
+
+        The lower limit is the minimum of 0 and the data's minimum (reduced by a small margin), the upper limit is the
+        data's maximum (increased by a small margin); both are clipped to the interval [-5, 5].
+
+        :return:
+        """
+        limit = 5
+        lower = np.max([-limit, np.min([0, helpers.float_round(self._data["data"].min() - 0.1, 2)])])
+        upper = np.min([limit, helpers.float_round(self._data["data"].max() + 0.1, 2)])
+        return lower, upper
+
 
 @TimeTrackingWrapper
 class PlotCompetitiveSkillScore(AbstractPlotClass):
@@ -739,7 +752,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         data = data.stack(level=0).reset_index(level=2, drop=True).reset_index(name="data")
         return data.astype({"comparison": str, "ahead": int, "data": float})
 
-    def _plot(self, single_model_comparison=False, xlim=5):
+    def _plot(self, single_model_comparison=False):
         """Plot skill scores of the comparisons."""
         size = max([len(np.unique(self._data.comparison)), 6])
         fig, ax = plt.subplots(figsize=(size, size * 0.8))
@@ -751,18 +764,12 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         ax.axhline(y=0, color="grey", linewidth=.5)
 
         ax.set(ylabel="skill score", xlabel="competing models", title="summary of all stations", ylim=self._lim())
-        x_min, x_max = ax.get_xlim()
-        print("-----------------")
-        print(ax.get_xlim())
-        ax.set_xlim([max(x_min, -xlim), min(x_max, xlim)])
         handles, _ = ax.get_legend_handles_labels()
         plt.xticks(rotation=90)
-        print(ax.get_xlim())
         ax.legend(handles, self._labels)
         plt.tight_layout()
-        print(ax.get_xlim())
 
-    def _plot_vertical(self, single_model_comparison=False, ylim=5):
+    def _plot_vertical(self, single_model_comparison=False):
         """Plot skill scores of the comparisons, but vertically aligned."""
         fig, ax = plt.subplots()
         data = self._filter_comparisons(self._data) if single_model_comparison is True else self._data
@@ -773,8 +780,6 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         # ax.axhline(x=0, color="grey", linewidth=.5)
         ax.axvline(x=0, color="grey", linewidth=.5)
         ax.set(xlabel="skill score", ylabel="competing models", title="summary of all stations", xlim=self._lim())
-        y_min, y_max = ax.get_ylim()
-        ax.set_ylim([max(y_min, -ylim), min(y_max, ylim)])
         handles, _ = ax.get_legend_handles_labels()
         ax.legend(handles, self._labels)
         plt.tight_layout()
@@ -799,8 +804,9 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
 
         :return:
         """
-        lower = np.min([0, helpers.float_round(self._data.min()[2], 2) - 0.1])
-        upper = helpers.float_round(self._data.max()[2], 2) + 0.1
+        limit = 5
+        lower = np.max([-limit, np.min([0, helpers.float_round(self._data.min()[2], 2) - 0.1])])
+        upper = np.min([limit, helpers.float_round(self._data.max()[2], 2) + 0.1])
         return lower, upper
 
 
-- 
GitLab
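
A worked example of the new `_lim` logic: for data ranging from -7.3 to 0.8, the raw lower bound -7.4 is clipped to
-5, while the upper bound 0.9 stays below the cap. A hedged sketch that uses plain `round` in place of
`helpers.float_round`:

.. code-block:: python

    import numpy as np

    def lim(data, limit=5):
        # plain round stands in for helpers.float_round here
        # lower: min of 0 and (data minimum - 0.1), clipped at -limit
        lower = np.max([-limit, np.min([0, round(float(np.min(data)) - 0.1, 2)])])
        # upper: data maximum + 0.1, clipped at +limit
        upper = np.min([limit, round(float(np.max(data)) + 0.1, 2)])
        return lower, upper

    print(lim([-7.3, -1.2, 0.8]))  # -> (-5.0, 0.9)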


From 9cd6a8ab15bf43dec5af9c8969ee518398a8290f Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 9 Apr 2021 14:47:12 +0200
Subject: [PATCH 070/175] another adjustment

---
 mlair/plotting/postprocessing_plotting.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 4855f16c..d769fabc 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -762,8 +762,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
                     showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."},
                     order=order)
         ax.axhline(y=0, color="grey", linewidth=.5)
-
-        ax.set(ylabel="skill score", xlabel="competing models", title="summary of all stations", ylim=self._lim())
+        ax.set(ylabel="skill score", xlabel="competing models", title="summary of all stations", ylim=self._lim(data))
         handles, _ = ax.get_legend_handles_labels()
         plt.xticks(rotation=90)
         ax.legend(handles, self._labels)
@@ -777,9 +776,8 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         sns.boxplot(y="comparison", x="data", hue="ahead", data=data, whis=1., ax=ax, palette="Blues_d",
                     showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."},
                     order=order)
-        # ax.axhline(x=0, color="grey", linewidth=.5)
         ax.axvline(x=0, color="grey", linewidth=.5)
-        ax.set(xlabel="skill score", ylabel="competing models", title="summary of all stations", xlim=self._lim())
+        ax.set(xlabel="skill score", ylabel="competing models", title="summary of all stations", xlim=self._lim(data))
         handles, _ = ax.get_legend_handles_labels()
         ax.legend(handles, self._labels)
         plt.tight_layout()
@@ -795,7 +793,8 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         filtered_headers = list(filter(lambda x: "nn-" in x, data.comparison.unique()))
         return data[data.comparison.isin(filtered_headers)]
 
-    def _lim(self) -> Tuple[float, float]:
+    @staticmethod
+    def _lim(data) -> Tuple[float, float]:
         """
         Calculate axis limits from data (can be used to set the axis extent).
 
@@ -805,8 +804,8 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):
         :return:
         """
         limit = 5
-        lower = np.max([-limit, np.min([0, helpers.float_round(self._data.min()[2], 2) - 0.1])])
-        upper = np.min([limit, helpers.float_round(self._data.max()[2], 2) + 0.1])
+        lower = np.max([-limit, np.min([0, helpers.float_round(data.min()[2], 2) - 0.1])])
+        upper = np.min([limit, helpers.float_round(data.max()[2], 2) + 0.1])
         return lower, upper
 
 
-- 
GitLab


From 317f24468d546fc91a9801bc4f07496d6d31d0aa Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 13 Apr 2021 11:49:37 +0200
Subject: [PATCH 071/175] split plots into abstract, pre, and postprocessing
 plots

---
 mlair/plotting/abstract_plot_class.py     | 101 +++++
 mlair/plotting/postprocessing_plotting.py | 523 +---------------------
 mlair/plotting/preprocessing_plotting.py  | 438 ++++++++++++++++++
 mlair/run_modules/post_processing.py      |   6 +-
 4 files changed, 543 insertions(+), 525 deletions(-)
 create mode 100644 mlair/plotting/abstract_plot_class.py
 create mode 100644 mlair/plotting/preprocessing_plotting.py

diff --git a/mlair/plotting/abstract_plot_class.py b/mlair/plotting/abstract_plot_class.py
new file mode 100644
index 00000000..dab45156
--- /dev/null
+++ b/mlair/plotting/abstract_plot_class.py
@@ -0,0 +1,101 @@
+"""Abstract plot class that should be used for preprocessing and postprocessing plots."""
+__author__ = "Lukas Leufen"
+__date__ = '2021-04-13'
+
+import logging
+import os
+
+from matplotlib import pyplot as plt
+
+
+class AbstractPlotClass:
+    """
+    Abstract class for all plotting routines to unify plot workflow.
+
+    Each subclass must implement a _plot method. Create a plot class like:
+
+    .. code-block:: python
+
+        class MyCustomPlot(AbstractPlotClass):
+
+            def __init__(self, plot_folder, *args, **kwargs):
+                super().__init__(plot_folder, "custom_plot_name")
+                self._data = self._prepare_data(*args, **kwargs)
+                self._plot(*args, **kwargs)
+                self._save()
+
+            def _prepare_data(*args, **kwargs):
+                <your custom data preparation>
+                return data
+
+            def _plot(*args, **kwargs):
+                <your custom plotting without saving>
+
+    The save method is already implemented in the AbstractPlotClass. If special saving is required (e.g. if you are
+    using pdfpages), you need to override it. Plots are saved as .pdf with a resolution of 500 dpi by default (can be
+    set in the super class initialisation).
+
+    Methods like _prepare_data() shown above are optional. The only method you are required to implement is _plot.
+
+    If you want to add a time tracking module, just add the TimeTrackingWrapper as a decorator around your custom plot
+    class. It will log the time spent whenever your plot class is called.
+
+    .. code-block:: python
+
+        @TimeTrackingWrapper
+        class MyCustomPlot(AbstractPlotClass):
+            pass
+
+    Let's assume it takes a while to create this very special plot.
+
+    >>> MyCustomPlot()
+    INFO: MyCustomPlot finished after 00:00:11 (hh:mm:ss)
+
+    """
+
+    def __init__(self, plot_folder, plot_name, resolution=500, rc_params=None):
+        """Set up plot folder and name, and plot resolution (default 500dpi)."""
+        plot_folder = os.path.abspath(plot_folder)
+        if not os.path.exists(plot_folder):
+            os.makedirs(plot_folder)
+        self.plot_folder = plot_folder
+        self.plot_name = plot_name
+        self.resolution = resolution
+        if rc_params is None:
+            rc_params = {'axes.labelsize': 'large',
+                         'xtick.labelsize': 'large',
+                         'ytick.labelsize': 'large',
+                         'legend.fontsize': 'large',
+                         'axes.titlesize': 'large',
+                         }
+        self.rc_params = rc_params
+        self._update_rc_params()
+
+    def _plot(self, *args):
+        """Abstract plot class needs to be implemented in inheritance."""
+        raise NotImplementedError
+
+    def _save(self, **kwargs):
+        """Store plot locally. Name of and path to plot need to be set on initialisation."""
+        plot_name = os.path.join(self.plot_folder, f"{self.plot_name}.pdf")
+        logging.debug(f"... save plot to {plot_name}")
+        plt.savefig(plot_name, dpi=self.resolution, **kwargs)
+        plt.close('all')
+
+    def _update_rc_params(self):
+        plt.rcParams.update(self.rc_params)
+
+    @staticmethod
+    def _get_sampling(sampling):
+        if sampling == "daily":
+            return "D"
+        elif sampling == "hourly":
+            return "h"
+
+    @staticmethod
+    def get_dataset_colors():
+        """
+        Standard colors used for train, val, and test sets during postprocessing.
+        """
+        colors = {"train": "#e69f00", "val": "#009e73", "test": "#56b4e9", "train_val": "#000000"}  # hex code
+        return colors
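
For reference, a minimal concrete subclass following the pattern documented above could look like the sketch below. The class name, data, and plot logic are placeholders for illustration and not part of the patch.

.. code-block:: python

    import numpy as np
    from matplotlib import pyplot as plt

    from mlair.plotting.abstract_plot_class import AbstractPlotClass


    class PlotRandomSeries(AbstractPlotClass):
        """Hypothetical example: plot a random walk to demonstrate the workflow."""

        def __init__(self, plot_folder, n=100):
            # resolution and rc_params fall back to the defaults of the super class
            super().__init__(plot_folder, "random_series")
            self._data = self._prepare_data(n)
            self._plot()
            self._save()  # stores <plot_folder>/random_series.pdf

        def _prepare_data(self, n):
            return np.random.randn(n).cumsum()

        def _plot(self, *args):
            fig, ax = plt.subplots()
            ax.plot(self._data)
            ax.set_ylabel("value")
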
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index d769fabc..491aa52e 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -9,10 +9,7 @@ import warnings
 from typing import Dict, List, Tuple
 
 import matplotlib
-import matplotlib.patches as mpatches
-import matplotlib.lines as mlines
 import matplotlib.pyplot as plt
-import matplotlib.dates as mdates
 import numpy as np
 import pandas as pd
 import seaborn as sns
@@ -22,6 +19,7 @@ from matplotlib.backends.backend_pdf import PdfPages
 from mlair import helpers
 from mlair.data_handler.iterator import DataCollection
 from mlair.helpers import TimeTrackingWrapper
+from mlair.plotting.abstract_plot_class import AbstractPlotClass
 
 logging.getLogger('matplotlib').setLevel(logging.WARNING)
 
@@ -31,100 +29,6 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
 # import matplotlib.pyplot as plt
 
 
-class AbstractPlotClass:
-    """
-    Abstract class for all plotting routines to unify plot workflow.
-
-    Each inheritance requires a _plot method. Create a plot class like:
-
-    .. code-block:: python
-
-        class MyCustomPlot(AbstractPlotClass):
-
-            def __init__(self, plot_folder, *args, **kwargs):
-                super().__init__(plot_folder, "custom_plot_name")
-                self._data = self._prepare_data(*args, **kwargs)
-                self._plot(*args, **kwargs)
-                self._save()
-
-            def _prepare_data(*args, **kwargs):
-                <your custom data preparation>
-                return data
-
-            def _plot(*args, **kwargs):
-                <your custom plotting without saving>
-
-    The save method is already implemented in the AbstractPlotClass. If special saving is required (e.g. if you are
-    using pdfpages), you need to overwrite it. Plots are saved as .pdf with a resolution of 500dpi per default (can be
-    set in super class initialisation).
-
-    Methods like the shown _prepare_data() are optional. The only method required to implement is _plot.
-
-    If you want to add a time tracking module, just add the TimeTrackingWrapper as decorator around your custom plot
-    class. It will log the spent time if you call your plotting without saving the returned object.
-
-    .. code-block:: python
-
-        @TimeTrackingWrapper
-        class MyCustomPlot(AbstractPlotClass):
-            pass
-
-    Let's assume it takes a while to create this very special plot.
-
-    >>> MyCustomPlot()
-    INFO: MyCustomPlot finished after 00:00:11 (hh:mm:ss)
-
-    """
-
-    def __init__(self, plot_folder, plot_name, resolution=500, rc_params=None):
-        """Set up plot folder and name, and plot resolution (default 500dpi)."""
-        plot_folder = os.path.abspath(plot_folder)
-        if not os.path.exists(plot_folder):
-            os.makedirs(plot_folder)
-        self.plot_folder = plot_folder
-        self.plot_name = plot_name
-        self.resolution = resolution
-        if rc_params is None:
-            rc_params = {'axes.labelsize': 'large',
-                         'xtick.labelsize': 'large',
-                         'ytick.labelsize': 'large',
-                         'legend.fontsize': 'large',
-                         'axes.titlesize': 'large',
-                         }
-        self.rc_params = rc_params
-        self._update_rc_params()
-
-    def _plot(self, *args):
-        """Abstract plot class needs to be implemented in inheritance."""
-        raise NotImplementedError
-
-    def _save(self, **kwargs):
-        """Store plot locally. Name of and path to plot need to be set on initialisation."""
-        plot_name = os.path.join(self.plot_folder, f"{self.plot_name}.pdf")
-        logging.debug(f"... save plot to {plot_name}")
-        plt.savefig(plot_name, dpi=self.resolution, **kwargs)
-        plt.close('all')
-
-    def _update_rc_params(self):
-        plt.rcParams.update(self.rc_params)
-
-    @staticmethod
-    def _get_sampling(sampling):
-        if sampling == "daily":
-            return "D"
-        elif sampling == "hourly":
-            return "h"
-
-    @staticmethod
-    def get_dataset_colors():
-        """
-        Standard colors used for train-, val-, and test-sets during postprocessing
-        """
-        colors = {"train": "#e69f00", "val": "#009e73", "test": "#56b4e9", "train_val": "#000000"}  # hex code
-        return colors
-
-
-
 @TimeTrackingWrapper
 class PlotMonthlySummary(AbstractPlotClass):
     """
@@ -230,132 +134,6 @@ class PlotMonthlySummary(AbstractPlotClass):
         plt.tight_layout()
 
 
-@TimeTrackingWrapper
-class PlotStationMap(AbstractPlotClass):
-    """
-    Plot geographical overview of all used stations as squares.
-
-    Different data sets can be colorised by its key in the input dictionary generators. The key represents the color to
-    plot on the map. Currently, there is only a white background, but this can be adjusted by loading locally stored
-    topography data (not implemented yet). The plot is saved under plot_path with the name station_map.pdf
-
-    .. image:: ../../../../../_source/_plots/station_map.png
-        :width: 400
-    """
-
-    def __init__(self, generators: List, plot_folder: str = ".", plot_name="station_map"):
-        """
-        Set attributes and create plot.
-
-        :param generators: dictionary with the plot color of each data set as key and the generator containing all stations
-        as value.
-        :param plot_folder: path to save the plot (default: current directory)
-        """
-        super().__init__(plot_folder, plot_name)
-        self._ax = None
-        self._gl = None
-        self._plot(generators)
-        self._save(bbox_inches="tight")
-
-    def _draw_background(self):
-        """Draw coastline, lakes, ocean, rivers and country borders as background on the map."""
-
-        import cartopy.feature as cfeature
-
-        self._ax.add_feature(cfeature.LAND.with_scale("50m"))
-        self._ax.natural_earth_shp(resolution='50m')
-        self._ax.add_feature(cfeature.COASTLINE.with_scale("50m"), edgecolor='black')
-        self._ax.add_feature(cfeature.LAKES.with_scale("50m"))
-        self._ax.add_feature(cfeature.OCEAN.with_scale("50m"))
-        self._ax.add_feature(cfeature.RIVERS.with_scale("50m"))
-        self._ax.add_feature(cfeature.BORDERS.with_scale("50m"), facecolor='none', edgecolor='black')
-
-    def _plot_stations(self, generators):
-        """
-        Loop over all keys in generators dict and its containing stations and plot the stations's position.
-
-        Position is highlighted by a square on the map regarding the given color.
-
-        :param generators: dictionary with the plot color of each data set as key and the generator containing all
-            stations as value.
-        """
-
-        import cartopy.crs as ccrs
-        if generators is not None:
-            legend_elements = []
-            default_colors = self.get_dataset_colors()
-            for element in generators:
-                data_collection, plot_opts = self._get_collection_and_opts(element)
-                name = data_collection.name or "unknown"
-                marker = plot_opts.get("marker", "s")
-                ms = plot_opts.get("ms", 6)
-                mec = plot_opts.get("mec", "k")
-                mfc = plot_opts.get("mfc", default_colors.get(name, "b"))
-                legend_elements.append(
-                    mlines.Line2D([], [], mfc=mfc, mec=mec, marker=self._adjust_marker(marker), ms=ms, linestyle='None',
-                                  label=f"{name} ({len(data_collection)})"))
-                for station in data_collection:
-                    coords = station.get_coordinates()
-                    IDx, IDy = coords["lon"], coords["lat"]
-                    self._ax.plot(IDx, IDy, mfc=mfc, mec=mec, marker=marker, ms=ms, transform=ccrs.PlateCarree())
-            if len(legend_elements) > 0:
-                self._ax.legend(handles=legend_elements, loc='best')
-
-    @staticmethod
-    def _adjust_marker(marker):
-        _adjust = {4: "<", 5: ">", 6: "^", 7: "v", 8: "<", 9: ">", 10: "^", 11: "v"}
-        if isinstance(marker, int) and marker in _adjust.keys():
-            return _adjust[marker]
-        else:
-            return marker
-
-    @staticmethod
-    def _get_collection_and_opts(element):
-        if isinstance(element, tuple):
-            if len(element) == 1:
-                return element[0], {}
-            else:
-                return element
-        else:
-            return element, {}
-
-    def _plot(self, generators: List):
-        """
-        Create the station map plot.
-
-        Set figure and call all required sub-methods.
-
-        :param generators: dictionary with the plot color of each data set as key and the generator containing all
-            stations as value.
-        """
-
-        import cartopy.crs as ccrs
-        from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
-        fig = plt.figure(figsize=(10, 5))
-        self._ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
-        self._gl = self._ax.gridlines(xlocs=range(0, 21, 5), ylocs=range(44, 59, 2), draw_labels=True)
-        self._gl.xformatter = LONGITUDE_FORMATTER
-        self._gl.yformatter = LATITUDE_FORMATTER
-        self._draw_background()
-        self._plot_stations(generators)
-        self._adjust_extent()
-        plt.tight_layout()
-
-    def _adjust_extent(self):
-        import cartopy.crs as ccrs
-
-        def diff(arr):
-            return arr[1] - arr[0], arr[3] - arr[2]
-
-        def find_ratio(delta, reference=5):
-            return min(max(abs(reference / delta[0]), abs(reference / delta[1])), 5)
-
-        extent = self._ax.get_extent(crs=ccrs.PlateCarree())
-        ratio = find_ratio(diff(extent))
-        new_extent = extent + np.array([-1, 1, -1, 1]) * ratio
-        self._ax.set_extent(new_extent, crs=ccrs.PlateCarree())
-
-
 @TimeTrackingWrapper
 class PlotConditionalQuantiles(AbstractPlotClass):
     """
@@ -1138,133 +916,6 @@ class PlotTimeSeries:
         return matplotlib.backends.backend_pdf.PdfPages(plot_name)
 
 
-@TimeTrackingWrapper
-class PlotAvailability(AbstractPlotClass):
-    """
-    Create data availablility plot similar to Gantt plot.
-
-    Each entry of given generator, will result in a new line in the plot. Data is summarised for given temporal
-    resolution and checked whether data is available or not for each time step. This is afterwards highlighted as a
-    colored bar or a blank space.
-
-    You can set different colors to highlight subsets for example by providing different generators for the same index
-    using different keys in the input dictionary.
-
-    Note: each bar is surrounded by a small white box to highlight gabs in between. This can result in too long gabs
-    in display, if a gab is only very short. Also this appears on a (fluent) transition from one to another subset.
-
-    Calling this class will create three versions fo the availability plot.
-
-    1) Data availability for each element
-    1) Data availability as summary over all elements (is there at least a single elemnt for each time step)
-    1) Combination of single and overall availability
-
-    .. image:: ../../../../../_source/_plots/data_availability.png
-        :width: 400
-
-    .. image:: ../../../../../_source/_plots/data_availability_summary.png
-        :width: 400
-
-    .. image:: ../../../../../_source/_plots/data_availability_combined.png
-        :width: 400
-
-    """
-
-    def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".", sampling="daily",
-                 summary_name="data availability", time_dimension="datetime", window_dimension="window"):
-        """Initialise."""
-        # create standard Gantt plot for all stations (currently in single pdf file with single page)
-        super().__init__(plot_folder, "data_availability")
-        self.time_dim = time_dimension
-        self.window_dim = window_dimension
-        self.sampling = self._get_sampling(sampling)
-        self.linewidth = None
-        if self.sampling == 'h':
-            self.linewidth = 0.001
-        plot_dict = self._prepare_data(generators)
-        lgd = self._plot(plot_dict)
-        self._save(bbox_extra_artists=(lgd,), bbox_inches="tight")
-        # create summary Gantt plot (is data in at least one station available)
-        self.plot_name += "_summary"
-        plot_dict_summary = self._summarise_data(generators, summary_name)
-        lgd = self._plot(plot_dict_summary)
-        self._save(bbox_extra_artists=(lgd,), bbox_inches="tight")
-        # combination of station and summary plot, last element is summary broken bar
-        self.plot_name = "data_availability_combined"
-        plot_dict_summary.update(plot_dict)
-        lgd = self._plot(plot_dict_summary)
-        self._save(bbox_extra_artists=(lgd,), bbox_inches="tight")
-
-    def _prepare_data(self, generators: Dict[str, DataCollection]):
-        plt_dict = {}
-        for subset, data_collection in generators.items():
-            for station in data_collection:
-                labels = station.get_Y(as_numpy=False).resample({self.time_dim: self.sampling}, skipna=True).mean()
-                labels_bool = labels.sel(**{self.window_dim: 1}).notnull()
-                group = (labels_bool != labels_bool.shift({self.time_dim: 1})).cumsum()
-                plot_data = pd.DataFrame({"avail": labels_bool.values, "group": group.values},
-                                         index=labels.coords[self.time_dim].values)
-                t = plot_data.groupby("group").apply(lambda x: (x["avail"].head(1)[0], x.index[0], x.shape[0]))
-                t2 = [i[1:] for i in t if i[0]]
-
-                if plt_dict.get(str(station)) is None:
-                    plt_dict[str(station)] = {subset: t2}
-                else:
-                    plt_dict[str(station)].update({subset: t2})
-        return plt_dict
-
-    def _summarise_data(self, generators: Dict[str, DataCollection], summary_name: str):
-        plt_dict = {}
-        for subset, data_collection in generators.items():
-            all_data = None
-            for station in data_collection:
-                labels = station.get_Y(as_numpy=False).resample({self.time_dim: self.sampling}, skipna=True).mean()
-                labels_bool = labels.sel(**{self.window_dim: 1}).notnull()
-                if all_data is None:
-                    all_data = labels_bool
-                else:
-                    tmp = all_data.combine_first(labels_bool)  # expand dims to merged datetime coords
-                    all_data = np.logical_or(tmp, labels_bool).combine_first(
-                        all_data)  # apply logical on merge and fill missing with all_data
-
-            group = (all_data != all_data.shift({self.time_dim: 1})).cumsum()
-            plot_data = pd.DataFrame({"avail": all_data.values, "group": group.values},
-                                     index=all_data.coords[self.time_dim].values)
-            t = plot_data.groupby("group").apply(lambda x: (x["avail"].head(1)[0], x.index[0], x.shape[0]))
-            t2 = [i[1:] for i in t if i[0]]
-            if plt_dict.get(summary_name) is None:
-                plt_dict[summary_name] = {subset: t2}
-            else:
-                plt_dict[summary_name].update({subset: t2})
-        return plt_dict
-
-    def _plot(self, plt_dict):
-        colors = self.get_dataset_colors()
-        _used_colors = []
-        pos = 0
-        height = 0.8  # should be <= 1
-        yticklabels = []
-        number_of_stations = len(plt_dict.keys())
-        fig, ax = plt.subplots(figsize=(10, number_of_stations / 3))
-        for station, d in sorted(plt_dict.items(), reverse=True):
-            pos += 1
-            for subset, color in colors.items():
-                plt_data = d.get(subset)
-                if plt_data is None:
-                    continue
-                elif color not in _used_colors:  # this is required for a proper legend creation
-                    _used_colors.append(color)
-                ax.broken_barh(plt_data, (pos, height), color=color, edgecolor="white", linewidth=self.linewidth)
-            yticklabels.append(station)
-
-        ax.set_ylim([height, number_of_stations + 1])
-        ax.set_yticks(np.arange(len(plt_dict.keys())) + 1 + height / 2)
-        ax.set_yticklabels(yticklabels)
-        handles = [mpatches.Patch(color=c, label=k) for k, c in colors.items() if c in _used_colors]
-        lgd = plt.legend(handles=handles, bbox_to_anchor=(0, 1, 1, 0.2), loc="lower center", ncol=len(handles))
-        return lgd
-
-
 @TimeTrackingWrapper
 class PlotSeparationOfScales(AbstractPlotClass):
 
@@ -1292,178 +943,6 @@ class PlotSeparationOfScales(AbstractPlotClass):
             self._save()
 
 
-@TimeTrackingWrapper
-class PlotAvailabilityHistogram(AbstractPlotClass):
-    """
-    Create data availability plots as histogram.
-
-    Each entry of each generator is checked for `notnull()` values along all the datetime axis (boolean).
-    Calling this class creates two different types of histograms where each generator
-
-    1) data_availability_histogram: datetime (xaxis) vs. number of stations with availabile data (yaxis)
-    2) data_availability_histogram_cumulative: number of samples (xaxis) vs. number of stations having at least number
-       of samples (yaxis)
-
-    .. image:: ../../../../../_source/_plots/data_availability_histogram_hist.png
-        :width: 400
-
-    .. image:: ../../../../../_source/_plots/data_availability_histogram_hist_cum.png
-        :width: 400
-
-    """
-
-    def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".",
-                 subset_dim: str = 'DataSet', history_dim: str = 'window',
-                 station_dim: str = 'Stations',):
-
-        super().__init__(plot_folder, "data_availability_histogram")
-
-        self.subset_dim = subset_dim
-        self.history_dim = history_dim
-        self.station_dim = station_dim
-
-        self.freq = None
-        self.temporal_dim = None
-        self.target_dim = None
-        self._prepare_data(generators)
-
-        for plt_type in self.allowed_plot_types:
-            plot_name_tmp = self.plot_name
-            self.plot_name += '_' + plt_type
-            self._plot(plt_type=plt_type)
-            self._save()
-            self.plot_name = plot_name_tmp
-
-    def _set_dims_from_datahandler(self, data_handler):
-        self.temporal_dim = data_handler.id_class.time_dim
-        self.target_dim = data_handler.id_class.target_dim
-        self.freq = self._get_sampling(data_handler.id_class.sampling)
-
-    @property
-    def allowed_plot_types(self):
-        plot_types = ['hist', 'hist_cum']
-        return plot_types
-
-    def _prepare_data(self, generators: Dict[str, DataCollection]):
-        """
-        Prepares data to be used by plot methods.
-
-        Creates xarrays which are sums of valid data (boolean sums) across i) station_dim and ii) temporal_dim
-        """
-        avail_data_time_sum = {}
-        avail_data_station_sum = {}
-        dataset_time_interval = {}
-        for subset, generator in generators.items():
-            avail_list = []
-            for station in generator:
-                self._set_dims_from_datahandler(data_handler=station)
-                station_data_x = station.get_X(as_numpy=False)[0]
-                station_data_x = station_data_x.loc[{self.history_dim: 0,  # select recent window frame
-                                                     self.target_dim: station_data_x[self.target_dim].values[0]}]
-                station_data_x = self._reduce_dims(station_data_x)
-                avail_list.append(station_data_x.notnull())
-            avail_data = xr.concat(avail_list, dim=self.station_dim).notnull()
-            avail_data_time_sum[subset] = avail_data.sum(dim=self.station_dim)
-            avail_data_station_sum[subset] = avail_data.sum(dim=self.temporal_dim)
-            dataset_time_interval[subset] = self._get_first_and_last_indexelement_from_xarray(
-                avail_data_time_sum[subset], dim_name=self.temporal_dim, return_type='as_dict'
-            )
-        avail_data_amount = xr.concat(avail_data_time_sum.values(), pd.Index(avail_data_time_sum.keys(),
-                                                                             name=self.subset_dim)
-                                      )
-        full_time_index = self._make_full_time_index(avail_data_amount.coords[self.temporal_dim].values, freq=self.freq)
-        self.avail_data_cum_sum = xr.concat(avail_data_station_sum.values(), pd.Index(avail_data_station_sum.keys(),
-                                                                                      name=self.subset_dim))
-        self.avail_data_amount = avail_data_amount.reindex({self.temporal_dim: full_time_index})
-        self.dataset_time_interval = dataset_time_interval
-
-    def _reduce_dims(self, dataset):
-        if len(dataset.dims) > 2:
-            required = {self.temporal_dim, self.station_dim}
-            unimportant = set(dataset.dims).difference(required)
-            sel_dict = {un: dataset[un].values[0] for un in unimportant}
-            dataset = dataset.loc[sel_dict]
-        return dataset
-
-    @staticmethod
-    def _get_first_and_last_indexelement_from_xarray(xarray, dim_name, return_type='as_tuple'):
-        if isinstance(xarray, xr.DataArray):
-            first = xarray.coords[dim_name].values[0]
-            last = xarray.coords[dim_name].values[-1]
-            if return_type == 'as_tuple':
-                return first, last
-            elif return_type == 'as_dict':
-                return {'first': first, 'last': last}
-            else:
-                raise TypeError(f"return_type must be 'as_tuple' or 'as_dict', but is '{return_type}'")
-        else:
-            raise TypeError(f"xarray must be of type xr.DataArray, but is of type {type(xarray)}")
-
-    @staticmethod
-    def _make_full_time_index(irregular_time_index, freq):
-        full_time_index = pd.date_range(start=irregular_time_index[0], end=irregular_time_index[-1], freq=freq)
-        return full_time_index
-
-    def _plot(self, plt_type='hist', *args):
-        if plt_type == 'hist':
-            self._plot_hist()
-        elif plt_type == 'hist_cum':
-            self._plot_hist_cum()
-        else:
-            raise ValueError(f"plt_type mus be 'hist' or 'hist_cum', but is {type}")
-
-    def _plot_hist(self, *args):
-        colors = self.get_dataset_colors()
-        fig, axes = plt.subplots(figsize=(10, 3))
-        for i, subset in enumerate(self.dataset_time_interval.keys()):
-            plot_dataset = self.avail_data_amount.sel({self.subset_dim: subset,
-                                                       self.temporal_dim: slice(
-                                                           self.dataset_time_interval[subset]['first'],
-                                                           self.dataset_time_interval[subset]['last']
-                                                       )
-                                                       }
-                                                      )
-
-            plot_dataset.plot.step(color=colors[subset], ax=axes, label=subset)
-            plt.fill_between(plot_dataset.coords[self.temporal_dim].values, plot_dataset.values, color=colors[subset])
-
-        lgd = fig.legend(loc="upper right", ncol=len(self.dataset_time_interval),
-                         facecolor='white', framealpha=1, edgecolor='black')
-        for lgd_line in lgd.get_lines():
-            lgd_line.set_linewidth(4.0)
-        plt.gca().xaxis.set_major_locator(mdates.YearLocator())
-        plt.title('')
-        plt.ylabel('Number of samples')
-        plt.tight_layout()
-
-    def _plot_hist_cum(self, *args):
-        colors = self.get_dataset_colors()
-        fig, axes = plt.subplots(figsize=(10, 3))
-        n_bins = int(self.avail_data_cum_sum.max().values)
-        bins = np.arange(0, n_bins+1)
-        descending_subsets = self.avail_data_cum_sum.max(dim=self.station_dim).sortby(
-            self.avail_data_cum_sum.max(dim=self.station_dim), ascending=False
-        ).coords[self.subset_dim].values
-
-        for subset in descending_subsets:
-            self.avail_data_cum_sum.sel({self.subset_dim: subset}).plot.hist(ax=axes,
-                                                                             bins=bins,
-                                                                             label=subset,
-                                                                             cumulative=-1,
-                                                                             color=colors[subset],
-                                                                             # alpha=.5
-                                                                             )
-
-        lgd = fig.legend(loc="upper right", ncol=len(self.dataset_time_interval),
-                         facecolor='white', framealpha=1, edgecolor='black')
-        plt.title('')
-        plt.ylabel('Number of stations')
-        plt.xlabel('Number of samples')
-        plt.xlim((bins[0], bins[-1]))
-        plt.tight_layout()
-
-
-
 if __name__ == "__main__":
     stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
     path = "../../testrun_network/forecasts"
diff --git a/mlair/plotting/preprocessing_plotting.py b/mlair/plotting/preprocessing_plotting.py
new file mode 100644
index 00000000..aa61b1f3
--- /dev/null
+++ b/mlair/plotting/preprocessing_plotting.py
@@ -0,0 +1,438 @@
+"""Collection of plots to get more insight into data."""
+__author__ = "Lukas Leufen, Felix Kleinert"
+__date__ = '2021-04-13'
+
+from typing import List, Dict
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+from matplotlib import lines as mlines, pyplot as plt, patches as mpatches, dates as mdates
+
+from mlair.data_handler import DataCollection
+from mlair.helpers import TimeTrackingWrapper
+from mlair.plotting.abstract_plot_class import AbstractPlotClass
+
+
+@TimeTrackingWrapper
+class PlotStationMap(AbstractPlotClass):
+    """
+    Plot geographical overview of all used stations as squares.
+
+    Different data sets can be colorised by their keys in the input dictionary generators. The key defines the color
+    used on the map. Currently, there is only a white background, but this can be adjusted by loading locally stored
+    topography data (not implemented yet). The plot is saved under plot_path with the name station_map.pdf.
+
+    .. image:: ../../../../../_source/_plots/station_map.png
+        :width: 400
+    """
+
+    def __init__(self, generators: List, plot_folder: str = ".", plot_name="station_map"):
+        """
+        Set attributes and create plot.
+
+        :param generators: list of data collections to plot, where each element is either a DataCollection or a
+            (DataCollection, plot options) tuple
+        :param plot_folder: path to save the plot (default: current directory)
+        """
+        super().__init__(plot_folder, plot_name)
+        self._ax = None
+        self._gl = None
+        self._plot(generators)
+        self._save(bbox_inches="tight")
+
+    def _draw_background(self):
+        """Draw coastline, lakes, ocean, rivers and country borders as background on the map."""
+
+        import cartopy.feature as cfeature
+
+        self._ax.add_feature(cfeature.LAND.with_scale("50m"))
+        self._ax.natural_earth_shp(resolution='50m')
+        self._ax.add_feature(cfeature.COASTLINE.with_scale("50m"), edgecolor='black')
+        self._ax.add_feature(cfeature.LAKES.with_scale("50m"))
+        self._ax.add_feature(cfeature.OCEAN.with_scale("50m"))
+        self._ax.add_feature(cfeature.RIVERS.with_scale("50m"))
+        self._ax.add_feature(cfeature.BORDERS.with_scale("50m"), facecolor='none', edgecolor='black')
+
+    def _plot_stations(self, generators):
+        """
+        Loop over all entries in generators and their stations, and plot each station's position.
+
+        The position is highlighted by a marker (a square by default) on the map in the given color.
+
+        :param generators: list of data collections to plot, where each element is either a DataCollection or a
+            (DataCollection, plot options) tuple
+        """
+
+        import cartopy.crs as ccrs
+        if generators is not None:
+            legend_elements = []
+            default_colors = self.get_dataset_colors()
+            for element in generators:
+                data_collection, plot_opts = self._get_collection_and_opts(element)
+                name = data_collection.name or "unknown"
+                marker = plot_opts.get("marker", "s")
+                ms = plot_opts.get("ms", 6)
+                mec = plot_opts.get("mec", "k")
+                mfc = plot_opts.get("mfc", default_colors.get(name, "b"))
+                legend_elements.append(
+                    mlines.Line2D([], [], mfc=mfc, mec=mec, marker=self._adjust_marker(marker), ms=ms, linestyle='None',
+                                  label=f"{name} ({len(data_collection)})"))
+                for station in data_collection:
+                    coords = station.get_coordinates()
+                    IDx, IDy = coords["lon"], coords["lat"]
+                    self._ax.plot(IDx, IDy, mfc=mfc, mec=mec, marker=marker, ms=ms, transform=ccrs.PlateCarree())
+            if len(legend_elements) > 0:
+                self._ax.legend(handles=legend_elements, loc='best')
+
+    @staticmethod
+    def _adjust_marker(marker):
+        _adjust = {4: "<", 5: ">", 6: "^", 7: "v", 8: "<", 9: ">", 10: "^", 11: "v"}
+        if isinstance(marker, int) and marker in _adjust.keys():
+            return _adjust[marker]
+        else:
+            return marker
+
+    @staticmethod
+    def _get_collection_and_opts(element):
+        if isinstance(element, tuple):
+            if len(element) == 1:
+                return element[0], {}
+            else:
+                return element
+        else:
+            return element, {}
+
+    def _plot(self, generators: List):
+        """
+        Create the station map plot.
+
+        Set figure and call all required sub-methods.
+
+        :param generators: list of data collections to plot, where each element is either a DataCollection or a
+            (DataCollection, plot options) tuple
+        """
+
+        import cartopy.crs as ccrs
+        from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
+        fig = plt.figure(figsize=(10, 5))
+        self._ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
+        self._gl = self._ax.gridlines(xlocs=range(0, 21, 5), ylocs=range(44, 59, 2), draw_labels=True)
+        self._gl.xformatter = LONGITUDE_FORMATTER
+        self._gl.yformatter = LATITUDE_FORMATTER
+        self._draw_background()
+        self._plot_stations(generators)
+        self._adjust_extent()
+        plt.tight_layout()
+
+    def _adjust_extent(self):
+        import cartopy.crs as ccrs
+
+        def diff(arr):
+            return arr[1] - arr[0], arr[3] - arr[2]
+
+        def find_ratio(delta, reference=5):
+            return min(max(abs(reference / delta[0]), abs(reference / delta[1])), 5)
+
+        extent = self._ax.get_extent(crs=ccrs.PlateCarree())
+        ratio = find_ratio(diff(extent))
+        new_extent = extent + np.array([-1, 1, -1, 1]) * ratio
+        self._ax.set_extent(new_extent, crs=ccrs.PlateCarree())
+
+
+@TimeTrackingWrapper
+class PlotAvailability(AbstractPlotClass):
+    """
+    Create a data availability plot similar to a Gantt chart.
+
+    Each entry of the given generator will result in a new line in the plot. Data is summarised for the given temporal
+    resolution and checked whether data is available or not for each time step. This is afterwards highlighted as a
+    colored bar or a blank space.
+
+    You can set different colors to highlight subsets, for example by providing different generators for the same
+    index using different keys in the input dictionary.
+
+    Note: each bar is surrounded by a small white box to highlight gaps in between. This can make very short gaps
+    appear too long in the display. The same effect appears at a (seamless) transition from one subset to another.
+
+    Calling this class will create three versions of the availability plot.
+
+    1) Data availability for each element
+    2) Data availability as summary over all elements (is there at least a single element for each time step)
+    3) Combination of single and overall availability
+
+    .. image:: ../../../../../_source/_plots/data_availability.png
+        :width: 400
+
+    .. image:: ../../../../../_source/_plots/data_availability_summary.png
+        :width: 400
+
+    .. image:: ../../../../../_source/_plots/data_availability_combined.png
+        :width: 400
+
+    """
+
+    def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".", sampling="daily",
+                 summary_name="data availability", time_dimension="datetime", window_dimension="window"):
+        """Initialise."""
+        # create standard Gantt plot for all stations (currently a single-page pdf file)
+        super().__init__(plot_folder, "data_availability")
+        self.time_dim = time_dimension
+        self.window_dim = window_dimension
+        self.sampling = self._get_sampling(sampling)
+        self.linewidth = None
+        if self.sampling == 'h':
+            self.linewidth = 0.001
+        plot_dict = self._prepare_data(generators)
+        lgd = self._plot(plot_dict)
+        self._save(bbox_extra_artists=(lgd,), bbox_inches="tight")
+        # create summary Gantt plot (is data in at least one station available)
+        self.plot_name += "_summary"
+        plot_dict_summary = self._summarise_data(generators, summary_name)
+        lgd = self._plot(plot_dict_summary)
+        self._save(bbox_extra_artists=(lgd,), bbox_inches="tight")
+        # combination of station and summary plot, last element is summary broken bar
+        self.plot_name = "data_availability_combined"
+        plot_dict_summary.update(plot_dict)
+        lgd = self._plot(plot_dict_summary)
+        self._save(bbox_extra_artists=(lgd,), bbox_inches="tight")
+
+    def _prepare_data(self, generators: Dict[str, DataCollection]):
+        plt_dict = {}
+        for subset, data_collection in generators.items():
+            for station in data_collection:
+                labels = station.get_Y(as_numpy=False).resample({self.time_dim: self.sampling}, skipna=True).mean()
+                labels_bool = labels.sel(**{self.window_dim: 1}).notnull()
+                group = (labels_bool != labels_bool.shift({self.time_dim: 1})).cumsum()
+                plot_data = pd.DataFrame({"avail": labels_bool.values, "group": group.values},
+                                         index=labels.coords[self.time_dim].values)
+                t = plot_data.groupby("group").apply(lambda x: (x["avail"].head(1)[0], x.index[0], x.shape[0]))
+                t2 = [i[1:] for i in t if i[0]]
+
+                if plt_dict.get(str(station)) is None:
+                    plt_dict[str(station)] = {subset: t2}
+                else:
+                    plt_dict[str(station)].update({subset: t2})
+        return plt_dict
+
+    def _summarise_data(self, generators: Dict[str, DataCollection], summary_name: str):
+        plt_dict = {}
+        for subset, data_collection in generators.items():
+            all_data = None
+            for station in data_collection:
+                labels = station.get_Y(as_numpy=False).resample({self.time_dim: self.sampling}, skipna=True).mean()
+                labels_bool = labels.sel(**{self.window_dim: 1}).notnull()
+                if all_data is None:
+                    all_data = labels_bool
+                else:
+                    tmp = all_data.combine_first(labels_bool)  # expand dims to merged datetime coords
+                    all_data = np.logical_or(tmp, labels_bool).combine_first(
+                        all_data)  # apply logical on merge and fill missing with all_data
+
+            group = (all_data != all_data.shift({self.time_dim: 1})).cumsum()
+            plot_data = pd.DataFrame({"avail": all_data.values, "group": group.values},
+                                     index=all_data.coords[self.time_dim].values)
+            t = plot_data.groupby("group").apply(lambda x: (x["avail"].head(1)[0], x.index[0], x.shape[0]))
+            t2 = [i[1:] for i in t if i[0]]
+            if plt_dict.get(summary_name) is None:
+                plt_dict[summary_name] = {subset: t2}
+            else:
+                plt_dict[summary_name].update({subset: t2})
+        return plt_dict
+
+    def _plot(self, plt_dict):
+        colors = self.get_dataset_colors()
+        _used_colors = []
+        pos = 0
+        height = 0.8  # should be <= 1
+        yticklabels = []
+        number_of_stations = len(plt_dict.keys())
+        fig, ax = plt.subplots(figsize=(10, number_of_stations / 3))
+        for station, d in sorted(plt_dict.items(), reverse=True):
+            pos += 1
+            for subset, color in colors.items():
+                plt_data = d.get(subset)
+                if plt_data is None:
+                    continue
+                elif color not in _used_colors:  # this is required for a proper legend creation
+                    _used_colors.append(color)
+                ax.broken_barh(plt_data, (pos, height), color=color, edgecolor="white", linewidth=self.linewidth)
+            yticklabels.append(station)
+
+        ax.set_ylim([height, number_of_stations + 1])
+        ax.set_yticks(np.arange(len(plt_dict.keys())) + 1 + height / 2)
+        ax.set_yticklabels(yticklabels)
+        handles = [mpatches.Patch(color=c, label=k) for k, c in colors.items() if c in _used_colors]
+        lgd = plt.legend(handles=handles, bbox_to_anchor=(0, 1, 1, 0.2), loc="lower center", ncol=len(handles))
+        return lgd
+
+
+@TimeTrackingWrapper
+class PlotAvailabilityHistogram(AbstractPlotClass):
+    """
+    Create data availability plots as histogram.
+
+    Each entry of each generator is checked for `notnull()` values along the datetime axis (boolean).
+    Calling this class creates two different types of histograms for each generator:
+
+    1) data_availability_histogram: datetime (xaxis) vs. number of stations with available data (yaxis)
+    2) data_availability_histogram_cumulative: number of samples (xaxis) vs. number of stations having at least this
+       number of samples (yaxis)
+
+    .. image:: ../../../../../_source/_plots/data_availability_histogram_hist.png
+        :width: 400
+
+    .. image:: ../../../../../_source/_plots/data_availability_histogram_hist_cum.png
+        :width: 400
+
+    """
+
+    def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".",
+                 subset_dim: str = 'DataSet', history_dim: str = 'window',
+                 station_dim: str = 'Stations', ):
+
+        super().__init__(plot_folder, "data_availability_histogram")
+
+        self.subset_dim = subset_dim
+        self.history_dim = history_dim
+        self.station_dim = station_dim
+
+        self.freq = None
+        self.temporal_dim = None
+        self.target_dim = None
+        self._prepare_data(generators)
+
+        for plt_type in self.allowed_plot_types:
+            plot_name_tmp = self.plot_name
+            self.plot_name += '_' + plt_type
+            self._plot(plt_type=plt_type)
+            self._save()
+            self.plot_name = plot_name_tmp
+
+    def _set_dims_from_datahandler(self, data_handler):
+        self.temporal_dim = data_handler.id_class.time_dim
+        self.target_dim = data_handler.id_class.target_dim
+        self.freq = self._get_sampling(data_handler.id_class.sampling)
+
+    @property
+    def allowed_plot_types(self):
+        plot_types = ['hist', 'hist_cum']
+        return plot_types
+
+    def _prepare_data(self, generators: Dict[str, DataCollection]):
+        """
+        Prepare data to be used by the plot methods.
+
+        Create xarrays that contain sums of valid data (boolean sums) across i) station_dim and ii) temporal_dim.
+        """
+        avail_data_time_sum = {}
+        avail_data_station_sum = {}
+        dataset_time_interval = {}
+        for subset, generator in generators.items():
+            avail_list = []
+            for station in generator:
+                self._set_dims_from_datahandler(data_handler=station)
+                station_data_x = station.get_X(as_numpy=False)[0]
+                station_data_x = station_data_x.loc[{self.history_dim: 0,  # select recent window frame
+                                                     self.target_dim: station_data_x[self.target_dim].values[0]}]
+                station_data_x = self._reduce_dims(station_data_x)
+                avail_list.append(station_data_x.notnull())
+            avail_data = xr.concat(avail_list, dim=self.station_dim).notnull()
+            avail_data_time_sum[subset] = avail_data.sum(dim=self.station_dim)
+            avail_data_station_sum[subset] = avail_data.sum(dim=self.temporal_dim)
+            dataset_time_interval[subset] = self._get_first_and_last_indexelement_from_xarray(
+                avail_data_time_sum[subset], dim_name=self.temporal_dim, return_type='as_dict'
+            )
+        avail_data_amount = xr.concat(avail_data_time_sum.values(), pd.Index(avail_data_time_sum.keys(),
+                                                                             name=self.subset_dim)
+                                      )
+        full_time_index = self._make_full_time_index(avail_data_amount.coords[self.temporal_dim].values, freq=self.freq)
+        self.avail_data_cum_sum = xr.concat(avail_data_station_sum.values(), pd.Index(avail_data_station_sum.keys(),
+                                                                                      name=self.subset_dim))
+        self.avail_data_amount = avail_data_amount.reindex({self.temporal_dim: full_time_index})
+        self.dataset_time_interval = dataset_time_interval
+
+    def _reduce_dims(self, dataset):
+        if len(dataset.dims) > 2:
+            required = {self.temporal_dim, self.station_dim}
+            unimportant = set(dataset.dims).difference(required)
+            sel_dict = {un: dataset[un].values[0] for un in unimportant}
+            dataset = dataset.loc[sel_dict]
+        return dataset
+
+    @staticmethod
+    def _get_first_and_last_indexelement_from_xarray(xarray, dim_name, return_type='as_tuple'):
+        if isinstance(xarray, xr.DataArray):
+            first = xarray.coords[dim_name].values[0]
+            last = xarray.coords[dim_name].values[-1]
+            if return_type == 'as_tuple':
+                return first, last
+            elif return_type == 'as_dict':
+                return {'first': first, 'last': last}
+            else:
+                raise TypeError(f"return_type must be 'as_tuple' or 'as_dict', but is '{return_type}'")
+        else:
+            raise TypeError(f"xarray must be of type xr.DataArray, but is of type {type(xarray)}")
+
+    @staticmethod
+    def _make_full_time_index(irregular_time_index, freq):
+        full_time_index = pd.date_range(start=irregular_time_index[0], end=irregular_time_index[-1], freq=freq)
+        return full_time_index
+
+    def _plot(self, plt_type='hist', *args):
+        if plt_type == 'hist':
+            self._plot_hist()
+        elif plt_type == 'hist_cum':
+            self._plot_hist_cum()
+        else:
+            raise ValueError(f"plt_type mus be 'hist' or 'hist_cum', but is {type}")
+
+    def _plot_hist(self, *args):
+        colors = self.get_dataset_colors()
+        fig, axes = plt.subplots(figsize=(10, 3))
+        for i, subset in enumerate(self.dataset_time_interval.keys()):
+            plot_dataset = self.avail_data_amount.sel({self.subset_dim: subset,
+                                                       self.temporal_dim: slice(
+                                                           self.dataset_time_interval[subset]['first'],
+                                                           self.dataset_time_interval[subset]['last']
+                                                       )
+                                                       }
+                                                      )
+
+            plot_dataset.plot.step(color=colors[subset], ax=axes, label=subset)
+            plt.fill_between(plot_dataset.coords[self.temporal_dim].values, plot_dataset.values, color=colors[subset])
+
+        lgd = fig.legend(loc="upper right", ncol=len(self.dataset_time_interval),
+                         facecolor='white', framealpha=1, edgecolor='black')
+        for lgd_line in lgd.get_lines():
+            lgd_line.set_linewidth(4.0)
+        plt.gca().xaxis.set_major_locator(mdates.YearLocator())
+        plt.title('')
+        plt.ylabel('Number of samples')
+        plt.tight_layout()
+
+    def _plot_hist_cum(self, *args):
+        colors = self.get_dataset_colors()
+        fig, axes = plt.subplots(figsize=(10, 3))
+        n_bins = int(self.avail_data_cum_sum.max().values)
+        bins = np.arange(0, n_bins + 1)
+        descending_subsets = self.avail_data_cum_sum.max(dim=self.station_dim).sortby(
+            self.avail_data_cum_sum.max(dim=self.station_dim), ascending=False
+        ).coords[self.subset_dim].values
+
+        for subset in descending_subsets:
+            self.avail_data_cum_sum.sel({self.subset_dim: subset}).plot.hist(ax=axes,
+                                                                             bins=bins,
+                                                                             label=subset,
+                                                                             cumulative=-1,
+                                                                             color=colors[subset],
+                                                                             # alpha=.5
+                                                                             )
+
+        lgd = fig.legend(loc="upper right", ncol=len(self.dataset_time_interval),
+                         facecolor='white', framealpha=1, edgecolor='black')
+        plt.title('')
+        plt.ylabel('Number of stations')
+        plt.xlabel('Number of samples')
+        plt.xlim((bins[0], bins[-1]))
+        plt.tight_layout()
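
The core trick in PlotAvailability._prepare_data and _summarise_data is a run-length encoding of the availability mask: comparing the boolean series with its shifted self and taking the cumulative sum assigns one group id per contiguous run, and each available run is then reduced to a (start, length) tuple as required by matplotlib's broken_barh. A self-contained sketch with synthetic data (not part of the patch):

.. code-block:: python

    import pandas as pd

    # synthetic daily availability mask with two gaps
    idx = pd.date_range("2021-01-01", periods=10, freq="D")
    avail = pd.Series([1, 1, 0, 1, 1, 1, 0, 0, 1, 1], index=idx).astype(bool)

    # each switch between True and False starts a new group id
    group = (avail != avail.shift()).cumsum()

    # reduce every group to (first value, start timestamp, run length)
    runs = avail.groupby(group).apply(lambda x: (x.iloc[0], x.index[0], x.shape[0]))

    # keep only the available runs as (start, length) tuples for broken_barh
    bars = [r[1:] for r in runs if r[0]]
    # [(Timestamp('2021-01-01'), 2), (Timestamp('2021-01-04'), 3), (Timestamp('2021-01-09'), 2)]
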
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 73aebb00..ff74da37 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -19,9 +19,9 @@ from mlair.helpers.datastore import NameNotFoundInDataStore
 from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list, tables
 from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
 from mlair.model_modules import AbstractModelClass
-from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \
-    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotAvailability, PlotAvailabilityHistogram, \
-    PlotConditionalQuantiles, PlotSeparationOfScales
+from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
+    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales
+from mlair.plotting.preprocessing_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram
 from mlair.run_modules.run_environment import RunEnvironment
 
 
-- 
GitLab


From 175841d5855cef26b63fdbb7e12075e0a4e17b19 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 13 Apr 2021 11:52:12 +0200
Subject: [PATCH 072/175] moved availability and station map to preprocessing

---
 mlair/run_modules/post_processing.py | 34 -------------------
 mlair/run_modules/pre_processing.py  | 50 ++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index ff74da37..a633dec1 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -21,7 +21,6 @@ from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
 from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
     PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales
-from mlair.plotting.preprocessing_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram
 from mlair.run_modules.run_environment import RunEnvironment
 
 
@@ -325,23 +324,6 @@ class PostProcessing(RunEnvironment):
         except Exception as e:
             logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error: {e}")
 
-        try:
-            if "PlotStationMap" in plot_list:
-                if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get(
-                        "hostname")[:6] in self.data_store.get("hpc_hosts"):
-                    logging.warning(
-                        f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}")
-                else:
-                    gens = [(self.train_data, {"marker": 5, "ms": 9}),
-                            (self.val_data, {"marker": 6, "ms": 9}),
-                            (self.test_data, {"marker": 4, "ms": 9})]
-                    PlotStationMap(generators=gens, plot_folder=self.plot_path)
-                    gens = [(self.train_val_data, {"marker": 8, "ms": 9}),
-                            (self.test_data, {"marker": 9, "ms": 9})]
-                    PlotStationMap(generators=gens, plot_folder=self.plot_path, plot_name="station_map_var")
-        except Exception as e:
-            logging.error(f"Could not create plot PlotStationMap due to the following error: {e}")
-
         try:
             if "PlotMonthlySummary" in plot_list:
                 PlotMonthlySummary(self.test_data.keys(), path, r"forecasts_%s_test.nc", self.target_var,
@@ -372,22 +354,6 @@ class PostProcessing(RunEnvironment):
         except Exception as e:
             logging.error(f"Could not create plot PlotTimeSeries due to the following error: {e}")
 
-        try:
-            if "PlotAvailability" in plot_list:
-                avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
-                PlotAvailability(avail_data, plot_folder=self.plot_path, time_dimension=time_dim,
-                                 window_dimension=window_dim)
-        except Exception as e:
-            logging.error(f"Could not create plot PlotAvailability due to the following error: {e}")
-
-        try:
-            if "PlotAvailabilityHistogram" in plot_list:
-                avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
-                PlotAvailabilityHistogram(avail_data, plot_folder=self.plot_path, station_dim=iter_dim,
-                                          history_dim=window_dim)
-        except Exception as e:
-            logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}")
-
     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
 
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index f59a4e89..3c2670aa 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -18,6 +18,7 @@ from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
+from mlair.plotting.preprocessing_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram
 
 
 class PreProcessing(RunEnvironment):
@@ -67,6 +68,7 @@ class PreProcessing(RunEnvironment):
         self.split_train_val_test()
         self.report_pre_processing()
         self.prepare_competitors()
+        self.plot()
 
     def report_pre_processing(self):
         """Log some metrics on data and create latex report."""
@@ -327,6 +329,54 @@ class PreProcessing(RunEnvironment):
         else:
             logging.info("No preparation required because no competitor was provided to the workflow.")
 
+    def plot(self):
+        logging.info("Run plotting routines...")
+
+        plot_list = self.data_store.get("plot_list", "postprocessing")
+        time_dim = self.data_store.get("time_dim")
+        window_dim = self.data_store.get("window_dim")
+        target_dim = self.data_store.get("target_dim")
+        iter_dim = self.data_store.get("iter_dim")
+
+        train_data = self.data_store.get("data_collection", "train")
+        val_data = self.data_store.get("data_collection", "val")
+        test_data = self.data_store.get("data_collection", "test")
+        train_val_data = self.data_store.get("data_collection", "train_val")
+        plot_path: str = self.data_store.get("plot_path")
+
+        try:
+            if "PlotStationMap" in plot_list:
+                if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get(
+                        "hostname")[:6] in self.data_store.get("hpc_hosts"):
+                    logging.warning(
+                        f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}")
+                else:
+                    gens = [(train_data, {"marker": 5, "ms": 9}),
+                            (val_data, {"marker": 6, "ms": 9}),
+                            (test_data, {"marker": 4, "ms": 9})]
+                    PlotStationMap(generators=gens, plot_folder=plot_path)
+                    gens = [(train_val_data, {"marker": 8, "ms": 9}),
+                            (test_data, {"marker": 9, "ms": 9})]
+                    PlotStationMap(generators=gens, plot_folder=plot_path, plot_name="station_map_var")
+        except Exception as e:
+            logging.error(f"Could not create plot PlotStationMap due to the following error: {e}")
+
+        try:
+            if "PlotAvailability" in plot_list:
+                avail_data = {"train": train_data, "val": val_data, "test": test_data}
+                PlotAvailability(avail_data, plot_folder=plot_path, time_dimension=time_dim,
+                                 window_dimension=window_dim)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotAvailability due to the following error: {e}")
+
+        try:
+            if "PlotAvailabilityHistogram" in plot_list:
+                avail_data = {"train": train_data, "val": val_data, "test": test_data}
+                PlotAvailabilityHistogram(avail_data, plot_folder=plot_path, station_dim=iter_dim,
+                                          history_dim=window_dim)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}")
+
 
 def f_proc(data_handler, station, name_affix, store, **kwargs):
     """
-- 
GitLab
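
Every plot in the new PreProcessing.plot method follows the same guard pattern: check whether the plot is requested in plot_list, create it, and log failures instead of raising, so that a single broken plot cannot abort the whole run. As a sketch, this pattern could be factored into a small helper (the helper is hypothetical and not part of the patch):

.. code-block:: python

    import logging


    def try_plot(plot_cls, plot_list, *args, **kwargs):
        """Create plot_cls if requested in plot_list; log errors instead of raising."""
        if plot_cls.__name__ not in plot_list:
            return
        try:
            plot_cls(*args, **kwargs)
        except Exception as e:
            logging.error(f"Could not create plot {plot_cls.__name__} due to the following error: {e}")

    # usage, equivalent to one of the guarded blocks above:
    # try_plot(PlotAvailability, plot_list, avail_data, plot_folder=plot_path,
    #          time_dimension=time_dim, window_dimension=window_dim)
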


From ab5136ae25dedc7af8f97f33bd9720d1432605c2 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 13 Apr 2021 16:55:33 +0200
Subject: [PATCH 073/175] periodogram plot is included in preprocessing
 plotting for the first time

---
 HPC_setup/requirements_HDFML_additionals.txt  |  1 +
 HPC_setup/requirements_JUWELS_additionals.txt |  1 +
 mlair/plotting/preprocessing_plotting.py      | 95 ++++++++++++++++++-
 mlair/run_modules/pre_processing.py           | 13 ++-
 requirements.txt                              |  1 +
 requirements_gpu.txt                          |  1 +
 6 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/HPC_setup/requirements_HDFML_additionals.txt b/HPC_setup/requirements_HDFML_additionals.txt
index b2a29fbf..fd22a309 100644
--- a/HPC_setup/requirements_HDFML_additionals.txt
+++ b/HPC_setup/requirements_HDFML_additionals.txt
@@ -1,6 +1,7 @@
 absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
+astropy==4.1
 attrs==20.3.0
 bottleneck==1.3.2
 cached-property==1.5.2
diff --git a/HPC_setup/requirements_JUWELS_additionals.txt b/HPC_setup/requirements_JUWELS_additionals.txt
index b2a29fbf..fd22a309 100644
--- a/HPC_setup/requirements_JUWELS_additionals.txt
+++ b/HPC_setup/requirements_JUWELS_additionals.txt
@@ -1,6 +1,7 @@
 absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
+astropy==4.1
 attrs==20.3.0
 bottleneck==1.3.2
 cached-property==1.5.2
diff --git a/mlair/plotting/preprocessing_plotting.py b/mlair/plotting/preprocessing_plotting.py
index aa61b1f3..da5916fb 100644
--- a/mlair/plotting/preprocessing_plotting.py
+++ b/mlair/plotting/preprocessing_plotting.py
@@ -3,14 +3,17 @@ __author__ = "Lukas Leufen, Felix Kleinert"
 __date__ = '2021-04-13'
 
 from typing import List, Dict
+import os
 
 import numpy as np
 import pandas as pd
 import xarray as xr
+import matplotlib
 from matplotlib import lines as mlines, pyplot as plt, patches as mpatches, dates as mdates
+from astropy.timeseries import LombScargle
 
 from mlair.data_handler import DataCollection
-from mlair.helpers import TimeTrackingWrapper
+from mlair.helpers import TimeTrackingWrapper, to_list
 from mlair.plotting.abstract_plot_class import AbstractPlotClass
 
 
@@ -436,3 +439,93 @@ class PlotAvailabilityHistogram(AbstractPlotClass):
         plt.xlabel('Number of samples')
         plt.xlim((bins[0], bins[-1]))
         plt.tight_layout()
+
+
+class PlotPeriodogram(AbstractPlotClass):
+
+    def __init__(self, generator: Dict[str, DataCollection], plot_folder: str = ".", plot_name="periodogram",
+                 variables_dim="variables", time_dim="datetime", sampling="daily"):
+        super().__init__(plot_folder, plot_name)
+        self.variables_dim = variables_dim
+        self.time_dim = time_dim
+
+        for pos, s in enumerate(sampling if isinstance(sampling, tuple) else (sampling,)):
+            self._sampling = s
+            self._prepare_pgram(generator, pos)
+            self._plot(raw=True)
+            self._plot(raw=False)
+
+    def _prepare_pgram(self, generator, pos):
+        raw_data = dict()
+        plot_data = dict()
+        plot_data_raw = dict()
+        plot_data_mean = dict()
+        for g in generator:
+            print(g)
+            d = g.id_class._data
+            d = d[pos] if isinstance(d, tuple) else d
+            for var in d[self.variables_dim].values:
+                var_str = str(var)
+                d_var = d.loc[{self.variables_dim: var}].squeeze().dropna(self.time_dim)
+                t = (d_var.datetime - d_var.datetime[0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
+                f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1).autopower()
+                raw_data[var_str] = [(f, pgram)] if var_str not in raw_data.keys() else raw_data[var_str] + [(f, pgram)]
+        self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000)
+        for var in raw_data.keys():
+            pgram_com = []
+            pgram_mean = 0
+            all_data = raw_data[var]
+            pgram_mean_raw = np.zeros((len(self.f_index), len(all_data)))
+            for i, (f, pgram) in enumerate(all_data):
+                d = np.interp(self.f_index, f, pgram)
+                pgram_com.append(d)
+                pgram_mean += d
+                pgram_mean_raw[:, i] = d
+            pgram_mean /= len(all_data)
+            plot_data[var] = pgram_com
+            plot_data_mean[var] = (self.f_index, pgram_mean)
+            plot_data_raw[var] = (self.f_index, pgram_mean_raw)
+        self.plot_data = plot_data
+        self.plot_data_mean = plot_data_mean
+        self.plot_data_raw = plot_data_raw
+
+    @staticmethod
+    def _add_annotation_line(pos, div, lims, unit):
+        for p in to_list(pos):  # annotate each requested frequency
+            plt.vlines(p / div, *lims, "black")
+            plt.text(p / div, lims[0], r"%s$%s^{-1}$" % (p, unit), rotation="vertical", rotation_mode="anchor")
+
+    def _plot(self, raw=True):
+        plot_path = os.path.join(os.path.abspath(self.plot_folder),
+                                 f"{self.plot_name}{'_raw' if raw else ''}_{self._sampling}.pdf")
+        pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+        for var in self.plot_data.keys():
+            fig, ax = plt.subplots()
+            if raw is True:
+                for pgram in self.plot_data[var]:
+                    ax.plot(self.f_index, pgram, "lightblue")
+                ax.plot(*self.plot_data_mean[var], "blue")
+            else:
+                ma = pd.DataFrame(np.vstack(self.plot_data[var]).T).rolling(5, center=True, axis=0)
+                mean = ma.mean().mean(axis=1).values.flatten()
+                upper, lower = ma.max().mean(axis=1).values.flatten(), ma.min().mean(axis=1).values.flatten()
+                ax.plot(self.f_index, mean, "blue")
+                ax.fill_between(self.f_index, lower, upper, color="lightblue")
+            plt.yscale("log")
+            plt.xscale("log")
+            ax.set_ylabel("power", fontsize='x-large')
+            ax.set_xlabel("frequency $[day^{-1}$]", fontsize='x-large')
+            lims = ax.get_ylim()
+            self._add_annotation_line([1, 2, 3], 365.25, lims, "yr")  # per year
+            self._add_annotation_line(1, 365.25 / 12, lims, "m")  # per month
+            self._add_annotation_line(1, 7, lims, "w")  # per week
+            self._add_annotation_line([1, 0.5], 1, lims, "d")  # per day
+            if self._sampling == "hourly":
+                self._add_annotation_line(2, 1, lims, "d")  # per day
+                self._add_annotation_line([1, 0.5], 1 / 24., lims, "h")  # per hour
+            title = f"Periodogram ({var})"
+            plt.title(title)
+            pdf_pages.savefig()
+        # close all open figures / plots
+        pdf_pages.close()
+        plt.close('all')
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 3c2670aa..d1ec0c60 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -18,7 +18,8 @@ from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
-from mlair.plotting.preprocessing_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram
+from mlair.plotting.preprocessing_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
+    PlotPeriodogram
 
 
 class PreProcessing(RunEnvironment):
@@ -344,6 +345,8 @@ class PreProcessing(RunEnvironment):
         train_val_data = self.data_store.get("data_collection", "train_val")
         plot_path: str = self.data_store.get("plot_path")
 
+        sampling = self.data_store.get("sampling")
+
         try:
             if "PlotStationMap" in plot_list:
                 if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get(
@@ -377,6 +380,14 @@ class PreProcessing(RunEnvironment):
         except Exception as e:
             logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}")
 
+        try:
+            if "PlotPeriodogram" in plot_list:
+                PlotPeriodogram(train_data, plot_folder=plot_path, time_dim=time_dim, variables_dim=target_dim,
+                                sampling=sampling)
+
+        except Exception as e:
+            logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}")
+
 
 def f_proc(data_handler, station, name_affix, store, **kwargs):
     """
diff --git a/requirements.txt b/requirements.txt
index 85655e23..dba565fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
+astropy==4.1
 attrs==20.3.0
 bottleneck==1.3.2
 cached-property==1.5.2
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
index cc189496..f170e1b7 100644
--- a/requirements_gpu.txt
+++ b/requirements_gpu.txt
@@ -1,6 +1,7 @@
 absl-py==0.11.0
 appdirs==1.4.4
 astor==0.8.1
+astropy==4.1
 attrs==20.3.0
 bottleneck==1.3.2
 cached-property==1.5.2
-- 
GitLab


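PATCH 073 computes one Lomb-Scargle periodogram per station and variable and interpolates each onto a common log-spaced frequency axis so the results can be averaged. Because Lomb-Scargle handles unevenly sampled series, missing values are simply dropped rather than imputed. A minimal sketch of that workflow with synthetic data (time axis in days, matching the np.timedelta64(1, "D") conversion above):

    import numpy as np
    from astropy.timeseries import LombScargle

    rng = np.random.default_rng(0)
    # irregular daily sampling with gaps, as produced by dropna on the time dimension
    t = np.sort(rng.choice(np.arange(3 * 365), size=800, replace=False)).astype(float)
    y = np.sin(2 * np.pi * t / 365.25) + 0.3 * rng.standard_normal(t.size)  # annual cycle + noise

    f, pgram = LombScargle(t, y, nterms=1).autopower()
    f_index = np.logspace(-3, 0, 1000)  # common axis in day^-1; upper bound 1 for daily sampling
    pgram_interp = np.interp(f_index, f, pgram)  # align all periodograms before averaging
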
From 8f2b1be959dd94ad94117ca07d5d940e560acc80 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 13 Apr 2021 17:23:39 +0200
Subject: [PATCH 074/175] added overall plot including all variables

---
 mlair/plotting/preprocessing_plotting.py | 40 ++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/mlair/plotting/preprocessing_plotting.py b/mlair/plotting/preprocessing_plotting.py
index da5916fb..70c007c6 100644
--- a/mlair/plotting/preprocessing_plotting.py
+++ b/mlair/plotting/preprocessing_plotting.py
@@ -454,6 +454,8 @@ class PlotPeriodogram(AbstractPlotClass):
             self._prepare_pgram(generator, pos)
             self._plot(raw=True)
             self._plot(raw=False)
+            self._plot_total(raw=True)
+            self._plot_total(raw=False)
 
     def _prepare_pgram(self, generator, pos):
         raw_data = dict()
@@ -529,3 +531,41 @@ class PlotPeriodogram(AbstractPlotClass):
         # close all open figures / plots
         pdf_pages.close()
         plt.close('all')
+
+    def _plot_total(self, raw=True):
+        plot_path = os.path.join(os.path.abspath(self.plot_folder),
+                                 f"{self.plot_name}{'_raw' if raw else ''}_{self._sampling}_total.pdf")
+        pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+        fig, ax = plt.subplots()
+        res = None
+        for var in self.plot_data_raw.keys():
+            d_var = self.plot_data_raw[var][1]
+            res = d_var if res is None else np.concatenate((res, d_var), axis=-1)
+        if raw is True:
+            for i in range(res.shape[1]):
+                ax.plot(self.f_index, res[:, i], "lightblue")
+            ax.plot(self.f_index, res.mean(axis=1), "blue")
+        else:
+            ma = pd.DataFrame(np.vstack(res)).rolling(5, center=True, axis=0)
+            mean = ma.mean().mean(axis=1).values.flatten()
+            upper, lower = ma.max().mean(axis=1).values.flatten(), ma.min().mean(axis=1).values.flatten()
+            ax.plot(self.f_index, mean, "blue")
+            ax.fill_between(self.f_index, lower, upper, color="lightblue")
+        plt.yscale("log")
+        plt.xscale("log")
+        ax.set_ylabel("power", fontsize='x-large')
+        ax.set_xlabel("frequency $[day^{-1}$]", fontsize='x-large')
+        lims = ax.get_ylim()
+        self._add_annotation_line([1, 2, 3], 365.25, lims, "yr")  # per year
+        self._add_annotation_line(1, 365.25 / 12, lims, "m")  # per month
+        self._add_annotation_line(1, 7, lims, "w")  # per week
+        self._add_annotation_line([1, 0.5], 1, lims, "d")  # per day
+        if self._sampling == "hourly":
+            self._add_annotation_line(2, 1, lims, "d")  # per day
+            self._add_annotation_line([1, 0.5], 1 / 24., lims, "h")  # per hour
+        title = f"Periodogram (total)"
+        plt.title(title)
+        pdf_pages.savefig()
+        # close all open figures / plots
+        pdf_pages.close()
+        plt.close('all')
-- 
GitLab


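In the non-raw variant of both _plot and _plot_total, each periodogram is smoothed with a centered 5-bin rolling window along the frequency axis and then averaged across series; the averaged rolling max/min form the shaded envelope. A minimal sketch with hypothetical data:

    import numpy as np
    import pandas as pd

    power = np.random.default_rng(1).random((1000, 20))  # (frequency bins, series)
    ma = pd.DataFrame(power).rolling(5, center=True)  # rolling window along the frequency axis
    mean = ma.mean().mean(axis=1).values  # smooth each series, then average across series
    upper = ma.max().mean(axis=1).values  # upper edge of the shaded envelope
    lower = ma.min().mean(axis=1).values  # lower edge of the shaded envelope
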
From 95688d303cfde018f2abf6ca172eb33be7a34948 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 13 Apr 2021 17:28:05 +0200
Subject: [PATCH 075/175] added short description

---
 mlair/plotting/preprocessing_plotting.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/mlair/plotting/preprocessing_plotting.py b/mlair/plotting/preprocessing_plotting.py
index 70c007c6..53b29568 100644
--- a/mlair/plotting/preprocessing_plotting.py
+++ b/mlair/plotting/preprocessing_plotting.py
@@ -442,6 +442,19 @@ class PlotAvailabilityHistogram(AbstractPlotClass):
 
 
 class PlotPeriodogram(AbstractPlotClass):
+    """
+    Create a Lomb-Scargle periodogram of the raw input and target data. The Lomb-Scargle method can deal with missing values.
+
+    This plot routine creates the following plots:
+
+    * "raw": data are not aggregated, 1 graph per variable
+    * "": single data lines are aggregated, 1 graph per variable
+    * "total": data are aggregated over all variables, single graph
+
+    If the data consist of different sampling rates, a separate plot is created for each sampling rate.
+
+    To work properly, the data handler must have the attribute `.id_class._data`.
+    """
 
     def __init__(self, generator: Dict[str, DataCollection], plot_folder: str = ".", plot_name="periodogram",
                  variables_dim="variables", time_dim="datetime", sampling="daily"):
-- 
GitLab


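Since _add_annotation_line marks the frequency p / div in units of day^-1, the reference lines land at 1/365.25 ≈ 0.0027 day^-1 (plus 2/365.25 and 3/365.25) for the yearly markers, 12/365.25 ≈ 0.033 day^-1 for the monthly marker, 1/7 ≈ 0.14 day^-1 for the weekly marker, 1 and 0.5 day^-1 for the daily markers, and, with hourly sampling, 1/(1/24) = 24 day^-1 and 0.5/(1/24) = 12 day^-1 for the hourly markers.
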
From 37c6a279b5386038475d581a95eb62b17283ae39 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 14 Apr 2021 15:35:09 +0200
Subject: [PATCH 076/175] PlotPeriodogram can now plot periodograms before and
 after filtering

---
 .../data_handler_mixed_sampling.py            |   6 +-
 mlair/plotting/preprocessing_plotting.py      | 275 +++++++++++++-----
 mlair/run_modules/pre_processing.py           |   3 +-
 3 files changed, 203 insertions(+), 81 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 75e9e645..e2516257 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -56,7 +56,7 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation):
         kwargs.update({parameter_name: parameter})
 
     def make_input_target(self):
-        self._data = list(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
+        self._data = tuple(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()
 
     def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]:
@@ -110,7 +110,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
         A KZ filter is applied to the input data that has hourly resolution. Labels Y are provided as aggregated values
         with daily resolution.
         """
-        self._data = list(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
+        self._data = tuple(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()
         self.apply_kz_filter()
 
@@ -158,7 +158,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
     def _extract_lazy(self, lazy_data):
         _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
         start_inp, end_inp = self.update_start_end(0)
-        self._data = list(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1]))
+        self._data = tuple(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1]))
         self.input_data = self._slice_prep(_input_data, start_inp, end_inp)
         self.target_data = self._slice_prep(_target_data, self.start, self.end)
 
diff --git a/mlair/plotting/preprocessing_plotting.py b/mlair/plotting/preprocessing_plotting.py
index 53b29568..84df9b4b 100644
--- a/mlair/plotting/preprocessing_plotting.py
+++ b/mlair/plotting/preprocessing_plotting.py
@@ -4,6 +4,9 @@ __date__ = '2021-04-13'
 
 from typing import List, Dict
 import os
+import logging
+import multiprocessing
+import psutil
 
 import numpy as np
 import pandas as pd
@@ -18,7 +21,7 @@ from mlair.plotting.abstract_plot_class import AbstractPlotClass
 
 
 @TimeTrackingWrapper
-class PlotStationMap(AbstractPlotClass):
+class PlotStationMap(AbstractPlotClass):  # pragma: no cover
     """
     Plot geographical overview of all used stations as squares.
 
@@ -144,7 +147,7 @@ class PlotStationMap(AbstractPlotClass):
 
 
 @TimeTrackingWrapper
-class PlotAvailability(AbstractPlotClass):
+class PlotAvailability(AbstractPlotClass):  # pragma: no cover
     """
     Create data availability plot similar to a Gantt plot.
 
@@ -271,7 +274,7 @@ class PlotAvailability(AbstractPlotClass):
 
 
 @TimeTrackingWrapper
-class PlotAvailabilityHistogram(AbstractPlotClass):
+class PlotAvailabilityHistogram(AbstractPlotClass):  # pragma: no cover
     """
     Create data availability plots as histogram.
 
@@ -441,7 +444,7 @@ class PlotAvailabilityHistogram(AbstractPlotClass):
         plt.tight_layout()
 
 
-class PlotPeriodogram(AbstractPlotClass):
+class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
     """
     Create a Lomb-Scargle periodogram of the raw input and target data. The Lomb-Scargle method can deal with missing values.
 
@@ -457,89 +460,171 @@ class PlotPeriodogram(AbstractPlotClass):
     """
 
     def __init__(self, generator: Dict[str, DataCollection], plot_folder: str = ".", plot_name="periodogram",
-                 variables_dim="variables", time_dim="datetime", sampling="daily"):
+                 variables_dim="variables", time_dim="datetime", sampling="daily", use_multiprocessing=False):
         super().__init__(plot_folder, plot_name)
         self.variables_dim = variables_dim
         self.time_dim = time_dim
 
-        for pos, s in enumerate(sampling if isinstance(sampling, tuple) else (sampling,)):
+        for pos, s in enumerate(sampling if isinstance(sampling, tuple) else (sampling, sampling)):
             self._sampling = s
-            self._prepare_pgram(generator, pos)
+            self._add_text = {0: "input", 1: "target"}[pos]
+            multiple = self._has_filter_dimension(generator[0], pos)
+            self._prepare_pgram(generator, pos, multiple, use_multiprocessing=use_multiprocessing)
             self._plot(raw=True)
             self._plot(raw=False)
             self._plot_total(raw=True)
             self._plot_total(raw=False)
+            if multiple > 1:
+                self._plot_difference()
 
-    def _prepare_pgram(self, generator, pos):
-        raw_data = dict()
-        plot_data = dict()
-        plot_data_raw = dict()
-        plot_data_mean = dict()
+    @staticmethod
+    def _has_filter_dimension(g, pos):
+        # check if the coords of the raw data differ from the input / target data
+        check_data = g.id_class
+        if "filter" not in [check_data.input_data, check_data.target_data][pos].coords.dims:
+            return 1
+        else:
+            if len(set(check_data._data[0].coords.dims).symmetric_difference(check_data.input_data.coords.dims)) > 0:
+                return g.id_class.input_data.coords["filter"].shape[0]
+            else:
+                return 1
+
+    @TimeTrackingWrapper
+    def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False):
+        """
+        Create periodogram data.
+        """
+        self.raw_data = []
+        self.plot_data = []
+        self.plot_data_raw = []
+        self.plot_data_mean = []
+        iterations = range(multiple if multiple == 1 else multiple + 1)
+        for m in iterations:
+            plot_data_single = dict()
+            plot_data_raw_single = dict()
+            plot_data_mean_single = dict()
+            raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing)
+            # raw_data_single = self._prepare_pgram_parallel_var(generator, m, pos, use_multiprocessing)
+            self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000)
+            for var in raw_data_single.keys():
+                pgram_com = []
+                pgram_mean = 0
+                all_data = raw_data_single[var]
+                pgram_mean_raw = np.zeros((len(self.f_index), len(all_data)))
+                for i, (f, pgram) in enumerate(all_data):
+                    d = np.interp(self.f_index, f, pgram)
+                    pgram_com.append(d)
+                    pgram_mean += d
+                    pgram_mean_raw[:, i] = d
+                pgram_mean /= len(all_data)
+                plot_data_single[var] = pgram_com
+                plot_data_mean_single[var] = (self.f_index, pgram_mean)
+                plot_data_raw_single[var] = (self.f_index, pgram_mean_raw)
+            self.plot_data.append(plot_data_single)
+            self.plot_data_mean.append(plot_data_mean_single)
+            self.plot_data_raw.append(plot_data_raw_single)
+
+    def _prepare_pgram_parallel_var(self, generator, m, pos, use_multiprocessing):
+        """Implementation of data preprocessing using parallel variables element processing."""
+        raw_data_single = dict()
         for g in generator:
-            print(g)
-            d = g.id_class._data
+            if m == 0:
+                d = g.id_class._data
+            else:
+                gd = g.id_class
+                filter_sel = {"filter": gd.input_data.coords["filter"][m - 1]}
+                d = (gd.input_data.sel(filter_sel), gd.target_data)
             d = d[pos] if isinstance(d, tuple) else d
-            for var in d[self.variables_dim].values:
-                var_str = str(var)
-                d_var = d.loc[{self.variables_dim: var}].squeeze().dropna(self.time_dim)
-                t = (d_var.datetime - d_var.datetime[0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
-                f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1).autopower()
-                raw_data[var_str] = [(f, pgram)] if var_str not in raw_data.keys() else raw_data[var_str] + [(f, pgram)]
-        self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000)
-        for var in raw_data.keys():
-            pgram_com = []
-            pgram_mean = 0
-            all_data = raw_data[var]
-            pgram_mean_raw = np.zeros((len(self.f_index), len(all_data)))
-            for i, (f, pgram) in enumerate(all_data):
-                d = np.interp(self.f_index, f, pgram)
-                pgram_com.append(d)
-                pgram_mean += d
-                pgram_mean_raw[:, i] = d
-            pgram_mean /= len(all_data)
-            plot_data[var] = pgram_com
-            plot_data_mean[var] = (self.f_index, pgram_mean)
-            plot_data_raw[var] = (self.f_index, pgram_mean_raw)
-        self.plot_data = plot_data
-        self.plot_data_mean = plot_data_mean
-        self.plot_data_raw = plot_data_raw
+            res = []
+            if multiprocessing.cpu_count() > 1 and use_multiprocessing:  # parallel solution
+                pool = multiprocessing.Pool(
+                    min([psutil.cpu_count(logical=False), len(d[self.variables_dim].values),
+                         16]))  # use only physical cpus
+                output = [
+                    pool.apply_async(f_proc,
+                                     args=(var, d.loc[{self.variables_dim: var}].squeeze().dropna(self.time_dim)))
+                    for var in d[self.variables_dim].values]
+                for i, p in enumerate(output):
+                    res.append(p.get())
+            else:  # serial solution
+                for var in d[self.variables_dim].values:
+                    res.append(f_proc(var, d.loc[{self.variables_dim: var}].squeeze().dropna(self.time_dim)))
+            for (var_str, f, pgram) in res:
+                if var_str not in raw_data_single.keys():
+                    raw_data_single[var_str] = [(f, pgram)]
+                else:
+                    raw_data_single[var_str] = raw_data_single[var_str] + [(f, pgram)]
+        return raw_data_single
+
+    def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing):
+        """Implementation of data preprocessing using parallel generator element processing."""
+        raw_data_single = dict()
+        res = []
+        if multiprocessing.cpu_count() > 1 and use_multiprocessing:  # parallel solution
+            pool = multiprocessing.Pool(
+                min([psutil.cpu_count(logical=False), len(generator), 16]))  # use only physical cpus
+            output = [
+                pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim))
+                for g in generator]
+            for i, p in enumerate(output):
+                res.append(p.get())
+        else:
+            for g in generator:
+                res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim))
+        for res_dict in res:
+            for k, v in res_dict.items():
+                if k not in raw_data_single.keys():
+                    raw_data_single[k] = v
+                else:
+                    raw_data_single[k] = raw_data_single[k] + v
+        return raw_data_single
 
     @staticmethod
-    def _add_annotation_line(pos, div, lims, unit):
+    def _add_annotation_line(ax, pos, div, lims, unit):
         for p in to_list(pos):  # annotate each requested frequency
-            plt.vlines(p / div, *lims, "black")
-            plt.text(p / div, lims[0], r"%s$%s^{-1}$" % (p, unit), rotation="vertical", rotation_mode="anchor")
+            ax.vlines(p / div, *lims, "black")
+            ax.text(p / div, lims[0], r"%s$%s^{-1}$" % (p, unit), rotation="vertical", rotation_mode="anchor")
+
+    def _format_figure(self, ax, var_name="total"):
+        """
+        Set log scale on both axes, add labels and annotation lines, and set the title.
+        :param ax: current ax object
+        :param var_name: name of the variable that will be included in the title
+        """
+        ax.set_yscale('log')
+        ax.set_xscale('log')
+        ax.set_ylabel("power", fontsize='x-large')
+        ax.set_xlabel("frequency $[day^{-1}$]", fontsize='x-large')
+        lims = ax.get_ylim()
+        self._add_annotation_line(ax, [1, 2, 3], 365.25, lims, "yr")  # per year
+        self._add_annotation_line(ax, 1, 365.25 / 12, lims, "m")  # per month
+        self._add_annotation_line(ax, 1, 7, lims, "w")  # per week
+        self._add_annotation_line(ax, [1, 0.5], 1, lims, "d")  # per day
+        if self._sampling == "hourly":
+            self._add_annotation_line(ax, 2, 1, lims, "d")  # per day
+            self._add_annotation_line(ax, [1, 0.5], 1 / 24., lims, "h")  # per hour
+        title = f"Periodogram ({var_name})"
+        ax.set_title(title)
 
     def _plot(self, raw=True):
         plot_path = os.path.join(os.path.abspath(self.plot_folder),
-                                 f"{self.plot_name}{'_raw' if raw else ''}_{self._sampling}.pdf")
+                                 f"{self.plot_name}{'_raw' if raw else ''}_{self._sampling}_{self._add_text}.pdf")
         pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
-        for var in self.plot_data.keys():
+        plot_data = self.plot_data[0]
+        plot_data_mean = self.plot_data_mean[0]
+        for var in plot_data.keys():
             fig, ax = plt.subplots()
             if raw is True:
-                for pgram in self.plot_data[var]:
+                for pgram in plot_data[var]:
                     ax.plot(self.f_index, pgram, "lightblue")
-                ax.plot(*self.plot_data_mean[var], "blue")
+                ax.plot(*plot_data_mean[var], "blue")
             else:
-                ma = pd.DataFrame(np.vstack(self.plot_data[var]).T).rolling(5, center=True, axis=0)
+                ma = pd.DataFrame(np.vstack(plot_data[var]).T).rolling(5, center=True, axis=0)
                 mean = ma.mean().mean(axis=1).values.flatten()
                 upper, lower = ma.max().mean(axis=1).values.flatten(), ma.min().mean(axis=1).values.flatten()
                 ax.plot(self.f_index, mean, "blue")
                 ax.fill_between(self.f_index, lower, upper, color="lightblue")
-            plt.yscale("log")
-            plt.xscale("log")
-            ax.set_ylabel("power", fontsize='x-large')
-            ax.set_xlabel("frequency $[day^{-1}$]", fontsize='x-large')
-            lims = ax.get_ylim()
-            self._add_annotation_line([1, 2, 3], 365.25, lims, "yr")  # per year
-            self._add_annotation_line(1, 365.25 / 12, lims, "m")  # per month
-            self._add_annotation_line(1, 7, lims, "w")  # per week
-            self._add_annotation_line([1, 0.5], 1, lims, "d")  # per day
-            if self._sampling == "hourly":
-                self._add_annotation_line(2, 1, lims, "d")  # per day
-                self._add_annotation_line([1, 0.5], 1 / 24., lims, "h")  # per hour
-            title = f"Periodogram ({var})"
-            plt.title(title)
+            self._format_figure(ax, var)
             pdf_pages.savefig()
         # close all open figures / plots
         pdf_pages.close()
@@ -547,12 +632,13 @@ class PlotPeriodogram(AbstractPlotClass):
 
     def _plot_total(self, raw=True):
         plot_path = os.path.join(os.path.abspath(self.plot_folder),
-                                 f"{self.plot_name}{'_raw' if raw else ''}_{self._sampling}_total.pdf")
+                                 f"{self.plot_name}{'_raw' if raw else ''}_{self._sampling}_{self._add_text}_total.pdf")
         pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+        plot_data_raw = self.plot_data_raw[0]
         fig, ax = plt.subplots()
         res = None
-        for var in self.plot_data_raw.keys():
-            d_var = self.plot_data_raw[var][1]
+        for var in plot_data_raw.keys():
+            d_var = plot_data_raw[var][1]
             res = d_var if res is None else np.concatenate((res, d_var), axis=-1)
         if raw is True:
             for i in range(res.shape[1]):
@@ -564,21 +650,56 @@ class PlotPeriodogram(AbstractPlotClass):
             upper, lower = ma.max().mean(axis=1).values.flatten(), ma.min().mean(axis=1).values.flatten()
             ax.plot(self.f_index, mean, "blue")
             ax.fill_between(self.f_index, lower, upper, color="lightblue")
-        plt.yscale("log")
-        plt.xscale("log")
-        ax.set_ylabel("power", fontsize='x-large')
-        ax.set_xlabel("frequency $[day^{-1}$]", fontsize='x-large')
-        lims = ax.get_ylim()
-        self._add_annotation_line([1, 2, 3], 365.25, lims, "yr")  # per year
-        self._add_annotation_line(1, 365.25 / 12, lims, "m")  # per month
-        self._add_annotation_line(1, 7, lims, "w")  # per week
-        self._add_annotation_line([1, 0.5], 1, lims, "d")  # per day
-        if self._sampling == "hourly":
-            self._add_annotation_line(2, 1, lims, "d")  # per day
-            self._add_annotation_line([1, 0.5], 1 / 24., lims, "h")  # per hour
-        title = f"Periodogram (total)"
-        plt.title(title)
+        self._format_figure(ax, "total")
         pdf_pages.savefig()
         # close all open figures / plots
         pdf_pages.close()
         plt.close('all')
+
+    def _plot_difference(self):
+        plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter.pdf"
+        plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name)
+        logging.info(f"... plotting {plot_name}")
+        pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+        colors = ["blue", "red", "green", "orange"]
+        max_iter = len(self.plot_data)
+        var_keys = self.plot_data[0].keys()
+        for var in var_keys:
+            fig, ax = plt.subplots()
+            for i in reversed(range(max_iter)):
+                plot_data = self.plot_data[i]
+                c = colors[i]
+                ma = pd.DataFrame(np.vstack(plot_data[var]).T).rolling(5, center=True, axis=0)
+                mean = ma.mean().mean(axis=1).values.flatten()
+                ax.plot(self.f_index, mean, c)
+                if i < 1:
+                    upper, lower = ma.max().mean(axis=1).values.flatten(), ma.min().mean(axis=1).values.flatten()
+                    ax.fill_between(self.f_index, lower, upper, color="light" + c, alpha=0.5)
+            self._format_figure(ax, var)
+            pdf_pages.savefig()
+        # close all open figures / plots
+        pdf_pages.close()
+        plt.close('all')
+
+
+def f_proc(var, d_var):
+    var_str = str(var)
+    t = (d_var.datetime - d_var.datetime[0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
+    f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1).autopower()
+    return var_str, f, pgram
+
+
+def f_proc_2(g, m, pos, variables_dim, time_dim):
+    raw_data_single = dict()
+    if m == 0:
+        d = g.id_class._data
+    else:
+        gd = g.id_class
+        filter_sel = {"filter": gd.input_data.coords["filter"][m - 1]}
+        d = (gd.input_data.sel(filter_sel), gd.target_data)
+    d = d[pos] if isinstance(d, tuple) else d
+    for var in d[variables_dim].values:
+        d_var = d.loc[{variables_dim: var}].squeeze().dropna(time_dim)
+        var_str, f, pgram = f_proc(var, d_var)
+        raw_data_single[var_str] = [(f, pgram)]
+    return raw_data_single
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index d1ec0c60..148c34a0 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -346,6 +346,7 @@ class PreProcessing(RunEnvironment):
         plot_path: str = self.data_store.get("plot_path")
 
         sampling = self.data_store.get("sampling")
+        use_multiprocessing = self.data_store.get("use_multiprocessing")
 
         try:
             if "PlotStationMap" in plot_list:
@@ -383,7 +384,7 @@ class PreProcessing(RunEnvironment):
         try:
             if "PlotPeriodogram" in plot_list:
                 PlotPeriodogram(train_data, plot_folder=plot_path, time_dim=time_dim, variables_dim=target_dim,
-                                sampling=sampling)
+                                sampling=sampling, use_multiprocessing=use_multiprocessing)
 
         except Exception as e:
             logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}")
-- 
GitLab


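Both parallel branches in PATCH 076 follow the same pool pattern: cap the worker count at the number of physical CPUs (at most 16), submit one apply_async task per generator element or variable, and collect results in submission order with get(). A minimal self-contained sketch of that pattern, with square standing in for the real workers f_proc / f_proc_2 (the explicit pool.close() shown here is only added to MLAir in PATCH 078):

    import multiprocessing
    import psutil

    def square(x):  # stand-in for the real worker functions
        return x * x

    if __name__ == "__main__":
        data = list(range(32))
        n_worker = min(psutil.cpu_count(logical=False) or 1, len(data), 16)  # physical CPUs only
        pool = multiprocessing.Pool(n_worker)
        output = [pool.apply_async(square, args=(x,)) for x in data]
        res = [p.get() for p in output]  # get() blocks until each task is done
        pool.close()
        pool.join()
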
From 8e40c948060d9476f7efe73c7beae68e830dce32 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 14 Apr 2021 15:51:44 +0200
Subject: [PATCH 077/175] moved plotting back from preprocessing to
 postprocessing to have a single location where all plots are created

---
 ...g_plotting.py => data_insight_plotting.py} |  0
 mlair/run_modules/post_processing.py          | 44 +++++++++++++
 mlair/run_modules/pre_processing.py           | 63 -------------------
 3 files changed, 44 insertions(+), 63 deletions(-)
 rename mlair/plotting/{preprocessing_plotting.py => data_insight_plotting.py} (100%)

diff --git a/mlair/plotting/preprocessing_plotting.py b/mlair/plotting/data_insight_plotting.py
similarity index 100%
rename from mlair/plotting/preprocessing_plotting.py
rename to mlair/plotting/data_insight_plotting.py
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index a633dec1..23d26fc1 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -21,6 +21,8 @@ from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
 from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
     PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales
+from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
+    PlotPeriodogram
 from mlair.run_modules.run_environment import RunEnvironment
 
 
@@ -295,6 +297,7 @@ class PostProcessing(RunEnvironment):
         """
         logging.info("Run plotting routines...")
         path = self.data_store.get("forecast_path")
+        use_multiprocessing = self.data_store.get("use_multiprocessing")
 
         plot_list = self.data_store.get("plot_list", "postprocessing")
         time_dim = self.data_store.get("time_dim")
@@ -354,6 +357,47 @@ class PostProcessing(RunEnvironment):
         except Exception as e:
             logging.error(f"Could not create plot PlotTimeSeries due to the following error: {e}")
 
+        try:
+            if "PlotStationMap" in plot_list:
+                if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get(
+                        "hostname")[:6] in self.data_store.get("hpc_hosts"):
+                    logging.warning(
+                        f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}")
+                else:
+                    gens = [(self.train_data, {"marker": 5, "ms": 9}),
+                            (self.val_data, {"marker": 6, "ms": 9}),
+                            (self.test_data, {"marker": 4, "ms": 9})]
+                    PlotStationMap(generators=gens, plot_folder=self.plot_path)
+                    gens = [(self.train_val_data, {"marker": 8, "ms": 9}),
+                            (self.test_data, {"marker": 9, "ms": 9})]
+                    PlotStationMap(generators=gens, plot_folder=self.plot_path, plot_name="station_map_var")
+        except Exception as e:
+            logging.error(f"Could not create plot PlotStationMap due to the following error: {e}")
+
+        try:
+            if "PlotAvailability" in plot_list:
+                avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
+                PlotAvailability(avail_data, plot_folder=self.plot_path, time_dimension=time_dim,
+                                 window_dimension=window_dim)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotAvailability due to the following error: {e}")
+
+        try:
+            if "PlotAvailabilityHistogram" in plot_list:
+                avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
+                PlotAvailabilityHistogram(avail_data, plot_folder=self.plot_path, station_dim=iter_dim,
+                                          history_dim=window_dim)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}")
+
+        try:
+            if "PlotPeriodogram" in plot_list:
+                PlotPeriodogram(self.train_data, plot_folder=self.plot_path, time_dim=time_dim,
+                                variables_dim=target_dim, sampling=self._sampling,
+                                use_multiprocessing=use_multiprocessing)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}")
+
     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
 
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 148c34a0..4edf8e96 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -10,7 +10,6 @@ import multiprocessing
 import requests
 import psutil
 
-import numpy as np
 import pandas as pd
 
 from mlair.data_handler import DataCollection, AbstractDataHandler
@@ -18,8 +17,6 @@ from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
-from mlair.plotting.preprocessing_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
-    PlotPeriodogram
 
 
 class PreProcessing(RunEnvironment):
@@ -69,7 +66,6 @@ class PreProcessing(RunEnvironment):
         self.split_train_val_test()
         self.report_pre_processing()
         self.prepare_competitors()
-        self.plot()
 
     def report_pre_processing(self):
         """Log some metrics on data and create latex report."""
@@ -330,65 +326,6 @@ class PreProcessing(RunEnvironment):
         else:
             logging.info("No preparation required because no competitor was provided to the workflow.")
 
-    def plot(self):
-        logging.info("Run plotting routines...")
-
-        plot_list = self.data_store.get("plot_list", "postprocessing")
-        time_dim = self.data_store.get("time_dim")
-        window_dim = self.data_store.get("window_dim")
-        target_dim = self.data_store.get("target_dim")
-        iter_dim = self.data_store.get("iter_dim")
-
-        train_data = self.data_store.get("data_collection", "train")
-        val_data = self.data_store.get("data_collection", "val")
-        test_data = self.data_store.get("data_collection", "test")
-        train_val_data = self.data_store.get("data_collection", "train_val")
-        plot_path: str = self.data_store.get("plot_path")
-
-        sampling = self.data_store.get("sampling")
-        use_multiprocessing = self.data_store.get("use_multiprocessing")
-
-        try:
-            if "PlotStationMap" in plot_list:
-                if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get(
-                        "hostname")[:6] in self.data_store.get("hpc_hosts"):
-                    logging.warning(
-                        f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}")
-                else:
-                    gens = [(train_data, {"marker": 5, "ms": 9}),
-                            (val_data, {"marker": 6, "ms": 9}),
-                            (test_data, {"marker": 4, "ms": 9})]
-                    PlotStationMap(generators=gens, plot_folder=plot_path)
-                    gens = [(train_val_data, {"marker": 8, "ms": 9}),
-                            (test_data, {"marker": 9, "ms": 9})]
-                    PlotStationMap(generators=gens, plot_folder=plot_path, plot_name="station_map_var")
-        except Exception as e:
-            logging.error(f"Could not create plot PlotStationMap due to the following error: {e}")
-
-        try:
-            if "PlotAvailability" in plot_list:
-                avail_data = {"train": train_data, "val": val_data, "test": test_data}
-                PlotAvailability(avail_data, plot_folder=plot_path, time_dimension=time_dim,
-                                 window_dimension=window_dim)
-        except Exception as e:
-            logging.error(f"Could not create plot PlotAvailability due to the following error: {e}")
-
-        try:
-            if "PlotAvailabilityHistogram" in plot_list:
-                avail_data = {"train": train_data, "val": val_data, "test": test_data}
-                PlotAvailabilityHistogram(avail_data, plot_folder=plot_path, station_dim=iter_dim,
-                                          history_dim=window_dim)
-        except Exception as e:
-            logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}")
-
-        try:
-            if "PlotPeriodogram" in plot_list:
-                PlotPeriodogram(train_data, plot_folder=plot_path, time_dim=time_dim, variables_dim=target_dim,
-                                sampling=sampling, use_multiprocessing=use_multiprocessing)
-
-        except Exception as e:
-            logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}")
-
 
 def f_proc(data_handler, station, name_affix, store, **kwargs):
     """
-- 
GitLab


From e664f7163c5c272a4410edc38242f6856c6e19d1 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 14 Apr 2021 16:38:59 +0200
Subject: [PATCH 078/175] added legend

---
 mlair/data_handler/default_data_handler.py |  1 +
 mlair/plotting/data_insight_plotting.py    | 23 +++++++++++++---------
 mlair/run_modules/pre_processing.py        |  1 +
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 87fc83b0..3a57d9fe 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -299,6 +299,7 @@ class DefaultDataHandler(AbstractDataHandler):
             for p in output:
                 dh, s = p.get()
                 _inner()
+            pool.close()
         else:  # serial solution
             logging.info("use serial transformation approach")
             for station in set_stations:
diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 84df9b4b..79c26522 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -468,26 +468,27 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         for pos, s in enumerate(sampling if isinstance(sampling, tuple) else (sampling, sampling)):
             self._sampling = s
             self._add_text = {0: "input", 1: "target"}[pos]
-            multiple = self._has_filter_dimension(generator[0], pos)
+            multiple, label_names = self._has_filter_dimension(generator[0], pos)
             self._prepare_pgram(generator, pos, multiple, use_multiprocessing=use_multiprocessing)
             self._plot(raw=True)
             self._plot(raw=False)
             self._plot_total(raw=True)
             self._plot_total(raw=False)
             if multiple > 1:
-                self._plot_difference()
+                self._plot_difference(label_names)
 
     @staticmethod
     def _has_filter_dimension(g, pos):
         # check if the coords of the raw data differ from the input / target data
         check_data = g.id_class
         if "filter" not in [check_data.input_data, check_data.target_data][pos].coords.dims:
-            return 1
+            return 1, []
         else:
             if len(set(check_data._data[0].coords.dims).symmetric_difference(check_data.input_data.coords.dims)) > 0:
-                return g.id_class.input_data.coords["filter"].shape[0]
+                return g.id_class.input_data.coords["filter"].shape[0], g.id_class.input_data.coords[
+                    "filter"].values.tolist()
             else:
-                return 1
+                return 1, []
 
     @TimeTrackingWrapper
     def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False):
@@ -546,6 +547,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
                     for var in d[self.variables_dim].values]
                 for i, p in enumerate(output):
                     res.append(p.get())
+                pool.close()
             else:  # serial solution
                 for var in d[self.variables_dim].values:
                     res.append(f_proc(var, d.loc[{self.variables_dim: var}].squeeze().dropna(self.time_dim)))
@@ -568,6 +570,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
                 for g in generator]
             for i, p in enumerate(output):
                 res.append(p.get())
+            pool.close()
         else:
             for g in generator:
                 res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim))
@@ -656,12 +659,13 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         pdf_pages.close()
         plt.close('all')
 
-    def _plot_difference(self):
+    def _plot_difference(self, label_names):
         plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter.pdf"
         plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name)
         logging.info(f"... plotting {plot_name}")
         pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
-        colors = ["blue", "red", "green", "orange"]
+        colors = ["blue", "red", "green", "orange", "purple", "black", "grey"]
+        label_names = ["orig"] + label_names
         max_iter = len(self.plot_data)
         var_keys = self.plot_data[0].keys()
         for var in var_keys:
@@ -671,11 +675,12 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
                 c = colors[i]
                 ma = pd.DataFrame(np.vstack(plot_data[var]).T).rolling(5, center=True, axis=0)
                 mean = ma.mean().mean(axis=1).values.flatten()
-                ax.plot(self.f_index, mean, c)
+                ax.plot(self.f_index, mean, c, label=label_names[i])
                 if i < 1:
                     upper, lower = ma.max().mean(axis=1).values.flatten(), ma.min().mean(axis=1).values.flatten()
-                    ax.fill_between(self.f_index, lower, upper, color="light" + c, alpha=0.5)
+                    ax.fill_between(self.f_index, lower, upper, color="light" + c, alpha=0.5, label=None)
             self._format_figure(ax, var)
+            ax.legend()
             pdf_pages.savefig()
         # close all open figures / plots
         pdf_pages.close()
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 4edf8e96..68164b6f 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -256,6 +256,7 @@ class PreProcessing(RunEnvironment):
                 if dh is not None:
                     collection.add(dh)
                     valid_stations.append(s)
+            pool.close()
         else:  # serial solution
             logging.info("use serial validate station approach")
             for station in set_stations:
-- 
GitLab


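PATCH 078 also adds the pool.close() calls that were missing after the apply_async loops; without close() followed by join(), worker processes can linger after the results are fetched. An equivalent and arguably more idiomatic form uses the pool as a context manager, sketched below (not how MLAir does it):

    import multiprocessing

    def square(x):  # stand-in worker
        return x * x

    if __name__ == "__main__":
        data = list(range(8))
        with multiprocessing.Pool(4) as pool:
            output = [pool.apply_async(square, args=(x,)) for x in data]
            res = [p.get() for p in output]  # fetch inside the block
        # __exit__ calls pool.terminate() once the block is left
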
From da4d59e9bd4de63854c406c27e55e1b868b32025 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 08:58:41 +0200
Subject: [PATCH 079/175] added sample plot to docs

---
 docs/_source/_plots/periodogram.png     | Bin 0 -> 63782 bytes
 mlair/plotting/data_insight_plotting.py |  12 +++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 docs/_source/_plots/periodogram.png

diff --git a/docs/_source/_plots/periodogram.png b/docs/_source/_plots/periodogram.png
new file mode 100644
index 0000000000000000000000000000000000000000..a756cffab18869f615ff504303f2743618f14633
GIT binary patch
literal 63782
[63782 bytes of base85-encoded binary patch data for docs/_source/_plots/periodogram.png omitted]
zFDa-}RX<AcIs}eaVA0SF`#)GO2(>`aIVvmbg=W1?)|s<ah=a@fN<W@m*7)BD`Aa(W
z*xhNO@N#1Au75HuT;ePnNT<;u1TL{WeMrxAQT4V3b=egp$hy+8T6cSuCiK`-C>l8o
zwge2sT0tDgOqX}8O5STA#vLysL$z)<?tcG=Is#OY@zz?_r}22za4uF%k9s6^)HKfZ
z`Qp|(V5@^sT{E5CZTd-YOL7@K;T_tyg=2~WYF^cU=G!+d{&uTI<Z(&YE_DiLVMs~J
z47(3-TlR$mV3t{uGBJ?$xTm9`nyBO#*Adv1w&a(eZDCZ_*9V8<Go1($+qvdtntS6R
zm`cpln}Q-3LM76AU(&m|F2gSVx>f-%J+7yqKuXsS)gU@2)0lXtE$VhLb!#<$yyEpG
z24EV%BW-jM>-}LFzpAP#uMN$w-QN!7y&YUd3KXvXaN6=)Tig9@Rt(fUz<S}V<E0H2
z9t<ud7y$QiOaz?U+kL0OGhdYj^*Rgi8e~G&dq5LVc56u9gB>PxKwAe3<F|aHO*+R6
z8F&v_7#0PV<mquy(pwr!WUe<P3rjiv5Y!{LWb;4u3&7577$rWI#-xgmSmd3hm4&Yn
z?nAZ*>e|=+ZuDK(AaTlIZBMbQ3e4C;Xg|=c00XEuALf2p`93>PUS^R-xZf(<xxCR*
zKRV6)Z%+7YqMKWfpKHfBH`tOeQwhOx?=QV}Pahr$Uecb`RSL}R7X?~lb8|XAOXW!Q
zOw<(Ii9NGs6;GQh=_#l-T^<Y@44|lI&blq!eskzpkXZjmO>+<Fa97o<*xOWFtoU4N
zUYEa4-e21;$oR3hXR&>VgM-6M9W?M+58mM{AWT}a?KdaATmJ>T?1sYsx;&??jSR?=
zyjVfw;CI4i8cPSn2g*?_H=5IjVh-}aEoIMKKrpX;U9&~O`w;Y#26ggatGK1$Ug%n5
z{_r@qM)3Y@lNIwJ3&G6cF0rvbs#`1x@zbA~ngq{05r)kShFwND)ln5&Zv*<Eaud?v
zAX!OeCb@smWRT){RO|yUT6V3$>!w8Aq$$~V3VH}hm|ywVu)eKeKlUoqg;eQ%SUO3S
zMB(-Mck6yvJ^Gg!H=T(s)mnPNhtdX{M?YIFGiJCdvbq5?W38Y)`hWf+QgzW-nk3$)
ziLK!#YAbu?cD3sR124?c=wisH@rs$Z+Hk)*Za>?a&2`~D<ULr3OxDFkmph4*Ib{%6
z-nmx&`Nu=hxxN+am@VQ5iPKgKAZ(Zi*njRscn_N%$zYGj$Xopif>Q04J_Mm)3^1t)
zE}o~Mxeq?R?-CRgNdkijkaUPt)zv#U`V(BqF|ta+0&<Nob@w6Kry$K%4%MchhpB&{
zTBTaVu)Ek(dxrXHl#H9#&MwOW-6oIE$M?k32acR!!<G4=V8Ec<b0l#dGNctqdo=1t
z_Wl6zp)kRG&<Dh(dGQdK-651;)Q#)v^S_mcO8}2UJrkVPIkyv`%B$RcD0HxO*B(cI
zcy1TBTDpbn;H{&EhdnIuAACehcA=q=HfR@Xv-0AC-mjpo$EDY={w1TAbH^uT=mGG`
zO!(UiOgKDRA`kO`1*43y=A3!YY!SYOh3^9gf+a6yAbEk~&)x4XE>+s*1*apku#vR?
zWhjm>l3U2?=g6WO3880O1^oAWJ=&yz#_%V9O@5qjG^vj$I=DqYK>NGe^=yF509i}l
z%nY@ws|%o__)OaCzFEoLY+tN=l<QS!=2m}7dkc1Nse&(ScrW2#6UoN-QBfvi15h2!
zR*xiPKg}5SYC&kr#IR+^Yk{ijWn}*0wFlomX<z>WRKc^$%eC=B#l(~p1A^Bo=#dgw
zd>1J^6nv;(LT)p3<cuvXvtUR7O0IM*{L$4F7f9F2DgrrRr!p=YtLgcqCj<Q~kr6Il
z3jsHco@u&o!XJms^#RTA!686*A{fn^Z_duxbzY3)RpV-cP@wxry0>MZH7Ci32LtTx
znaiMGaglsE=ZLYHS>GA=BcXPHX!<kPRHtTV%=5Z&Be#1+y|MaKo!<A*?*s>PD`?31
zmOk^^XANBH?R~Gc;bLTjq^PLqktg2|*|m;q3lEReqRUnhbYRwjxO1=gQrgo|!fd@G
zP4WxA1ZfhuNnCG&ATu1-KnTl0paA`cy}dm^`O7^uoQi*mT9vmUsov8q<8z7$E+4;y
z-HEguuV`&v$#IjcH3g7c8hIiF8*guK(<s~zRThS)hB|Bq`x-J1@{_lGaWSsCf1q4H
zMxc=q9#%4<ULc#+2{br5LgPwr)ff*Jim}gTbY`&dT7uI7om*or%2$X~<6(xp>0OZa
znv&7Dd@-IRN||v~x0?%O-6WaNyW_}a-e8!~#-r;o%ezMQeu!@U>8#*Ec=-5I_bo2R
z6rknNJSpDkkD-*iQ}H~O-bk55etEMJ#<pR9suz%o=TX`LDn03GX=Q~0EIR_B{KC%R
zs+;X80A7ZX?5VqF#dkicwMZ`e5%{SH4+RK7%Vz*BQ~AQ)x4Sw$yJc-d{*s{2?vU99
z_(FpCeifl_C2pbq<-)w_un7WaVOVF&xPAhGK&0D@zp*NAvY8v#8n}pd_MP0#uEch~
zANhBkk%z~;9nWi|<sn#tVqemla(og1k8*jy$wyVk7xgRz`7fpl9ZiGGf2}+_pwT7Y
zE8FFsJaCEbGj#czHR=@s)*ipVZ~hL-W+m-om7ACM3harQ<f3_h3FKa%QxGpNg`R0E
z*SVUHPuRep^tZzs<V2>sf7j~dXT!twm&;)~wzl1WxbEI;UfpXrMBBMU*KXJrp-2g$
zw8sM)XIR1`OUB}T^TrTTC0asZJ&nvYU!2KFKIxd&D|$R7%-jE`yBSxTZAENUIG?=Y
zGYFoj6$03_9>AB+fIgAUa^g-pNRc(G41^geffB0SGsWh{9mH9-EpoJT!H6UbNBR*4
z27Qnf?kufk&EfMoKPrG<3zyH#E;|+84Gj+J>FfKI?TF~py?jwiKk3PLD}?RSI!n|<
zvpSW5<r4K5e=*W>at1(q_slJPMMVSG$`R-_>upv5!b%)GF=O}HE8Hqqys!{L=Z5$s
zZ5(;|qp%=tkI-MnHLoyFaFJWfX+D=ev17yYoRF|HPbz`H2eU9M=4q$CrS-N%B)@b|
zyjb_W91W?`p9vo*?Gw0dAF0c2lvnPRLEXk3Tf1I91+z0uq{9D&(s<?SF&6+>pjs3H
z)FQj?!m(*(@wUAfX(ikNuZIgI^iL8goM^tinB#IjAO<+bm)XUoUi#7E_QMiO<0@Zy
z5b#uhu{Q1+qW|SNg|GhWa?)_Jc;>+GI{WPE4$QNA(n?w;P;F?b5MGd)|Cwbz-q_zR
zAXtQ!o4V`pe9~d2H8VI1dyUcrQ$d{T@k2kx9TXJg^A&h3J_Kn4n5t*T>;+={GsI}p
zTha4rnP}vsfA!~j@jlPpda2dNq!IrJ{BHH3Dr84=;B8?%4gC(w$E3j`W?Q+Byu;0X
zy1dG^wcD@o-;%2Soz7%cb#(xs3!DlP!^0@>!PXS?om*_OMyB$Q7N4mxHByUzSK2?6
zQ0OifqR_I?wyt@x_Ef34u63DCRm&eP$c;D!Fijr-^!$;_edh%4S)CF7t5ILhl`zbU
zq3Fab+KA$Qca5cN^gDEYQm5aGY<T<sIUcBQvC{Q(tE51!cZd6rl1M#wqNyjmgx5~S
zBnNWbKjx|4KR-|WNQnA2Ybfr^N$|orW?_*tyo1h&2h(yatXKjg%~vs#=O?VC)2x?=
z@YIR!XnxTIT8O)Ax;5?gwwn+#N#8$W0igLHA{_m}S=O(yw?G%Ck-LDVO7kr0=kGu7
z{Q9ySG5IU{?N-o+4x6m4lVl6;CH#LYd`^TZ1OKgU2-NKCx-VE#qfDmol<q-pcqrVu
z9I<8&^m9>j>Kf;EC=b*eMz^ZqZAhiiycVXK{Xl9o<BlzQ6<kj<>(f=q{2oFHU{_nq
z2<3V@ixQFW4f5?d(Yk4H^&aRpEFdV*7oD`jtn^`S?SD2K4sM|Mk02<Nm+11KB*6)t
z8Pp8L!M^4$?uVH~w8RHjH=E-nO`or<@!||#)PU`uPN7Bs?8EaDrf2Z*Xh53CQFy6I
zzBVH^%3tD16qTzIyS?F#|32!Gd3K&U0F%W5VH~(4WLBD&PS4WueqI?UdW5>l-XO-d
z^f*K8FWE;AlY<F+B*gE*<h_zzRBYmIXSg~B6A$<WN<uIp?L&95RGU*^kJWNe+a!*B
zIdzj?%l8v#iD0t@*q1{TWrNT3;0c^+X^kMXpAPt;82ul%MW#-tSvuM3YD*7NurZd1
z+oHLX493_&u$DD~Z4Nw8)Gg5a&whD`Z3pJQR?mepnyi7a@&svXoD1ut{Bt2U^$$HI
zv%FfS>1j{jDX?d-@hg5f*c`?u>oIMRLweQ#4Q1VTYaK+tQBg}c`e=EzoVKke&0Y;b
zPr%9&UWDqo(_bp7KK)<U;oty@kCxt(^8nY;*)_ovZ~v;}DWeaKT8Ga7{fn)mvoSdX
zC#Tx8OF+ONjm1D~6RaRn%UdY@npKyEg*7bU(#-Rd%I|M_x&~As7JmLAgkKeKTy6M1
zmvyvH$(JQY4reWa3YK?IM$)fu%HNLg&?S)g1JdPe03|-q!!1>|d2yp<h?`tl_Qc<o
zTGqf82P>MQxZl~H>T~u_K|U}3c%<v-$pU&NFG!KH86yb00>*${z9_U}9*(?!toLgn
z?umHQ%@GQ6UF#oV5AdUdT_s@NTQ{AxY9mgE5N+x}E93oT2G{~0xIXn7hv=`4Ri!Pj
zggQY;+#;h3Vh5H_iw8VxO)wx}M9knxSe3z%q996N;(`n0|Al|_%em)|onxL$RJL&Z
zb^!~sR**Tp<U@40ZjM?w^m`}Q9t1?I2q^wK7Y4}>8f%^W*{|!bb&=_gpP_;pZ~0T*
za;K_yieB}8wnBAqV2)g94ikTQa5w>VamEsqynM5U_W(4}{lE;95R7Rz?$Bv4s$CW+
zPrls=-06Jy!%_=OB;3V^+{9Ei^Y>c7A|JP@FrA`QP*v6Nq`LAHzJ!&8DY38<A%6hK
zEN>#rY@!%IzSsiY?&&>U(s@+c%q5RnjiYGlaU5saT|0U;DDK9<WL}H=L8qKWS*@+d
zKPzSf&YkY%n>~)dq2F8khi07!R+nGaj=9|Cd1<|IcH$0@DD%6&xKA&1QQHU6g9u3a
zI}abTuX|k}ijDCF0&Nsk^gpP?$H&`TpBRz`<DK8hBHjPCBcSWwz#X6A6y}0zqRxGI
z++w7Gwehh#K1IV-OU-EGf%D7~O@W-Fs*H!T<*8%u<aeV!A}(pXU)@#d%3qRu&8rG%
zHTmDUb=UAjcOXmt7UHt69<AfHmi}`kZNO5U3g_|tt+(@btJmEGSGGU2!^xE2r{vqY
zE}bPJ$NbzCc)fgUVnG0O<obyJf$76g_<M0i`bARz8q?i^7bpUwkANqpV{7}{YY?;|
zdT6Fmwg#U5jg5?z>e&nKM9u(LAo=_S5B<E%!79_@*698Ubl{W*Mc6hfjyCCNndxp`
zW%A$&h=wLI@A|}7#nac{+*j7h9c$QT_o3Z_NpCoZhaaN`dR<P{bI)qFGQ*712Uk>}
zF9~>ZGH!@9@G8>#;<X|xPKs(@2i-0rxYL_EHwQv<mzlg9e63sUF%NmAPkR5RknApt
zHx;3mUaM1<?(9xR_!%C}WO5guu#VguUR?E{!SOPi`rmcboyRS1J#N^Jv^>7!c-o*J
zdUo;y!-~r3I^Y5G2+Y6qYSQA4IPb-l8gClzIYP^~#8l6+rOtLHXYN+>gI)3=fGo|b
zS7FPO6r!C~Jvlw#zo2P4TTc!WsIU8Ue-!#g!p?mk*J9F676wBAtn2M9ACYL@|F{Xi
zT~TSfMSjkUy?|MQkOg`tw~KJtIoDc=b)7?o8VG8hf@k0;MVuulRh)yiF#<B8Pd=T-
zf}_+I-1(W_>a$b*)M~!Veq@)n?}u!#fCGwHO?8?kpj>5_-d^`cN)n*#s|3h7AUpb(
zgUBy%u+f~?8p~eQUfc_ej-{d&I}zoFFQjfiG7|zVO66)mTtg0b5Xu!mi!<wlMt`$K
z(Tp2y&?h3Hg{dDyd-~+-Ot&q7zC-EeSn`98Q_LdQK5ugV+yM{Z6BE-}G2b(ATL#^s
zrLUbTYInYG@?(EzBXfPGbJvMYi9`Afwf%SLc%|jY?)ILxO~xZ;G+n&5`t2@&q=7kx
z@fl!C01DIuVQsbY7(iXxE{fADm))BCBVSm87Mlj3Jm0evG@b+EnHQQvZULwaNVMh0
z8szBDCqdSRl)048UGscqw&hLD3-Yc(rQS6fr_=T&*QvR7)3Yw{?d(3bptX9hJeM}b
zw|@>cAnDpI0E)%&ailk~A9?KE6HLstorz*zE|O<n6S`jsN%~h+zj`n&MH25amTV*X
z)F?Bt@@UU5&}fTGesRQI-qIzt%aT`jo|A}9KOvf#okMfE2#-40tr~j)v_dPm0i$cy
zZ-5CO_N0$`S^_~AXBh2H3<DORm%P+$cEJWVnYbKxo&k)=2WYLj^;w#aI??KZ77kM$
z*~Od6-&F*9y0uLVktwRx`xS~Axl<fj=e5`Mdy#$1<=tq>OK^zxN;}yUW~<hd&r+wp
z3g;^3ckl4mh2a|TEPW%4`w^Hy^z$ebul^KpAg)W@fxW4U%1S@7n`PvBhwZ0;G!EOU
zLZAi*72f9AvFi6j%AuJ23|Yh3me*u=rLChywL(h^y~{(5ns~XP9WzrSJtNk(I(4j1
zCl3<kGQec^bXE+pUp~%X=lP5eRa739lt7MbID7CJ|M;_E|9ncPXJkZPjhY`=>L2Z(
za1aot4flunh05n^zcNwp<nt6<Ox`R?8DTZiaBQ$B6)~qM=fcGnEov}K%cg!gX$0G?
zt@Q$#%?6fdxKqnwNC*BO^1_+#;=L+863B2RakhWqBT{dGI48*b%web!QcTEA6DM5d
zHnXou+8a=}w~6t2#xgIL7aZRepP-hExg3)*WI*>)*>eDSTbnf$;3nIb;CQh=FC*~L
z5I9OkR8j8u9h(X4R*&rhG7O+<AU_RY0(SELZ2loFb0iFGRIry}s<&Owy>)=z#wR>S
zfwJ&clKD(X8nPycTd1|gtCl>&47f%2rvJegbo$vZmhxpy%M!2MK?CJtGu{klJDOKm
zgkkiT^?1~F$t+zpIIzeOL>F8q_YmEV@wHa}JHZ3plNi{e_znR17tqfDTZ6~SGrtwS
zd^7isIHSI57q{oaD<W3yN)G8a3$@jzAuoKK()DuCl0o+Ox`OIB0P-C_N}u?YqSU_L
z#{FC$vD?eK28sZ{%3sGEMbWAWRe`RP<dd?24+l65QE22gm4kqZk&!!jnwN*i9CF^!
z@D>JIkPq<YxJfug=x1X}dInMUm2xi4LIp!x@@MTw5|WoS01fSW3Gr22S^Qn9s9fBn
zzN6h?wxx1UM^jRh&rv`fcs_0qx>&LXQaZrLbRO9HllGFr#mSGBS8tk1x3{+yGC7cR
zIe{&o_|khAs7{9khE+bM#C$|NywUuH9s*f@|G1~IsE$q?`F!pO2tgKJif!Q(b+`-H
zfWW#Pb(y3|aGmd}c85q{-&tQ}@XG@AV)*8`=qDPS-}@q*d0(y^ACT+({FA5CdZr;3
zA1yvjay#zC8Y)j&U8N-^3i6T~U+t6cZWG7joxvT~d3ocDH#Qb&UrY&n^XB;xFBX>8
zyCtmQP(JN<L)VfOD1cWSGGWxgXLhA-q1G+_j*a1bJ{Gg_bCxP1DKdz%j!zzS9t}p^
zzQ&NuidoNsw<Y7W>WW*Rw52hmN0(<8P6H<{MUf3P^jOQN8!uMZYYV>3LVf}bvv~WK
z;IJ=WzFy2uDL)9GEXPTA63`;Q%yTmU`Sn{bp^gaRRRq3TlEw>I96GX+Vft3V<RX(d
zH|fB>MTP1&3x;Eu1>cWYn(?K5?+kyL*A|&RBz)$^j8(c=>OucZP?QH{H)4vp`mxJN
zZh$xi@?5sLrgN%f;vuF*ZTOg5qhmg8ii!_@78nq3`~S3_1U|i5$1cQzfcRfuuX^E0
zrSW1AP%2=8VGRaQ^_W&^EQ~27B;kJ9fT$v!d0I)|gUjntwE9nu-aqz=T>hG-TEk98
zUEKQ4?HMngY1*5j+0QjjL{#T(g{e;hlD!3{6yJ*2L?+yZc?tgQ++yhV4&*F)rrPct
zo9`gIykFnINK#YXr@V}f<B}xibwA|$t18nyFY@sHZvjjEw%xzZ7iM~6Yv_UH&KBLl
ze}7Xa@)Gj6f3|32OBIj5te{XN83=z-xu1<?h3>b6$a*c8F(CfwC2n)Gb9YcehRZ1C
z&%boq>}|&Nfbq!Glrf|Q*1Ut1%U!&QrtSwqr?h7=8?CiNm*?%@^SXN>dpmm*wOvb;
z?eEU@Nd_|<1}y+J?w8Q>7C85~@8wIy=DtX0I0-paABi6mTf`}x#Rm!R*Gdc8M)?e{
z(G-K18n?esQ*BIg&16vrI-S&;Vrk16U;iKn=AjSD2fyLxhaEyd(hWLzRdux@_e%4A
z2S~;(t_*(Twa84(mEqzq;dsreHm(k2k)%$cC1W`T-_P4bJTKgR!rk}G953;(F5EHC
zxXx7`&;P<<^&rKB_)%p0tBJbBD!i)ULMeU&56CA(>Zt~S<eJ@o_KpykYeN3f;%OUm
zwh%g|3k{_7du|fp+&Z81d)po`Xt7V?>TW5xahHfN+J<+%&NE|N@53xs;rY4*Mx-;@
zji37dtagq>nSz-wQXjHjNrAKa>b2~qVHZnyN?VZC=XQZ*s6Q-16iH_7f)@6}tD*Oe
zEO3p{f9UkaC>d*A`p~KXaD=|3>^8iDxb>>9_F%Ksl_8AmF3rceZnU=4-c$(K^MF`B
zCVTtT*5f`<R`R!ARl&&9o?#2sSnhSHIi!-(2Fp+TQDAUbkJT?$lmf~m>!%M4Xn6>|
zeKu-tXega86xhFpi2U}djqdIYXOt;dI_kOK#k=cmn`c1lC~F%JjVn<)2c`CH!=E1o
z2Z%?yJ4}4wf%t8R<SXSEs9o_5diN6+k9n2Jn+;-^*V(F{cfLe%DNiwncur!rv!Zp*
zg68Ktw0h_7PRWKPrX@D7@?pFf(EmJ8(vy=}Z`I6nfB-sScz()xF_-KO#bhOT{$#mJ
zyorS+?#FbJ^tL<DjJMu;yV&wnXdJt(sd=UxS>HHe^fcZ?9A+%73FX!`>l2d1O2IWG
z0eUn5Jx`&EX9MB><z8N6$UrL!vA3eSf^R=@Mu>|0oyI%cC-HOr@~|wITD6{NlL*$1
z{Cs8EhE)Go&rAc3O56^Nk3RzrgGj+nB94Lghd&NCX_yYk?oR!>yK5U*Ua#N}*2V;O
zZ0@J-X|m9M0@Uc{)cebK96qi1td0HqT+F7nI}J77Ut?Nhp2U1<T`5yxI_5(T`oA>Z
zHWy8py-75`WI_6@ynL=yTHV_47LH4YqhZzzz5Ls=WlMeCwyGFkcu$b^Cp^19-onc&
z83jYrD;Qkt>?-LWncrLCrfN(8;?(Qcudj+qb%8`TdUH4V4p0ugpg8HHP6AYsj%!}D
zq24z$?036F-!pl=Uj~!WQynXfL%0q4A$@z+d}(Ec>T~$dSh(Zqp^&5Zg)|dO<*K(T
z0Zt0l>V;t3NU77g<0)`SfpKJnp<~>BN!WOGyVb-2P+5|-Z-G;J6g!WCyoNC?C-AAu
z2<7DD>SWHH{~dmK!S^1F6)IE*zC25M;+1FJR!Qf^cQq1?@}uC)zwEBR4Q=%{Eo}Sz
z2OH#K^H&9ZgyRH$X0_!sumJvZ<iWWNClk@ioAfPQNh-{7pL|PypruM~&3B^9X}kCA
zvC_D+u$%Kwoo{YWubJkcEhtBYHQ4KcwPm0tmc94r^|Q6cHQe*Efq{WG+oO4-OK%^m
z0}Jk2tvgq)mh)2Y6qHyh;+@CuEL1do5-E{a&g-3nwtKJmG}1Z2I>f=4p$u&Tkc(W{
z6*NHL0-Cm?K8|-il@eC&%*QkAjg7cx$SI0(-`G9fj;1c?;&-^0w=y-kNgrzbZ?KQO
z9URa&3`Yv+wxl;r(=IgAjDT^zrl`fidN|<lhCHAn_wpO$ad1<wk$KE}v~0$-yh!Db
znZdLx*1kFS3o96kM~?s0HrW==f6Vg~NqA)z{%DbXLn6}eu?VjES+#c8yIkWo*1}T!
z$x?)fg=ZSsZb{OrWxF-WjSG@?tOdYfJZ$WSjxLcAAvh+|p80vrZOm=lsrv)WVnV(&
zvB;$x;*l877dJ8Y@0Lz(GVZ)PDxGE-M7{#dir_j?!3@nQMxDg!)hsmk;SI8{c@2B^
zGM{Udhck@K7S0t~V&=x1aJD|u#Yr*0e*#ip=Z6*JKkD9ps+6j#=J=r=51{BBqsGlr
zWorW@BKEZ~(~THb9S6=;w}B^=iUf9MLO6MMAge7xPb@A_ws2y_AG7{T4%GbH*U}O<
zUrf$J*MxO=ea?M8Dj*Fgylr@YSw&zMnspfo2XY4A5%hKwD!fRt)`i6hemVTS5bUhp
zz|c_q-3Wix`$00&I9vLUEh>j02juU1E`sD<)$v^|wf+8ODRsJVi0^<C=zUsek8*8>
zSrk{)JZsvIjOTM3S3tFUkRf)Yqj9dsmu9(hTE=($;ex<jJ*R~$ogv*sf=VD4+E{Du
z3#1NUbNW3x`Ry$M85z^<3mK5?20(A%`B|wA)<xoU?#Q;#S9Ew&#!BpOSC$~Q;%w*;
zftI|s2;giB(k~l>K-@RkC9@G(F<%CuR9uj2e|=4VkDKF+?K^(nQT0n_=*r>FofWUG
z$loR#H!ruDQQ}Fs#MZG2CSK$_KdzFQFD7^IusE&p3s9D1j2w2ofzuGFS8>7%(3)gr
z-;}p-s_yWaPaRRKk2h7~zvq`CXlpuLoF3lPH_(Z!dpOS==L{V$IuS0pStBepHGN9K
ze9BAyr=ve$vr=Yfr_dwUY^gcxZN+-cU!3uS_qGjM5f{ze7W+$U)B0WrSjFvrk<qT%
z#Euyqi&f-D+?%n-n5@^o(QPFyd7y9V8YfddLti!M5;`f)r7O6z*VM&Q8f<r90LNtx
z_V!jMjZhD*@*a*LVqgX7I!65~ys?OAhyrhv&I83M&yb6(m7n3YT#Zxl1!MH()wEh`
zp{=!Dt7y2Kdt+>SUBC!y+vwa}RFAwV7r06kI4Y3k>_D$k6oD`ABW{ug5;(fzF^tT?
z1@@OsRe_yy$K9dA<5p*DwB15_cx}u^4^j`aqpx&RO@I=^m(UhmaP(0p7&#4aFI-}(
z`o9wZVI357D~GKmm}VIet(lAD*_T2Buk%NN=6gChS=#03llN#bkrzzERwTnEnk*uI
znlkon<?Q$jv41WtlwqVWfBqg26{OJaXob>o;!T%YKH{18(o>2F$%4yc=vpJ@U97E>
zlun%OTuB{(4R3^QV2|a$a|N?#LWQZNH4ZMK!gWh`4H=kZ;kqZGrVHhimRE>A-se5<
zZ)T%|=`N?f6%iiS^)%2rJ_Y0iM;-4DTE)hchToI4--vXq$GNoS)-JTHCh;@?F9WS}
zkc4F4?u9Q2Nk6w04e1`n>dYxm{t|SRZtm)K={SdTx_)_l;DzcA3mYM8Qab$sdgo{@
zE{~S~oCJ`odB4s_q)ApUy6rw}Ms$q{MXmdll)<tA0`mu{#;W+KeFW$g@-yOnJC1KV
z>lIdEw<6E)*Y3@m##x~qF;DOI+9++dy-`+jdK!tG?o(=RKfEzg)~@Q%r|Z`r3*>t4
zFIBHn;X)57<1}Ad{rD*cktU?168VJ{YXxum3KlD1CE1QQ?c={F)jA@*bFInvAgZK2
zjj1(sq1u>t$1y#}y#ldELBmEX=O|p0uP7)l*l$?Qkky@uQmxwR_F6xaW?5)iRt0ha
z>NK!NoHcLoGZE0N04mkUP?Bu8AK_Yj%H+L+yr~N;Y2wONUXKdXlppN`OP)$YZnJOF
z!3%O3n_OJfB7_a7a7UtY_`>MP0~X)1A<4~%3aWsfCQzL1@9OaUf*!Ahlo%5ci!Tea
zy`(x&`Ao^{AV$V)rnFkcPQSRGBaSdK&WR#>s4{wJWdUfY9{~sdvVkrX#SdU-?i&3j
zlMX;udAV+0<|`ci+S>PHjrOdW+lj86%FL9yA$l=OVoF6(=jh7F&-2!|pA0<!l3Sb9
zdrTgMq49s;pP~g%fa5POp(F$YYZ3|5;K+_c%d(i~*(&WL?+f?LLo7)3j&-~8Y?H0K
z?a*y@>)rwK30BcQ-_fnRMid$%Aa@mHaJelHi_8V-C-@aB$9dDeR68hy?SKLYqF?+u
zQK$McJ+8dA@t&28)U*vVxx~gh94CWr|Nr9ZucMm&|Nnn{G@_IeN}3>279d?JC7{5l
zNlR^vl5RmjL_k4MKvDtO7`@RY0#YI|1{)(qx<tCZ_dQ?l&-tD6`|Ig>s>h8--EP<G
z<$8T&kObMfqqL7)IvLg+)6Vh4Ap(2#t~&vM`^A?FNmbYqe0hg~9bK@$T|p(GXr6t2
z)tCpYQxSZbK~5Kqqhen12zQFl{|FQRHB^SXbN0QOYSr=GF+->Q6gB*LFbobJRuz`1
z!Er*S_PEB{WE|J+Vk#D1dVe6^yhRM)WQ-AcOQY$2;#lQ=a|0$$0uOjldZz`w5WWB)
zK$e)I+{x&PVb=F@du7}E*Uta*H_xSZDSE5-C{Oe_$m+2e`fQcj`fc=6UK$Tk=4BLj
zYw`1<cECbW6v{j>%DH6s7aFeIqCZjBxO{J5wxfg}8E1?Kg70QYI~cRXkE+f#br^D`
zr9Xj6>HWjrq3uR02Y&osHBM?ikiQW9;c3wUZf$uvrAhMX*v!3aox9@=_Zmap(9VIv
zn_sBF87NCR*UPRx=!b3skVQatSODK(Z2ozao1-lAr|t41dOOE|mn2-Ct9g4Mms%KN
z^8^?AHt2!`2Dw&%g;DR1xwUf%m#ijKGTFg;7C1ihlZhVOt_@|FIc0N_1lF`f$Y*0i
z<6!+|C&#*v5Ti;W><ai-z0Vkj7237=Zqby23uAun3)EyZL+F#s0MF!m8~q6v%NO&7
zAy&iEW6lw}TDNcSO?&5EtC_xt9tke&Iz>BIl+RrUogEu`nxMhv9vE0PV>uJ4woyZ2
zPw1`TxBf?@Uq6yv#+4@)Q}Zcax-vPdbF30xHUS%U6~i_Ywe4Uy=&O^Bgfah&`Vwc=
zZGYM^J=2m5F!_kmTD$4y;9UaBOxV^fFI()`0of$6{^Zk*NHvY7e8V#>|9h5Uw2!Qd
zGctJ>pzdP7=UQYTxgCbMIoHLos!PA3Dvlsx=~3SDQ*(bJ_&YBBi+3QqS&F<zwe)`B
zV5&~~F%+!0u{>oinr_QWi5F;0@=UULE)I$OH)cpMe#=qh+9)!@dG`p1L<?G{=NWi!
zUbLbyMlo;gPaPp$mL1B{G-p_&KI3SrZ{0*xT}8RlH~U$1G`rk{QmGglzfF8b5?{_I
zWOVVP-d^vdYkUX9Ksl9OPD+U65kM;(9HKPdiRE3rFkwml-pe4*5LhQ$<07A)b4f|C
zH}TQkg3<5QC#7V??6o59h%tS^)yX>F8B=}79FDL@mMxgx_r2?80&IhcZ~o6rAj%>q
zi5?#_Sf-w5y_(kP;}ccnj{E2{rj7gtHnGtCHlD`4pNc`78G*Y~L>~(Rcgdz-s`phk
z@ZHmCt)+HRp|6UH^2))#uptxL6qrr=0z~Rt>RW49n!4goBA&U(=*^|`#$Q(4rqKlY
z5Gw(<5!-ur_1cbA6&*x2T{Yjp+I-wSL~TApP#!VzgvJB_w+P9t%c<+ufU1QNz6DY#
zk~~Ukw!C~o_tX_4i~iIE_=;MXFk*a}>u~l#NPKMGp31_r@2ZllKq#<<6t@DZD&ex(
z{_(`_`m7{h%ZE(asI?B1$<N_IvoRe9%2`*;iUuCd7YFL~^;figmt0RNJJHyLj<OX!
zE3R27Qy1=~DV(r7My&hTawq1#XZ9^8{efY=!OvvtoK7Ybbj<uTcgHtl-PN161^}dN
zqGp0#`EanaMB3e66gpnqZk<+k6<XUdHm#y;T9S&-&rs}v!=;Uxz!DLPE}xsi(tM`j
zSx3*Ab;*;|CZmPjlS8+VF`YEliFeQFLIWMoQO2((7^!^%gPf6^AT>8Nc?)i?xyc=e
zfs>+$-2W&{;QcMOKae8xp$a`C?)5)33;|75w-Yfd5&PS?b{>KL=qqL-rj5AeuUzjl
zibZsX&zr_|SYT-9IO%v7_-tYOH+&n?pk~GieR>jZwZ}X+t*Tr;z@Wq>lK}1HiD%y;
z_IOo~69HZbFMt4ys5~SCBIxINqJ9j%@ZtQ2*@Ktuz*Ouj^9Be1M<-MHo-JNnyz?;=
z9q>zbe;#;fa87M`8Sk_x%YCdW`u8Lm;&SU+P0538Bg;|k88=tAX?bAOoD-9+d5k-8
z&pY5-#f}nPDqfqee2K$7s8zVzPSsO$?u_oXwH`9CH1*v&0nNJc8H+r0N1|7y%6qv6
zK7BO<4_!T#T?J|!ESuWpldk(O;i&(H@Fg(HBp>>o$e3-ajzxrSHtl)0Oh)fiZOky7
z>W0Ip+E(m*JpISKN?prcAj7YQZoj}78p-Ea<K~J<z=GO6z#<iTo_Y8RN32bdaD3HV
z3uRUw&;pIV3C8Jet9#)~Cbc>f70)lX>v7(Fa_fzFt<RetA$ML9=8iud7+}j*vN5BJ
z_f<YSDjvD_9?A|IJFN-du@}DhSDUJL95v!{qXuV~$M5o`V%J|`7EXWw)L0ee&-(?4
zFGL3oHd=sq*#-opX}tG3?z4Kk(G>LHI^Dp@{zLBL!#(CV3hdv(tmrJlI^#G%IrV7*
zstYI>3=m-I>yo4N+18h66rrurWn0PTvRREy|D~?hEbT{ckZpC)0moNMuKW4Q$0c>t
zEwTItTU2rVh*f|tb#ujBYzsovaPOry*2i8oYzF=tFsZxsn|9B9sjD>DN|Sp(6aK_p
zMxUb^r<XPd*U?7z%3Y?&>5%r%TurC1=1EnFX(K-fewtoGJymzx0XNU0f{NW<CpQaW
zYBwd#_xafj^#TgR{ND}xEswul0l6slu!8*jn;;hoQY<|^@2Sf&q`m>q;i=AYIWQdB
z+S*cT^+)T&`xLUiWgRL9Y74fXEQiKSHXUU7cIBhMLg+Eu7BYB4%j<Ew;7N%GZAdV8
zzh?Hdk96YG>vi${2_CF{5zNh&1nQ@4N&G?wi#N1$j4}o?88;ZI--r7<oof&P+kUOj
zP*w2*FXhcYSjL}9AJo2RsOoJJlfgx}H%>p0_~z|=TZ!PBD)Di!k^UmswSY&gngdz>
z!)fsrSLaw};jl)7td^8_jkGOjdlA{fvwg{!=>R&mbJm$}<?qbK>R+kKxR3W%+acnE
zOzZF&(Pn@N0PjkN_3Z9^zpGcwc-rtb0=;NvHuVpF`Lf(os%nF!3Np@#-rBd}e^l60
z9dbcSro`cK)1ygXiRZBf0D(zNoYoy!Dq!KrV~r{aI(}u3g@Zd`$??w7ZkLSodWdhn
zEhOIYD*3nsXkfV?&Sk?|0^|^Zl9uv<?S_eODCS}PNHn<29sAb)Q*Xz5SD0J=E`CeG
zUtK>A(@I|t+WS>W@$Gk>o`qc~{C977u*P6gSLZfZ)Gmwdd$pL6)c6A>V!+>TUfY@s
zxdAJ?Jh!!?^3#{VHF7)Axvf9L9b<H69DfLS1m)3@mKm~Y(BHBUjRdVg`PiMRb-$e}
zkZgI8cfFzv`yY`_4(EFJ6tAvjNC8!FU8aup$#qDR^4-aMwDSC8AQw0E`X1-k2!;Qb
z?{VhT-}ae0M0=2xiU1l`zilAd_^;<yy}!7@twXrv=Dtotb<)@R2Z>*<*J|r{-7GnM
z)yth{lpez(8?Z7vK7g3RAZ)q`PpS*71J73bBp8c(yDfYYVm!ZdV4**!eeM;(xS@6(
zO+O7R9a*zNB0ZTiq(JOnU5krF>1TI~sSxF{-YH4jrQOY}i4K{GnrPlK?=j+|MZpfs
zyuG80GsiQMt21tEF2i>f<w`iTKMd*xk7LTfO^UApgjtroer$LL1$v*G*q?nkx^=B8
z{Y1amT(zn$<HKm6>k0NJAS;LVZ~I3wyzqkS(QO&KnH_=MEmXO^qXV`YlA$n8<LttF
z99kE7(lG}Pg{{4r7~(Xaz`ON<Eac^Z&ldrUKzWiw|Ne2K^_Wv+9beE%E#4FhsZQ}0
zT#qA>eyP$=2DO?i0HZ;ym%)pW^~mq$!9zy%li<DQmO=bORh%jN=6KxN#Av_1^oM78
z9L+`F2EWB!1p9YU!SlcmB9LFk4BX;0uaZz1`0;N(1UkZH@y;JzOn(EH+<iBhR+pcx
z=UideEDw((^97|vev_>^Z>yeL1@{dzbAsv5y7+P$A?${Y-_5zv>L8lI4?Ayb0zam=
zY-=aO06>^JlCuKmnAH^$3rz_?U21#<RmPV-GVUwf>nBHoTUU+MEp}{P{kK(4PZ7xT
zNlw{~p(_pZN72q_4rl2dile}n`SDME7dRbrR^%Jd*ei-mKbv?uz$9S2pIWnUGn22J
zr5U|czFHd_2zg%8Yi_0+=(ErInuK_oWH5ervMuGomoFAB6VJJPR*cZ#<N3LFL_1aQ
zR)7gz47PSUyE4V+_~C~I(C0P(tzyo1a`ZG`+l}%Ql?1ELny+&Mksvz>f;*I{2@)pJ
zANy&_B`C+*Ez6>UYYdjI^^YK1-TlUg*>B-{bdgHY?OAm7aD6HF&lRZVIT*Ejy;Nos
ziL_wQU0KUdHh$=tM+YL;UpWEY!_xEEim@O{ZP(;P{cPy%9mr?9eb(r?Or7c35@gD^
zl8YP&+eHp1{KPmTI3A|D@7F{=B|fXK_)=1SCL|jY;)Bk>^Wg1rx?c2Ru#Lv54vJ8v
z6&Q0N#C-=x=(l^5d6U(eI2;VA_9emKS1?{@!3;J8Ex?vc0WJUpmVR_;7wY21anA}C
z3wzw4BNBzgK%}o*sM{5J^?c9Qd&v`o<&)*%vRm>~)^kem1YMz$_o)2j_)+~7^SLZr
zw0+}-n7s3UO3IHiR{oetr(X$~<J6YNM4@9ZAAZSD<hhgWKtwH<y6Sb~$jc#ufRP_b
zeY4uSa2RLpe41}2THh{xN<*n87C1cuheD-H$y)|HFQre5Q1rk+sHn`NAZg6O!9e(g
z7rF4APwrjFr-Rn_Qvt6hObBf0-uHJSe0k4@1;>YDBwoP{48&K4=wg$2u=Hyfm*7FP
z8&YVa*LQV_gieQa!3g-Ib36R2!J=!>rnrut>!UB7yOs+1LRJ40H2PI3s^$DPw{nmM
zgFW!Hy`j>o*Ky>Kf2J}*nF{gG1p{caj)i5(ePUzt-;SQ)Ty)LHZtlqpd>*PKGt^ue
zK(k7@x};z3WCFnN#S#uN^vCw~QyikD4uhG{DG(~*;(sqr`YZ}<tK8{b_HHcaMuikQ
z%S!#-_kk<viHMY(u8BLZxt-@2|KH#a#**o-+UYuZEYx$*yGXYI5NX}K`<E;#+~#RY
z_eVn5^<^86tK*WGJd=hCkoH#(dl!Wox4Id2$I0SjL2tiKdAECm{m6@vX1%B9On!d;
zMaJ;eg#Mu+qXkxe%UnW)KC6K0$XWzpU5@?bG*7LS;Czcl{Ax){;9qt@Jp`(GQzqQ6
z)U26QCAf%EgI&-v;pCGO@u2pK*yVQ>$Kn>B&$j-}$T;I!Zh(!HaW0>IgWuO}GAZRt
zk-mHRl3cZw%}`_G4X8LagK^P2A%4Y#Jj7ILaQ)+>2noUPSudR{S`zc|FSc))@(T(B
zAUe;k4BSY!O1KEWb+lKOn1iASLjwWSCsW*rYIayhm5HFa^01uib9+&&GLQe-_sDeh
zZy^Hu(|w^KP1USx_5Q-I34@<j-lfR+tY6G%GnP(}m#?eFrg{e5&Pj0|0E;4$KMb#e
za9o>6O-&sTv}IJ{T#4+vGVd_)T7Xqa`d6%Qz`xcA`oLN*<S&&n?WE4`-JfJXkyqa<
z5{nnN(88OHyCd_H?VI*P++H{bJjVIg+M2F~G-3)LHS{qe=gFeot>PVMD_WWlt!h+<
z)xSL@5*S33YJl(Z4LnV#Ww}?v@i=p0IzOQ>6Su0F(AaezZTB^0^vJaO3Ey2uXs^!=
zAPGEBpwlvboAP_$yGvE4wkjcUWxKRYEvMSJeqj8&JSpyUbF$?fI+lmPWTUrsq*Zn2
zaKI*gz7W1(`XeFXL7>i5*A>RyAN`C<zhFwTz88OYfQ${Hy|8#tN9T6P-<ziZMtk8#
zgaw%?-h0o#bjhgDyW0G#7tFIn4Jz{W3%tmcC1WU>ZYEp0f|Sw3{+$9t3F@AiACBh{
zewS{M#WQVUyPc?+=lul_WY~PFCNBTH$I>t4Q`f6`1STd%kw2C+NRigK1PnfgsxASO
z@_JrgCvM506Z$oN&c*m=&4eUHPoc3#_JG0gV`gS?wPK>rniah@by!DT#Ch4_I53}4
z28&W<m;LA9Bn51HOkt&vf}f6fWYxJkQ{87>=4XHi{_^N4&6wEd0y2hJh*z=u0VRx>
zk0;y;6r7AxuN!J&O|8^GN5od<>|j3xl$DF6G_c^8ZsE+(m;Pox@K#`}H?bPIQ>n{K
z6M}i6`NE$qq2_!Tuhf@9a}_L8?P6JxbP?KO_`9q9RSkYRSLS4(X2401BIZ)~I-Y(E
ze(59CM<bvJx;r>QPA2;sUn|S7F}>IQoiTx^6XRgoU2rIOv6umZTMlW1&jD)`nJgD5
zfmJW4RBfwn#EX45LTRFxzQ;x6M(t34E(`WU;$b|l&(74rMN6UUe8X5(Za0&%^WjEo
z9P`>xor<+lW?5t9D%>IlaS<wRbC|8ZZ9``Hnz>429Xha`HBp-YfKy+K@mi27`%#>>
zUtBrmO=<F2zlI|8ODJDf)Oz$NPX}5xy1<rsG{b*Cfa5pkg|gWr_){<(TM2U4)Z|EB
zPr<61GrvbYJ4Q-A6z*;va<y#NR*3U64mW{>tT=(vywjsBVh$wn3pAM%Glgtnv795%
z`#1?(`DdwfBW{Dal}(qPgMv}D*o@w`=Bb(<zzML%EwjQ6Nn72UZax13C{Mz%@iH9g
zb^0SeI6O`-?aVByQi4?Y5XuM0tlNp<pK~HCc*=170_pGM`LE-W%X|}`WUOaMa;)zl
zPH{jKI9H^Cvw*0F4T1>CF(y2uzQ^@<#xV44KQ`lMBl`_H?it$oE2_mO$d_SNxIgdS
z*s%R#T}96L<eK7ln5e;&eim5kgHYx32+@-Em6T!Q;968Egett(zGV@iHM_x>k7@PH
z2^%SIg0D*tjGHWFNwGF7oBg$3Hlc*Sw5YKB7J*|91?qk)dE)~!!>er;D*<trn$G7h
z@&2h4?c80$M{_bflm#c;baEeemD4^D+CP5Op7|4T)UkVCe-F_ECvJmhS$VQ0;33CG
z3Qdn1w`PV@AFgp0O|T}mYL}3^O7)*pqpUZ0z$FeJg%q_x#^nB&WF&$OsI>01u)f<B
zx$7TLjunFkRHpL&AC^MNpEH(4189T9h_J2OAW+la(iQRMtZe5$=^|9<A|Z}61Qwmz
ztpYGuTR;SOI-wbUz_86<l5bTKELW^4xAtB%lpX9GE=$iX?j5aO_5J6?;$}H$^?^M3
z(buDC1jzs6^|&VdNIKzi8>U_9sW=2?l9J(yfA(*DTSEGd0U7f*FYM&u*MH{zo96dr
z)SItB5QT4M&YgEy6}nU1&;Z+{b<ujla(MKqCsjG&!f42KPQ~zr=ic&Vv4Z0h68s6^
z;y=6wf_37b^Yumf9PnxX50uE#`}fR>k*cJ_>%8$TO+T+3&42EvugZ8i;}UsRSxNgU
zq5+ZRQrKG7qjH0?UhKe5rLbJsG>^qhvv`3`4_x>8T?5Nm-D|X(+AWpL)z#Mqj%(UV
zbL5auOX_ipSU!PS)-jAM1hyiJZm_TmE0Jj)mZvipF|rcQ1eOm`r(yRmcl^AZYL7tH
z_5o!tIXKs}iW`&50SX2xYNCGXfQZ|SU}<hUcVWxK>RKKoK}*EH+g)LNVk!XCg}^r<
zMYDGyXyv}HvV;FeV+<AwEU~08x29#VBuBoQVlS-3B`qp!{NnuvZhU8UqSyQF$Wm5)
zWWdjQ3M?B7sQmkN6@p%R8@E#BgA+x0!-Uqx8G<TEHxV=vQTj{6fiqe!w3CwJ)Jqqc
zU6S=QA#KtVaM<|QrBYG4WiaL`?ZjNq-23slI;8D*sZDQdW9vh}EqVTL13?f%6LIDG
zo0FB${t3sbZLvm2yX>S}BM8zW`Kdga>Fns=MNh1qcqbE(z|;>rtcs>eT$P_Wf%>Om
zrsj60QZkzuBXz<(JVx>&SW@Qy8yWVcauk_zBRqQ%P%Zs`0qdO|`y0G84c-=glE&N0
zv~H`$0-lwy9JM^mQ&jR@JA3?|-QANT4)$Q7DyzdZXqyo!=B7(|WTNyV-iS1UV*`t?
zW8L;dPJc`NLg~#a@m_|T@)$S8%qE)>I%z}u6}Q_{{T4uC6hTB+J-5x_THJgl{Yu=<
zuGcf~&}5Cc5b+pUy8~3!-9Pd>>who@BO~I<F_%qb{VM?RwDE|grs3yX;EEo$>40F%
zxv;oP;Le0{v9iX1A)$O7=G<hRtF5<;orurqH({(4e1l)?+5k=__C^6Ri`exAXDjVx
z5I(U~XiC0I>L6SfOEZ~L%iSt1Fo%Wo;?xYhBPYu0$K-XkZ$vt*%-~T?4m)|{rC*ub
z2qj)K6y=FvxrjZ3!4tFMFl)wE6#-)S%n~?qCcnh4rm-Q37t?>RgCaDQ?Hq&W%FXWD
zy1lNFrj;TiIjw9CCcd|zpMcqLvmgT$OIduCI$dMTX%%IKik{!fmb`B3B^KLXR<sa3
z45Xx7g5jXC;%^pj1P6NVdUBG~i-Cv{U3QQ-Hh54qz~GCdyBDAqHP8O|tGiNPbwbG?
zA#Fbmb?suI!nfn*?9xBSADIVGCVEdEwPt!KmYbDURbX_n2QCVF4hZiiz%Gd<;8D!v
zxET~b&M}X3hYny~@47nu$|a3<jvK(rb*gBTNtj7`Kz8sK*k2^<F6GvmF&}+yMHhQL
z?v^uL<$dHjzs220rVRHBJNj9jrYusq!+URwqw+!(L-_um+M0q_MOqI>)q(s%>AZTV
z%3Y*PaWo#G$zSUbE%!-BgoAC2SID70i|E|raOu&t&k!x+b#k2tP|=G0>O_)oM0kg=
zk6{tdN8D|LHtmf8wXNdQ=wIe>quvM_IMAuVCDHk^VD-gQ$W0#yLIbNciNW`_>#-i6
z@>5d#VTLYjDT!=db=LKvYD=Y>i?1YNB^ZpD-}R+^RPKNAe9k~;3z3MlmggjcoGwT(
zR_yB(`&~6~@fG0jPQm)HBxsIN7e2s<#g24o*0Oe&G}KR=1<%6uVeHMYzg_0eCuZso
zTs1eJ!l3tB8<_UdXIgHr%Daq-O1XT!Ot^hZ_P6M`enoI3hGsp4@y8$g!{0V%LH1z1
z6^kV0sLOj)+iL2VD(3@wte9B{Rgh>Y5Eh(Ahzz(q3$A#JCJS^o5qS$_LZ5rx&NUX%
zlJMpk)3+}hY)@ug(UO!8*}oZ+<#Lrhw!i~%r_-wX{{3fN$#hm=PRoOrj+$FXS$dMf
z9FS{!CNtH4ZI!|D`0C$vKbsQlKz9y8T%_Ro5Bp?;i_-K&pfUMvAI95P-!T^a9C7=8
z0&A{GnTxjs?z>5d;>!n;ASDViyH_8)czS)(BtSjoMccjVA1FKwt@DpM)i&Mti^eVH
zPsvf%oa!wh*+3F@L2Aujbx-#PXgRo2-@d1=4H|!a;x)pYYGS!8soSk|vY4{)eg)pw
zu<iegWxQP8t9!Bbli%B)qx&@vhXg^esIK#jKrF2kRdr)C0zcyYOZ=q(RebA#YvcLj
z=aJuDU_;Ka_g`XeK2zuQ>wXEtrT*B;tCYxZ*3V))XQk`|HpMZ)KQG`f=CtJfN3Jlw
z<)pJru)dHow2e<Vjw*uDfa)~kN4UDbff0}(Z~gG&-*&;f8xj1B)b3a3B_nkz#NTtd
zdM!j$x4pDqK^Vw8A?CmArP<C5YnX#2SK4Ps6t?l^nJ7g5)MA^>!~K=WkZpH{0n5{7
z4^zr2*-(ChE5D+K0Hipj|3|<GW0X!e1Vmzwz6lvMZ^fO(fAE=3&d*3)$8Vs5CAv<Q
zQJT7VAPlpBD?i(~8Nvs?Oim-4lGjyRxjf=2^vdfS<b#K2iVT=;lI^O`YMl#MQe+1i
ztqk-Zu6b?4tQZ!xyL>m9c?78DnRo9g<k9_OxPSjHvQs+;QlfD^ShI?VasZhC7fV&`
zE<4fmj;xa)X4dWDGYA5XQnMN);B`Qj{U8izFpy5#FskGhoF}tZha?mfH4x9YUxv~@
zy1-D5CsLz}?a$Iu@p9oifU){Vs#N))tdH&0^O51@CE_db_M}t#7F63n@j{O@B7=~r
z!7JciX|SzWk;FsnD9H}1H*n9ZJ=k7}-aT%>7^M4aCvH^vN|#k;e&xA2nD{h6UpAxI
z^#h(`Zp#bzr*!nkDLf_aSLq0%PqS-B&mny@=clQ1h%}{gTV`x8QJ~95DjeDjWm37z
z_nQi?ia;w_M0UZy6>l#UJ{9n{4?UOauRH4v*bM0=t0w}e@;O(<H!q-d^dmQV;G$_q
zRO~c1O%nAa8>pEC2h)hNO7hY%&4Y87U(;0GfKCrNFK;zTqq0iNQBO(CO(}#rYqz?)
zbt~k>6n!6983+pcRj0SYZMi2JcqJnzutr>9(TU&w?p^CTeBH6^Uv}mt6NU$MP;))!
zj04e5(2S&<t5*pq9biSlLY1i|w;I?UMQ4cGC#9xdfE<tn3Jo(yS#;yPzT19%(!3<X
z>vSJ;HZ1m%2F73^UluX^9c35}j3rm`2_njb`~6U}JR6fW2oj8~{#YrTBpwK?LLasI
z=3%RhTckN^@>u&hZ*(s*EWt^^&NQ_fKE1g8C$K)jFVFKyguAx69xgThhNL;>mF2CM
zy?eYZ14e5=i@t0{5|dZ8cG;2^ILX_(@7#;jGv>pdmqBe+z1uQE!;y-d(+}4|i)!nV
zR4qj&`!$>Y1y?8c&cK?2ApJ`%%174kPZ-(wO9jU!)Zldq4B=ip<&v#EJ&(*HO_@7s
zDKQkmlid!T4q&eE$$HXC)~{28a5n2(-43-`!FxdhR0-Y;qsL>@fGI0V%V&;V-0}2O
z-BBH`erQ;yt84YRsJais8>nVt{@Gk4C^mEYzB7`s3YBKe6K*f<Ph3!{Z11WVHCBed
zB33f8V_of@?Ys=<_ksP0>+lB(TElh^eOciLw6sEx*hij-`cZ)&$;Bp+E}R75Poe6i
z=0`5y1IEntXz#K6UMfn1nz|`DE&FIr)bMuruC{MP9Kd$d<<q&S{0V=L*`LhJCWniX
zrU6tFW_yrPOJ{%$4hG#+H>IT7RO%f)JlH{_6X=ThFaN%4(GZY~`OUJm8=Uu(Ci`CV
zEWmCAK63SQbLbpQ!j*eEDhx)IH32CgxQ~aIN>5%OW%c5kQBv2Tfqco~v3TTHs}Jv~
z188z*yql?CzP{8i%9sy32goz~y4CNFgS8|nC1qtxMyKH%;}RirXgMNhbtdpgTZW>>
zD{Q{g*w8pf10svv0tetvOOd09nTN!2W36Oj+KQ_nG-fbn$hhFi@!t=us3@3>`Y=c=
zNQO=^QcS5~+hQes@ZJT7m#*@{;jTy_ULJ+bv@R^PO;%*KThD;4zom2I_qo)sb#wuq
zo(9}uc!-I}Bx(Z9^+_HNg4OQ8ls{4)-?$eQ9eqniCK?z@<!ebbFWw5W`tB$<hq`1F
zlC=e;Qm%^Hw-&uoz2Rf{{T$8c0E4OB!RA6kvjSMrBeg98J@uzl<fxZjrG~(dOfcnh
zgs1=4tyL4uef?RHYOwv0Vk~|qQv*TBFkr=m2g0%NWIfr<#Twl8+)E6&zMa1QUg@SO
zv(kWZ?v;ku2+k>xGkbahuaa=b00RsBpne|Npy2CDJ5S4pLW+VFKSqv$y&Le|(p-GB
zLwR&S@)y-NlO~}S_LKMZyTGA$rN)xkzDBKsTl|%Q&(`x_UY6at@aPd#>BTy5J@N<+
zAahDFb@r28DRzi?F&p?u!p~XPFTO>LPDoYx-ah{)@na%*$gxuM-&mZ~yev*g`Ua{?
zD2Owst(}U{$0*YA@$mp*lpZ;8E-l#nfLf{e2uU-sw`6NNK;JZP-B<+OB4@n<D`I=L
z)1iM;!R}H|HJ9&lPqFJk&cj=1F8G?xi6y65L)PMq^y2IK&#VE^D0>lYDs+rReI1hD
zsi)Wu5P3{Z?QBK#|9UMpRqSz`$^1aaHa<M=((U)k#)HucJgzJ1PZsU&T0|>eHXCok
zLQ9q~9u6?63+go)bNENtQ)5eopB9>LW0_%?vBqr!H+)y+cJ)_bwxNz3C5_IP3~Lgx
zwKp7ff}7c2rtL^lDE*zP)tU6Qu~tf86wE<~bIIO4jR%m48pN?%rP(!w)Fjxn=~z%{
zM3tMe!urvtn6yI9TEnkWQp<)pwJf_AVHt5%vseAn$%F{LX)LJQ0%z4U1MX71Q<bP#
zzy{bo(ef7#iDd!;Z>8Ukp2iYUpLiZu(xm#}=qw=d7w8=dKWwZ(weZsR$kGwFB%e8x
z45*aHkKf1KYjYvC><w)P7uHu8>i@^*{6!igZW+6&%8T`kz%M9XTDs9^zjh@FJLp;&
zuLRN-|I6TQEbsAK;3ppCshT>pn8H*Aag4S7s-vFBYO|YCfj_Y}zcA9n=H?w}Ofq?-
zH*HnM1xh6&A^(*l3PZj56ZfkW^I<WMgK1rEOviL9DGoTKKR`8uLLK#cHL42``5J}c
z!O?Ppns(QRT3z79WVs2wbA87;StXrx*b&WQ1Nn1G5bh4~IqOG8F`3l$hW9dyVur7t
z4iD?>?2MwD0?j@tYHNT_hfIPzlOx1uzR<~I!WgayLITFKJ>8MzIqC;gx2^WayUM>J
zECxbe5pcW+nro$2_HV>v{ic$U7P~GSQuCRC5@zNwczqVi&LX5p$VVr^1Yvt44lAGR
zrh`<({9|n}gAT537@cV$cIRKMv3+1h$;El{=74o(&Vis4S~ASET*(u-UGr?=wquqK
z7i>XF_|hBbbh&l)<6zil_mT#88Ssc4U3fd2HXX1ni)+2|(PSA>=$>xTzWDpTi&-WA
z7x1pW90Sz7G04FRiTB6vCFNTefqp;w$mUK`Yng;{2v)hrUg^HK+3!o)U|Nv&lRMSm
zic_z9Vj~@8wRP+)NCI-Yy4w2mw5`Z4bz7HYtE6{gW404<(HXCX#+3394B(QVnWb_I
z4qLO!@Y@&7v+<BRpmQI_UHqx|VTcaujqwb9OxhQxTs~tG1sWps^B<&|vULSu%lS@n
zs^%6uqMff7DxIWgN%mM*RA$PQd%_W0PNQp&&|QR;WGL#?2*e#kq#Y(0FAbn4M>mrT
zAf(a2kyX2<zqX<HbKF}fufN~msp*3qjLm<yp8xt}<W_kQO9-v=Bx90XUs^$SkB`L1
z^tmS1<VO_%L_x81k7v}3HrMDR?tGD>cE>um#WB_=yXqX3B#CsjLy$%1(SwortU;xe
zwzdc0N!-7U?EU%EJVcx;b_yg>DNTbQxKLVN{_Wx;e#A<A)v|gGk546)*1?^2RGNop
znY6o%?TNlrC5xHSIc)M6z~?;1*N<LJ6x1b{Qye2ul6NNK{UgyjhozUjUuoX<&|l(2
ze;m$fFT%>d>#Y-r|MciLxWAq)=zHdVgeKd=PC-#!V7WR!jz0VlKNZtnnS(09YA1am
zKa+Z2lxV(f;v-j6>p^Aevmy&OT9*zPk!G1icRZ9cVQPZBO-_DIvkHV$0fOEp%7)On
zt+{0ojVz3)`ZS&iT9crDSS!SD^FNg0!CJ3Vp!^Z?c#I|Hs~srSSbvL4b3UWF*azha
z%foz4gWjXDOqkhza0MGlgqKXfY2x&}mS`e3)c?+#4tsX<&v*-q5BH-I`j=_SlO_x4
z$D&jD>N6KHGd?u6h9JLmS@E^#3ih4l9R41f$|lZM`2O1Ozj@pKGOqe$Pwl{RE##~c
zqwK6ZQh)kx{S9~FW$EcJ7ulc4Klf(7S!#7b=?9g;*0`fob9!U|8WIE^mxCzR2Gx)A
z?k}ygobTO<5|Mgyq3Y4?(*?#LOsjGEMZVHF>%S;IvSae7fiha^L%BiNRnb?DSG(W3
zL<f{#`XCu{2pTSIoiBy;51I`k({DtcJdB))V9N7RN0}iiJ^WH&@Iq?X9?-~^Bgcyk
zME6vo6)@S4qcv8<uA)WP+Yw0}7(z|QowEXKO1q2uHuC){_l<OWIjzxfG*JSGN3^P?
zd^hnf|78eg9vYQlaIyvoh9EGv^Vo5om4hbCqZfsn{(=mYjuP>%PW0%$DjFx?V=q0_
zrFZ_$s=AA>Zn<2`FiP&}cHK46OV?%*@Ia{>%aJ+CY3G{(bT*wJL=VQ~uksgi#wF1y
zUNf7{K8Horm7{WV=1|fm?(B@+ozqx{K@7S6BrboYtCYy?4{YF-^^&9UWbOk|7Ucu<
zj&y0-$O9k_P_Q#crN3WA4}=Ju=aEM6JLRJ(8f^=|T#*g7xR7^-$e86Mj=X$X#_tdp
zEmFu+XH1FQeH5OiZ1C69gew~Hlabt(%^>nh8D79$bb3bc<Q{0R{RI-Rpkp<&<k2Fd
z!=N^L<r0GHm95N!?koaM5k{vY+?sXc*6Cb)K5aTbO;c;OD;Hm!@L6ra9>n=ZJ5K9~
zLm3<LgjHxEw^lEYHTq!o_bwEDR=$B5ebEweQEI$tq}&wu;63j1B@jpffCY&4b^QJP
zWBt{$z}kPRPehxHTWjHaZ2o|~=>a%YFM_>Wyg>TSS)a|@T&}b_O}rrHTUgCv1_W!p
z<>~EhSDn?((I0QSf^}nxLL~tcG@`k75}4A}X-^{+1K~E$j+?3?4&NufFj)B|L}moV
zP5Dbf9UUDW3j^0QI41%0Oup)zYwYM!c9))S##j?Xo_BJ_&cBUQ&+8f4OHFHqCR6Fa
zIiGUV9x2h*3y_sM9D0T)i^XEW)Va|$mjGA@YanfHmbfN=n{6fYg@w(YqLClMq*MwE
z43!9e-+#RF+(jTQBRUNHktslhybNT@=X~hR<BO$q=UmQA_FAgOiqExPF(`b{KrAhO
z2>!1v<*2iU0#(k0Tu_s8@Hk*Xa`&CKm!0hm<o@9Wnr(14uphP4uTTA5vH)5TIb-@3
zK)w&h8RD_OqSw`0<)6IzI<~Wlv=N6Y^zQ`<-W&cEx)VcHy{Zy)9#d3wkt_BRSL}kC
zhd3gv>3FMT20S|FRP%5glw_O6KfjA2uU#*L-a|vKZ#+`{RcDcOW_5jHkAs^NU2cah
zm1(l+sO2uMtX8QkmDo)^EcG&=%%fGsk(60s5E~0;Tsr;wEeBm`KXZ{Q)(E+j`7p8?
z4xKW$q<NQlgnS1qi})L9##5$&^~>Ws5r2WnEIS_1iPDA*Ha(BhWA&aNrci*LV-e-&
zcpX%OryjEHa?cVn2$k1$%EOypVLnS*AKO%=z>ADJ(nkFrep49<vRVe<!`SHPgS&i_
z<6HM73|l23;A}Y4>{Uu-nuj}mXE*IRqVNCL6+BMqS<Eh!3NL<O-5*8QFREhDrF<Wz
z8^-mxQGXS*4#Lv+mu@0deNd8LS??JmKKA~j?FsD_VBOoujEiZ^C483GD@5v#-g<J<
zSi>EL8Mc4TV&1aEAN36DYQ`(Hg1g46FE_+{JFd?LV_?t-j}A|2o*%BA0t&0QJ&@x~
zG1oxU33QK$!9F0Rs>QE$+9T$BO_>8wdG_~`5o21o%BxqBlR*4smn!4Owf3YY4?U%V
z2&U#U^}V1i=YK&}xLL4jf4Mdaa}gyNs-Zu({s()M6|+EZy%Ks;JUd7nGs-i099AcI
zI}Ser&uP2i#j@=)3bgWrrgAZJ<G$Ml$Yjxq4r|B6N<7h~IRjhg@WOlfQU0H|g~8+t
zG>v3b`Sx!$?L_uE=U_#LD!U7{PvOcTo95rkbsNmD)p50VA;FDknK$P({&#ZvWuY@y
zq~2r*5R=aMP?y5MM)j@-s@!6W)yI*~2~#pZ<PgD-BR~DYf}wL*`IQ)nl8f)D*8}8E
zuOwc~t)(d)Y)0cAnDBgjuocs0JEs28Jbq%PtSFJinc5xb2F8JBY|KD+z3JW^`tj2L
zY}qk=2OUg@f8ONcjQ@5J^^^b|NX4BOcn5c-@j2TQCrtMSuPDWrhJ|V9|6o(344eWj
zmOl&%+yrSW-a0AdgZ7&5uCGDL2b9N@-`bh^@^aMqW2Vck&Es*NA*}p(6x~Uh5~DHW
zfF5<t5oB4sLC9+FkR1k72K)|%YZA6hvnZpdtZ0WpckB_Q><l^T(%Nn1)NZ@uBVCa4
z(HtG0EN&FKVN^*N9E}>6?pM7|Z}I+^|B=b^;YXUAKO@>9PkH5AOqlC1=lwuO08){D
zrLS`TSMh(Xdxh7`BJWE*1L4nrh|AJ<l4<fXO8x)(fdr8VfM5Zwv^#&xx5_AnrE3=$
zd)`(F^II3^H;pogPhFCVj;lZXq1nY0bSuBt2;kJz-dg8fc)}OWCbcY^>IBLlt0t&}
z<s8v}WqpL~YIi&o`*KEOs8_}tJ{MG^*;Rzvw*03ouDJ+ZVpKB|#gfI*Lxc)aAf1dY
z$k~Bn5Yjdh)IL%CsQ(LR7ud?zrVE#XYz$zEuat0HWNc<QXsuoMl}s&^`=|%)YcLQ@
z3gADz)&Z#heU~-%<-3)}a2hN9S<qF6A%mUBx2h`|2G*r8hUhWY<`pi#-c@wc%?TEY
zQx%?1eU<UkeJ#wTr+IYynvaLDOfWhcc}xlD^yq()iMuDE7{Z<!wB?<pM~E`VCK;cu
zATDP6)LlUjn}M%f7@h^@`v3)Smo?lD!oq!(PT+i-GPO5ByMZNw_>(5@@paufpq6<n
z>Te+<y_l2_>O!Nz<JCz-6J*<`;~ad-pE+N$4w$xe9(VJ%o$W=uYk(|Qoas{H7W}C%
zC<G!As>U4G=_aR&Uxx0tI4vB&p-q+!2Rp`>Md?5iYz6cI9Y_!SY4WYHKMyq^D2u~K
z#5tI16<?4ZVA+buJ0Fmw``A0DHHjsPT>YWzyyd9#Sx)21PQj||u2}}<6&A1Y_suFw
zFRnpTdZ?q58lpkKu6)?i&b}h^)UU0|c$vHBS-Wb(T=#R}qzKU6#8YcfxM=1a=XI2+
zA+c#HqKR4xK|^iGfEATqd-l!HxQ!q1eBVS#%+9A|5Kp8LCz1!v;F$lm-KBes%eupk
z&Ek*EMsqMJ+<R8|hsizXfu(6Os;YhMf_f2-IRFmV=AMj$$?y*=#>R{>#xE_nnBg^<
zFXVPaoFl8&SELTmu@9=)Fbdh6v@mzb`q%S<Jb~AiTyK<*SybT-nl}cgS}Mg2EkeoM
z9HPegmpSvz(82<hRaTgQ(brVICtJxS`$UwV=S8gH#y$Kuv1*}n0%E-fW$vF(t9Tsk
zDIM|PWJ(^m6Cb8mC!s3HTq}A0M_Or;jpcEJj<<27mN;P;>K18L6lNpR5;r0j?@jl%
z6Q-=}3XQ$`z~e5GyfEVhQ-mq8W2rNL<kEnbZm7ZXFx!fDP9~xwL^c<kbhQPl>3?OW
zljt<_0g4vYS(=A@{Bk+~Fyw@?J9YM$GKUvk7}m5cx$>%z8ulT@S1V_3CO#V0Au{K-
zQw9Lh%MDDI8(yz&^ImZ0g~>i38-5X7U7b)y*7<*_YGkzq5<(#|$LpjgO8Wr=RkmPQ
z2yX*$NWt<{6Byq9XC~&t>Sv$W&zkyMNwb^;NH-k`cF9aj=eDbkb9X~{EY(QHQM#cl
zh@j6keZZwvB6Y4*1zm>IF~|<RBSz1a3PS8ixLb#M|9@#|__e;wiI(q1wL=c5kRK)m
zmk{Whe$^V6`F`*v#Scb;IK%z&xc-Fj9)zI+BYD(qzG(DA#epsi-q^sYUQU*tPqr^x
zRH|`}9C{dSAwykOcFl@pZsP3z5gKMBSRu{V?)rLnOgMLU;7&(rxfqybO2rOW-E_UX
zh8Yy=phz^8oey|TJnAf=!f}&`Vx`nffM>d$C@fS(H|^z9;{;HFE=ai14DT+5ZdE>M
zEgN%cGo~rtKCYVa(WNwRdMy|%tbJnC#k9Kwl;h<_xVJ+9F3DyGjyzGGw~#%_-~q#u
zr$$FPfE_*z2j~H2@3esN!8V*m6G*Khw3I>+P)vdO_=zP5E=5c^gkS1Ky>>p+)PB=S
zTm|lC(ohV@WkBKUMw?MAUIps&=q4f?rU7Y3X#Vx;nVdV^>HYvx#`tKYrjGW8^Q~$G
zU}oe--F+RogJ0~^=cnqvk9WKV+Uts;YM`X1Xlw+v2-+-vJ6z6npq~U@?rY=^c=o5%
zROdaB{;|&=e+1)E@r!b%O*yZ57M@TR=$}=cGaL;99r&wtfmAzm4=A)JC|U#fVI1h9
z04V;`rU47%4RN>BWStHIJk+p%sjxakDnPBB0;Oi^0A$z8TPN4ymR^Go`1N<tra-6e
zbY`d4P`9XhIB@QKSTX8FkkkDeq)_<*!<)<pdZbZNI)@qWNc79uXW#4)%<i{^XNDch
zr?ox>RAcG=?p%>_k$p+pfhtm|fGFp3GkBlDXYrRqOo%$&SXXY)R|2wC3T#6Ls9<X-
zji<Pv%Rz)~x5Ce36_1e*gTGvsu7=yhUcUq1(yuEi_F;S{_on9!d9v6QYsVQ>IaQZA
z{3n3uurgqLD}!DID~D!A0gY*6>m}mrm;AFDR7O@TYzt5qvY_w*7fIH#JqdzejqvD}
zKp%aT@u{e;sDKf}Q3IVZ;Af*Qd`%+(Lkt;n0G{B-rvAv7zI=pSJl0e0AbVhc8oqV}
zs<j1`M_ybNrSLk0_=heh<Dg%l8sY-F)jqPmxqUnaWR&hl?^dt6H62O+%#iP(QAJ*x
zE;&;8l*Bb16>@Ql>FsY~NCD`yj*qBFH8_pQ&lu|Dx|^(4kl`~opY4LMEp0d$*lke&
zI?!t5V><93yk8HU?qr`i{UfyZH;2}7R8(r5Fr)~2RFgwVR*e2Nc59WJ(k^)yQ+O9#
z|Inv%E(+|ACDC0$u4_lvM2FWZD4~#uZO{zhw};ry$GO>WR%d9+hcpE*q}pi;DNqtt
z`i=zH-<>6^`Z`OKz@3EQvm-qm`VK3<|Bm!_@BX$sI*g7b8hwfmTRh#|j+=u+E$pzS
zpwN2f|FC!R$@;gLUqdWSFYj_@l+Jq(jeqz6o4Idp`p-tE7?XT+d1sL+z%oZ+2nKcQ
z4bL4Ew;yB5z#1dTa9$rcFerf)>#Zog5@3!#^()yN1I$(cUf^HYnHxycqNSSmCoO{9
zf@XPn!AG?L+Sxx@96T75J6;Bhpy<R%Ga&nhj>o@l?pc7fNbWj`IA|zuKdyCMfA&+8
zkDW~0K+Fp))U8mwaTLV4Jzs}|@>*aw1MEd2i#xH%)YzM|_YWK>=38Ey(ZlS{R%<^?
zbrN4cU9c13V1l+5d8Cym4Lg??{IBUgPZ_vSETa7^&ppZ|B_J>FyO9L5#>0+prPEn=
zTRWV!tZYph+f{Mhk%J@sr7c56SqSBFvXfV<%_G&kFW1WPll>Gj6`MZG3$-!1d&BKz
z<gPCjx9%!rkkBt>7QesR*!YMM)JFz_moo-7o$IM(U4Ch;C2?`}TN+tAp#^CixT9Sj
z4k*%TzygCrf{S;`@}Jpq?`CE4NXx|d==XtuTrunG+EhD<D~gZ=IgOv~O2a;((Z8k)
z2GMG~mKiSWU&y{6s6Nh@(oVKV4mEO_)3_A<CIT0s-A6hBn0A(`*H3@fD-w&%S8bjv
z<3{zO-ua070m5Qn_KM=`&)Z9_4|$n{^9O2!pL>KbWF6eEG_v3W2P2tH+orghzIuOY
z9Im_gtJ4<oZy-p8yD`I{q;=ucxe9nKFJ{w$%FEuYC_pW5cpWk333qO>B6RY~3;u?7
zJ{36ew`U#=DpOk833!{t8T8xf%h2WH?iGt?f44JDR{q9;ZeoymzbHcIvYe7Nlz5uX
z{nSk<5@<YG{{0uKOan^pTAP)O!#eXIJ<<-BJw^HCPrd6s|J}g3p%`z?BTq^O+@1K*
ziOHiI&6J+fQPZDJXQ&pBu}4%*y^v3mr<;AM>SF*3u!mp;1$xgF!hUFl!RZ{3f7H2l
z>IS@!q5#?d1_V!VCE3F+a2BWtqW8{oWfd$+3auPTv)p@5TwSf~C5e~Wv#)$Jb#B0>
zRvua;C7rI#v8ZWKHQtd*9$hJ-t#C-(vD2rjhD_cV+IqY3=4D4+>?KvIiz0Xe?3`3L
zFu!Nx_<?Z~?*$D>;Ei6U0w2UE5F#CA6E!)^_J38zjPNWgm8f4%j<to5Y)E!TA5O^5
zd)QRVuoeBdqv)ovO~EkSw(i#H=f=WJm!*m*x%ov@44xX}p$W}%Ci^(ARc(0x`6nwN
zUl9wYAz;MZ43H}z=@Z53>!gf8jUC&$%@;JzXkP?m4AXm6;LCGU9kU5X^v-6n!^spf
zX(bNiH#QoY4x5&bkbKSGEwg`6mny%}O}y}?(BH>%Z$K;S%gok65ybF`?llhNq&Kmv
zXLzpNf30EXzfxWGp&QG%8>}yPM3V(#_5Bw$N`g*9g}>>V$GV((NECnD6COLbB%0~&
zbod2{RJp+8oG`o)QRHL{7nad01(NANa`W7u&2=CeP;Lv9U{Tj<`mKAzwJBpGiBDIS
z%wZBmR6ceYQ}V1yyQ8ho@Yp*4Sf2&BI@K2Eh6kiz`<tggm2@nkW#!SEc*pAc7ik*u
z*9Z5u?LkMj1if`#j<LLK8q{v*Xm#yaf>P}s`pP`gge}`5iU&09Gt#ZU`^--6+hW*O
z&NhR}DtUeJOJ&`Th6zIsKsaYrS{^y%AovZh0$ZWIpSN_fHHeKCY3h~Uk__?ZrDP&v
zOI*@fwBU5@DXS5DMFxc#L4Iy^SE?e=xLo9Ld+;i&Vg61x+-+uxy|dEOMZwwb{Ub*4
zO&)Nw&ReY9*fSAUg<1TJ#}vJ+H&DU8Ak-!9EvXiJw){PiF{}Ml+xtFA<Pf8l$SF2X
z`nI#`I`t=l_M~OS9i~8~-V1hkh&*_EBChw{FtDLVcP@m*uq0%Lu@b{qRIZ2VGYg1d
z<PBpi^+k&KZ=FH{av7vrHYAk}uPgl`wb3B51-JW^hP2vhZ61r~59e6AVqcn|7BfS=
zPgB0wA$lWC|Kj-qRfJ%P8Sf65D9&Z+rqf$EmW+0-%>)oOjOCw{rBApO3^gWDaK2+F
zTL{pq0G$#>Q93D@_J<~S+LDGL1W!+|c@=(XAIyVLSRKJ&8#UpXgInd6*+IBoQ%)!-
zt*watKQi9l${TK>)fqyn$Y5cU`vg~t1n=HrRyEAMWgL!o?lYcB1-HBPd;UedzCO1>
zJy`=4yKn-mMCKsloCXB4=90hOTsrrx>W6oe5B~PcuA%_brQ^ovIK0PgV}0IRzePbZ
zqG5FhG*0B%KtbP5N{QeKf&5ikrZ&0WoNh1V216%Xm}7xDI6Rs{uLDo>62$uSq90{x
zRKT~Fy`0uU{7`zYf0}%xJpvqN9_a|2t$YGBti_@DW#84pc)v9fx_P})z-W^!?hIK1
zC#&-6rpjaTBF|@Y=hI)brwE<HTK9*J0ny_K;u{kIMS>^uCh^culVSmiDH9-@rGMuo
zFW*lL@gqj*F+$YuNP5^h`a#LU#e&wGLtPmVF%nbz)c9~lyyC5736k&m>%{<Q%JS(g
zBcGaL*f1?3CZM5VoJnTXKHLAW^8d60EwC(I=Ag}2H2}<lPdhV$uNc+Vb7C=-=ZHGb
z`3CT1MV49MsC|&TyDTAlk$y5wS5FT#<c<{o0UN`e!?XO;Y=@>*M_gnF<`hpJHT7KX
za@&wla*^7uVy0YtgY}Hx!TAt=+Orjyw?*L|7?>LNH7E^I&ami^3Gpc@g4S~3^{A^a
zyhTGl_7Y<d%qRyNQryC?)7}zD&1<xc&V^Ts3d-$iZiAmWrL4B{;R#5Hr%5m@%KM}<
zyw!dxV)iv>7=DpUeXRw&SEKd&cUo3j@0X<$B(9x1;+=kX2WnS*EE;W9s#;C=3|sYn
z&3Pyw2Spiy&~ugUx|g+TyVla`ylF9#;3yAWGG914N57Yb{!Jv5;k0XUi;8cPWaG|{
zc+a*S(>MC;cQ+=kg*AkNgbq7b0{{iThib}$CUDhThY_~h$6#Vf?p0rB@bQOqk|9OY
z#Brg}_47R7{c+sdp-q%`SuuHY$S3za{A*Hk)R_Gh(eJr=9Cc)w9d+SJo&N<zs)MG*
zJ`fOF_F@EQUS5uoY8K%(1bCR?THlgS)6v#j-ak}YUS7fbnG#$xlqz8RZ;h9vl~>LJ
zR<W)(Uwe&Vl4yA)**pVmVaU<m(z}9A>oG}&kky68^3;VlggN<iE&558E1gWjDuxJ9
zbPxh*1jVE@H@#^qQYPG!qOCb$X}&HKjxuW{PW{wKPN7QV7d>KU$IoYcx@Wl*!)Md#
z#<N+$>#B4@PXL-q+}^qelo8Zx;z`MHxx9~1#MG$w)pMC?q(BUg+taRZ!W>T|9xV&5
zSx-IuHEga}C;pxIx_+a^(;(IRxj{MaN?(4$Ry<+r;9cThT<ZgE_)&|WK<P@|;P;9@
zC4D2h8_g3D-yubx$#W*L$u^fgQIgmHDvh4H#6o<qWTvs~;N<2@p`Iq?kTWk;y&ga0
zz?xiQwPhSurJpMavZDb2W`ES97t)zxuf$_PU-Z<zNnM#w2W(7!&+F%KCyn=QAMic~
zVps9eMJ0Lu8|n2fj@2m=Z(uG{(|>NIgYgqMyYH`(#%WC95tj@x3OC=ule$uylzR2P
zK$+U^7t>8@2SR8L&gIBbrM)7ORjy}~91VD1AB5+w28OsHf*BEQkkDTW;g4}qJ^qkk
zhPMvJHY6wr3Xh~(04O5&bh|5%At~Y#N|~tj12~BF{>)$^Nrz_DQ9gfJI%rJULnXb(
zvrbcf6R-uNJJ=mZdo!09DrbN}TbM7hxmgD;0FHnP0vZolZDDoy`uV+9cHWuI64z$0
zPR5^ZW=Z+we)cZ(NbGT6<bN-ZQ)^pCY;OX5F4q7mHEnzP-#Z><G2+qNQTWt7qgIY}
zD|wzmo{+ku)vA7iK?ZZoc)Z^cLa=V|998?)FZi{F0Unz>4#;?-Dj)%Kou#(0AG+01
z8*{IdCyHiyY%3rLXbp6&8+Z>J|1TB>AP}+^1OtcNt5N0U_)V{iCUt3ot_Dg&$kZwq
z(116#7uAnvRK1^<KLal>&_e&2C<ZlmoUlx+vg0$UyOFOnb^Zlh?8IOrALZE?fr)PQ
zTC92&=t+u?K3qL2$9)xoVVI%HrFH$=d>erzTM#FDD_FYw^r*~<HQ-PQq<>NQOStj5
zDje>Q-UUYEzVClP#oJ{?ZIPF{LAc#^)bQ{L$X=&#9fBi588rrnZIQ2V@aU%$D^bb%
z!dP4%X7stDmx1TMONW$HYoOR-4DJ8N(^*GF`F-JjC=rknL8Ju*rBu2x5EKxkd+3k`
zVd$1rKtMoA0Rf4jhm;VcyJP6Cp=W5gXTHCC?^>?qKQPR^^S<Zov-k6So~28xf9e|n
z?Q?(6&Y6Wy^);tWawwg<t`2`7>2o-^E^fW)dlS(d^t_k6scla$uU=<{=`DQ-_!g>p
zzlU*dlCSGU5H?@s%;m~&=@P!gQIznwc)lC0fgb!o4PW(>k-+6nuoy)oeyo}!Y#d+G
z-DBD$xqF>E2Jvp@gp3PObcj6r6Z4`xzW&HE8OtXhfFhj*v2J?9$3<|1By!-J<VNgS
zr&HO56^)S9RLnmi=wBRJ;q6`<+ZHrpsMgpz(??xe{c13+D!sabROqqfA!Sgj^w;7o
zeRg0hQjLBZ3``Cw%Zd**ZTz{Q7Rm5|WfO>yrfY%5$I<)RQ;24e15}7xXAAUR|K%mI
zh&?A9729@b2~b7stXycjQRHUUpX|)=$R%<qZ4{yZg+TH`^3~xjc_~uqa4~$Y%#G(^
zM<cKcPDvqC5Z)3nR(J-$iHusSz>|x0*0;A$PXqu(>GQG>@)?W7Jb?}lFiOKjQUFNy
z*g&u&Wz$v?M1Pun^XD-ZfSJ=MeeKH=0MOh>QNYjP%&(Y!F8M-sSr;<q%r{~Ua0Vd5
z?Kd1P`~hxj7&H!H9x*LZLo;lWypWi`FQgjDd=;8Rt61NVnj@VH&f^JEW@$dW8)s<b
zJquGhprw2M-9T<a_9rt_jEmhguvWAq@8GQJ(Q9~a1N-|;fHM8#%JT(0yICeTc?}gL
zw8lTO-X#W~Uj#rGfT}gFJ%aj_xyS)DzW1!O4QmidbgAW5A2bpP%??qMXn<Xrp6P_(
zQtY`q3M>~F(1%0_m7_0<NRMszSX6y*wDoXcQ*9EWq944hXhcCm=KpvveQ~5CJ2J%s
zL&t9^-XF^>$U2pq3oVyl<u-54AAgz2x8#Zc?I~?^!-vUCQOZ&70{Y-1Ux-qI-wCG@
zpU@m`rwGp<%G%~#i2I+s;&LEV@e`}_HLB9eTg2NY&n`PZ(`q>%df9YT`auWr$fimz
zAF;S>hD-qMJr{A%o5X@wL5-_*v#IS^wuho3^)fBe()rOH2-;xkE=6fv_mWcP3A8%B
z$ApV39s_aXGZ1^&S!=8hvKSpJx})XTUdSDn4(`^=Tg>_VJa@Yf1Wf0z@Gc1J$zlsx
z3<1O+BlB%?H{!i8-cwY;#vSqTo!f%}pF~>jtay{$0?#tRQ;LHu<nAk(Bjal6>5+3&
zbcX)r-t79{`-mO1-Ugc+LQ+Cn(p~+SAJ@NQ)@4l>`7;Pw-0HKx9vHtfk=~~(RVka|
zC&7*63HGw~OE2KyOsV)%4EOAQnK}8MlOK3E?BOM)uaP@Ka!QqU&om3}+T(AD%{~jw
zFc|z~C|JQ><}tH1r?f7pH~b^^XsgJ<E@#i}xXQ_9xVDZ3n$7nJI8i*&_wE0UVdcK`
zQUm?&ssf|;@pkEq*-~3k|9C6Q#>(&P?3g)zG;POt&B<SvYA`uEKY@tDSFMln*;&<#
ziisf*GxoPKFVvkEOW%|{+-c9f5pA;Ax%A!r;3S~3a@1K5hHH&L)W+KjuOCiG1!QRS
z=Y27~FMTyxEEVXtzaY1t!Df818FNz>GxgMl`cCTgN2@W|nbiaBT4{9Q+aed$tl`l^
z5d^$nbJSz$(889|7i>e|sMS;Ox@}7Unplb}A(misvM<jrA$V&pLhE7&Xe6^w{ozLT
z5}le5+hE0Vqv|wHX{e6Q3ngClTh;LrZ`Mj`xb6f0x3c-ZRu%K&DxS|zUBKr7*5-H&
zSCK{mN_U{k7xOf{5g>$s<81-RXpPQ1fjFASd(EB!5KI^i8(}{gAz=Xm<YJ?D{Sg&G
z@a>C2oiM<x6vvs+YBA6Eds;*j%SEs(e;1Bk_K(9+;fOw-U*6w|ut`PC6<EHjC{41Y
z83{RmrLi0BshG-?K1>tPr|4J0AKIvnq}LL^JO$c<VRY#zyPMY86-lw}5<mz87q!#X
zC@?8M@IV&l>T<k6it?`9HyS*!EHd0^!wM>};!r4<@d4?9=B?(u6n)K_;9d$_Ey?kf
zL*ndwTbx#6`2N|f$9Ham1<!xwp-T@m>Q!U)#~vDCfMwa-rbu3atYSB}#rfbX84JXQ
zLj<H%>Ff~Q+TOia2*2WkP{q(MM85y%?i!>K>IzJ#MQLBHVLAoRQOGSd3^8t$>yKid
zNUNxHA%DOD>((`Ul{t=iOFX#b<oyzB0SNUq_3L}3walocoKRuEKI2E057%OpOyv@{
zh<|Cdouds)jlUuP8epG+d=F@s%GF7QDsAb<4|O#%BYBeJU)myI+txN&3cJy!RRtk8
zT^&r&URc`#vT=e|zbD^qbG#>z)TlwCWz%wtC;?X4Ph=&g14eq_X7+V-AaR)U)h2FV
zPTQT0T!NMz5Y>axXFIiP!SN0S-qU;6m&NEnux-=^Xo4+1J43;`8m@k?lDOwqyNnsl
zESV=KNWTK>E3*l?GyJzksnE=gt=W*X77>p>6JmC`2Zya%mCrz}=~!%{tMW#n%!b3x
zo9VAh?|BEum8AXU<v7m2XM66%O}nlaTY57&xl>!S2d|uh!v{J)weQnum!BK|QG#RT
zTQD6!7yiksUt6PfjP=m0saB3N&=VR@iV^`YWKzRUZ*%cFrQ?$ySZ{Cl;{l7m0;bPS
zv62$+5;f08>qe*`BpIQr<=*IbeMPgw@p<kpKFg18HafD35t&OEAobhstPZRknjn`y
zwRv*p>Iubmh#^f4e58N4s-^&F$>awq!sVBcqC55l2pO$&DG<qU4@jg#uV_DALYvI)
z#U)dvr*djfyOc|DhrE6Qs+}@2E<(Tp1Xer~j^uWwzO$h57peLuNAoJ)TapEN*;+Ud
z=-jS0w1JxAcOB>(2zt#VYk0ARq<`cGD!|S*V-3o<bAZ_Z?0+#|WiXlj?!)%!V`Wul
z#8z`T#<LJa7);qejQ{rXgVgiBRj>Uqi`(RG@Nq>fX9NJeu+}JId8K3Gs!Y<^dmpZ^
zNrD&YmcKpVhm0EoUqzMgDFgyyKa4C7@eAxBTrp6B$8nmMZ=zLOppzpT+N$?6D_N@6
zVXp`~t2L{rE5*ud$$gbguRQWF_pvlj-@_OvuoC8n&XzRT9aS-Hwf~E2M>D4-jmYco
zhT07d|G55RQ&2z|_hHb2*(K7jz~y!eii%+hDO4#>KkmXo6=;uws{uXgu#v43=U)jI
zTfIhmAO!jgYEj0ST(pam!UOTfyq$+!%;%uHf9%Gm?cPW`>xX%|F$I$w-7%DCp_%er
zUcNkjfryI(+%dx|=d8`GzuRbQ7;fQ|ixp@$TqV}4Xy|JC+1!n8FRiZbz<a^if_hw=
zdN(XMVpt`W;gw%-L<U9-e?<6i4HOoL&Wd6A!-T`NZ&LH;eY<2)<wG?#1`J|X^S(M#
z$`)-r{snev_)G3FNne{?us&&1ZQe*7fOpm%(MFDr01cG&oJ$!`Y*!(ASZrVuZ|@@V
z=ZdcRQhzGKDp-i5kh9$Fr;~vLLb7!<?q`|mbNN_1htHOlTp};10;)th@*j_^ZD3$@
z;VJ3}`4mO>O(^b*TTM~#l*@lKjQ;<$H+<LCCtT4@K}LtnC%WuV%>t1SZ|JYXGSnlT
ztv$HxC+W_GtHBrP?an-zHZNuuKLu$~$a~Qp6SOAf&Yg)D5}cjI@03BjF*WLw<2`_7
zBm!pU;IuOQwMuo3&ZrL61<*gBqKHChq<`orc9CYK@nQq`=5F2ZsQPO(&I#Ox5@2|Z
zP)g`y!)(sdxILZzgcktej>N2>ZvD+%RZHinf#~rJ0l;daPp|ES#;-&@wV2O1-*x+k
zm8BymCohRI{!6bNd3NWDUQ?FmJMDD<`Ej*j7Ii%n%M+y3W8<+v8$^f>GazpG&dU2W
zNhnTy005)<Y{>L)swY4jQvp}yy(3?xqfGC?0oT$8G0r!D7dKZP)eVM)*?W4At;U=W
zIy%iWgUVJs|H;Q&6<Ak2qxAzLmH*si4o1mN&92pJ8SIdL9NKY4DsjDMlSG}7Wa-Iu
zqFRZq)uZz7G&#h#rL|z=S9Ejjuh?QY!daF4+4%tu;KkX0xmM7#S0hA!aJd>emy$I8
znye%xdokrxPwv#`|1#u{6kj~vkou%a@>yv-d#pTa)Y`gw{pkP`(V1X7(m+#>x&{%f
zlKaW1d{3`mcP40>Y6Ons3S=wDzal^OzK!A0bLfZGA%aGE6Ak(}E4FY;dM_i&%S^;j
zT3jf{ftW6(6T9s9ddi48stKJ}_3`l_V{MA8du7Yde0Yw8!T&Wj3JVr;-a=8&<bCA+
ziLm8_WSTW5j~Tb;<<DW%uaT!1^$NmhH$vc@((=>8lLS8}G0P}pv|IZ+n$Uo!B1*9M
z1M@(Li#YWdKqG(?L2G9=@AO^c-3Nh%0>1F##z?U0&h)ikRZ0^&m~f(WCo0b<9Hx)8
zvz5AGWs*rw*Cyx_tn_+#>5W--K3=r_Kc@NBpwtja!cusJIYojQ0oLodT}O_YOD7eZ
zqg$(8+~gp-Ljasm@2KZX`F{-AjGRF4fn+pnaPLft#B3yxUpyi-(fD~uQFvv#OO4O)
zEDxfKxS_=cY+zsr=Kjaak1H{ykOh>Z%F?v09L!toYSBlmbYvh8vz=<@Z%F85zVXN1
z;lk0|Ms?If9Oo}jo;!5JpBqBJlD`PNLC&cz*Rz*mJ;<Vo)am?7QQ?r8YJ|5D*q_V&
zw(U#G&sZEVXfQwq{}396!RenhZczh%ZNot%DIg!X#@p7}<e->3(dwq0h|ZKR$8Ip5
z#%n-3+j+oOYu8^r`TX#AyOc%?)4`OGrJWXjL8DnPHlKRx8{IY-T41$4+Ir{&-G>O9
zw+7dtEo^}9(xv;2@MS;*2j>^N&rC1;7x$>vOvwTfli#*~7N5p%Sc$yfH~R!5H$-d4
zd8`kIK!v5Y$8>ojjvi&xEP~^5Ut{fxr>mz6w+)>Xe5z3TINVC7TKQoiUWt=kF|6L1
zjbkIfQEs{@_R7kx&iZ+xZQ6U}qR<<{<~}EoVmOaT^x~PT!gjXMM}=3Q?L?`P%wJgn
zRvR<S4YeO|%5ArdKr2lr$=g_TsAz+)>4YQzExEJj7Bs0RF<s{UWpj1I1pxbCMJUWJ
z)y{zVqM#zn@8k`we1`vV+`VnF!iW>3H}m`g<y+x9AZRkVNYV!r?MA`)jdDYSd%RQE
zz^{Hd9if!o`E6__4VZSbXzc6sm{NUsgMm3wNAK{0)Qck+Ts-Y+<4d2V{ZY*!I?=aU
zcH&aoTV|bU!q@Mr7sZaipg2bUCIGMN?>4hwF^XbFO1dW48=Y5O+=u<eW}e*H-c=uO
zn#%-@Z6Ph84-*5)>HWLJZ*~bJ23zx84VyyIE=AkbKI?CmpQkYowrR=$nc(?6^X>ZV
zuTMNBX&Ki80F~5+f3jp;UObF!^_}!lzKwb+aUH`t){zsUEKf@p!=RB0z^9eR_W>Y1
zM`L~=?zV;9wxXy6YdnF`QaC}mYm+R@H{>@B3}Y+ob-AH524-CtiJ&JMZGs^1h4GE*
z9f{laJ+&J^A{QDZ!$3a5cL<D+jn4)?V-M{0<P0sHOOi!1{mN2f(O^#S_i@X7k#%5Z
z`_VHk)56-RJU)8l&*9tSg*YpEiL_P1A4aC6>TIHWtDha^Pc01<9c%evDmrJ|%oEFL
z{j*8QI_;w2))g}m2m{d_C$~pel4ZvqgOi|s`dbQMago7?6yd82bA`9rrKkf!2B<TZ
zJx5is`6$F9Qe+lL1r6(7_TvvhX20bAZt$NO%SP(PO>O55jp81Gu>)3qj~z&?6iaf?
ztXU_PQ}smJ_>)^-k66HqHIC16+CJJyTb0{~Yo_5V!vXagV09jFdn%~$)k_3VQcTjE
z8*Q7x5pooJR=+MeasWu_&jsQgH_wUFnPQG@e}8T9wTzAC7Xaq_H}uC)YlSale6ZA3
z=k@pmoupX8XmD@_G1V{NEl~Qxd*gF+K&bHzqx=UyD7v7>N@G`4@mKKW`z)*gVEO0z
zzTl?n=W-46%E6Rb`#N$MpK0!<oh6Uc#`h3h?5e})WQ!f$V=l--<bIV2)=iDg=zb@4
zwGNga0@4h>^LoLVS_^Sh>Xgtr!28n>+iLi8-J*#&s@QpO03<<Bue`gUS1(S|#D}y%
zzUwKux&%Znlb*KQ5LPy9aNKXxW|OSydkvS+4CCX&Ai0-}r_&%47faUzZ6jIbwN2=u
z@PFC3I-wmFUf_K+OA>&1A3M@hI;I$T>O-JA0N@l8uXY<C0L@v_*m&@YZ^6mC8R(l?
zU}^#e(JA?+sB=(nDU3Rl?>V$~4lMz^xZl;`Ikre0_a%Q@7U)o46%gPEQ5H>68^{|m
zAmuiEm83*b;EKX#w#hKcTD^yrO+h<)Hy!KL9Y-T<o)4SuSe!`xW|_k>g?CcO!|jz|
zV92=$gbXo??j@C*uQ?Y!ctV7Hf4YgyOij?)t-1e5y*WYNG-kYP7|kpP)TiL;-GxpT
z=^6uT02ZDZ0j?In$YBSHArO70N-ZGOoW5ydC6lkcGF<7am2Wc61$s2_W1)F~P=dFg
zzw@&c)7d)Pfzg4w5R9QECh0e;J5dON`7X<c6<Z^EHCzF-{u_{1lV|$C>_`~+J1Cs=
zW{k(Fk<=TJOaJi>`4117KVufbcuvX1P4J@fOLXZh`CoWRqf;G_mWI77@^U^j{#B#`
z)uF)}@&Js?qBT9eaZrOe7ko9KiX9~IMzItE9%_$LD5&2PID1>zV&R;yp})HO-Win%
z$A46tJUK0y{2A5D5WA!9vYWpl09ybgL0~U-L3^UTg4RxD9{_66|BFYwnfWR+fm{)2
z2j4z6oU(7{y18yQNDl2idrnJJpR;8J8rfS^Hj&_sa@M4xl772l;}WDsgn@$>6}tVa
zexy&Tt$g@w>;i~RKmgQ_*}knO=*<xShg3{}NLQvoDXdhcnWtfbde+C)3ukE)Y*g^A
zNo@<?`N<V`p1d*x7XZ*Pa9p>)7)6BwX6qNp+352P{od3^*fAmK@v%!K^V-<0jmZI$
zD#ZW4XvT@_VBVX!<iskumqVD5;|3|98RUpYGSdu)2jy-OCH790t*}7pIg44I^On@_
zv0x!WR0+wH{&4%@38*d)1vvC1jAvK=NF-Pz@S8;Q_7W-{d%SyS?Pq7$I1~xy;}FQ2
zVPw_vz%P_76BrjVT-w_UcGHUkx@?1=a!A?n`I6UhrM1)`P*K0x@bCgNDO>qd3UT4z
zWEW<yWJYAu);G$iVbl3(&IU<BkSdJm(qASw-AX(uUjdPIim4tU+j3(~C8PouygqV9
z;F>YhPOAvkY;LYofp#souE1E%PLZhldm&S2fODbWuOa06<g?%fYZ~9>y}T|yDlC^!
z4^^M4ti;QtY&Q?j?P_p*H76xmeCky`#}6))j11vA0Bg;iH$A(^G8BdGuTB4+7Gk})
z!T5;_HMLwsV}-ICgQ>vWC2^x*r^qWb3Vm}MDrWSGqdVx+@TRWq<e6LRP~hp7DzYf0
zLz8S;6Z|+PbnM0+==hC9!AAyHSWuB)LCxjYnD!j10^}RrTVIl0qG$>I-bM@!F|upU
z&d4uGUzzmA57?~zB5WcWz`iY)^&^5-Je{q{FyuOU1O*_3_&Tg?jTgNmt7w|44tU>$
z;4BzM!dLg-Xyu9jOilIu!`v>;(oq$+9)5uMro^$G9|Z4t`MK0}bbG5V*30WDeZgQ`
z!sa+;jCP!Ac;rL>ej)zbB2+`NM)1c7@pfY_He()hrrQou6MqfCiUV~2TJ~G@D3`pm
z0!h(<H=qvjUcwe3lr`PE=%3wdzL2XEHlLG|tjm+j44fkZZ@|VbtY}Lbk!7F~W<Z$~
zh>fxj>G>I9Q|!P%!y8op-y11M2o`>|o*Cro(foju7SR;K?ZJ)vUQRk!T6eAuHdeUF
zy#&ztz?ot%<!Gr_%Gh~ntUh|90K^1h7rW4Jsa-~J=dz)%5t-{~tfL&}sspE~YI&cd
ztJ^l?N!D_ONxx#)^E}D;MbwkCk@POKQrz{p>+_}i%AuTMORm#Q&KbY;=s>;!T?tgd
zR=|%<o48wZ)N<);NLO_7xJu$nLX{84M?TP7^`CTOo<|mt+Z6FS>?BpG7U!0I)tCCA
zz2$=+k&0*Nxk?L$^uOUB7A=bJW7BI{onI%e+lAw3Z&tD}$m>6Z)QSA$T1SIKN`quh
zJ1nCsN^tKNnGU{D9F0}^S7U}R8jmSBfc};TKmUe};7<#iTPhYj<JcS`fSlHNqV*v%
z?~;P6&U5lS?^bLL*0u*Io?(_VBwir75g;vH&<M5Nq?Ibn)M^}2w0%LLJjY9s%O9X(
z59<(&cH7l@owjWkN%KPR({a_C&&6|kevfSbjj38tqjkp}nMqEa0NAV%?cUhk-WL=t
zJroMaj^F)CP~`jXCYrxwxF5N!2&p4#3*g)F8ve&OjDL_u^Cc@8(6I7M6~qyd1DYRo
z(6><t9NXN(l7~*R!g2odj)OryA?yp)<R5#x1JAB{MA5E&u8r8B<KbVj|DfbvuI1Za
z=pr3x(y>h%e};>5rmCR($pgHnqtWLO0RDo^$3>&7rhUYyQLO-+c^NQvv&mk)mCHbm
zXWW6hi++-Z=VqY&oQF7oesFl!fy39L=BedIf=3j2vhKJVS^Br9;&zvpzaoixt=|YG
zGf23o4iRJkQ+6y74mfCo?Es7rMQeA#c2Klg2Rc_fuL@^#Jm-La1`ItnG2~F?yH6~~
z+HB21CX=#oEJ&X{((RFcL!aNQROENfSGIEA@1FT0HeNL`GJZlc-{l%apwE;=dID|j
z%J@3C*=jMT>pu(6y%1DS{@bHR#$<|fGi*}n^RE4MJNyn~8t{DdancDX2La5ZFJEo6
zxqjVTO*hNK&i0u(qRsg~G8gjN>*&)L0QWIRDAo0<JRaEz)<pBcUY2bFm3$dQ!|rQ>
z&>kJK7@ps5fkltt?)myx_}#Vwaj~%D=Dg8Q7CbMW&PnBYLi&(oZGdi%J}$s4{{xM+
z^V5{xcHmV>2t<K>KJm)7+?KI&ip;@VDP1!CQECdf3RBR}O8+fF9WKr9e__aX3Clw;
zKgTz(g5=IYk!b|}$ShJxtKMOg(5hg}0jKHtcN@Rtw|eFzdWylkhIlDY^rR(#q5{4m
zK>yr)8rn%48@c7oMPU2$%*@Iz`4AY>JPoY7?XsWSRzfE>&gKk4r41k5;cC{q*9o%>
zd%$(F5pwFXCO<lb^&fec;HAqo<q}3G*!PRfbEf6NAD+W)VurSCfCe|vShZSbPb1wA
z{V!;e=w_zwzKhL&Nu~HN{8?@Sjl&Yo{miqL#L&iBB*!B!{nL9@w?3gzEjH81r*GbK
z*QHy);hRlGyoOE#Q~U2B(XTNDFW^tTQF{dQe<IUkKx<p1t<Mar9tQ1<B09^9bC70u
z$UdBapk5uR^eW!^*#FRuK#Rw9D+lcB`(T&TiLX%=(bJu)tiK_Y+CP5u{ZAk^?M=ha
z9^6oKh4zb$V74(}hU<cOeCFoHmwmi7SIkio!+XO3MZx<5!n7lQGUi1e#UV{!)tuzR
zX6DJ)?4HtpX&m98WPMZ9KbiY~9$X0uP?tCLe(27>^4Hc?)9&x5JtaSR5Z1r+H?AJ%
z!dX0WdX{k6zdy(pI!UtMJp1fuG2Qp+R7JuX08;}m_NmF(fTl%n&dUPu`CQ_w!RNqc
z<4!Raf6%V+&CXp!X%hdA-wD<E?F&dhyiExjams5Y0>y@ew$$z@_iC<q=-O&Ve-Ln}
zQ8IqpKb0=D)BdLaXu>o7V&>BB*TZ{EpJ|R2p?PPsGk*G_7wFAbz`GN+V@XW#(;pX`
z9h6<>o|)T?p#vGwnwtG}qm>%qX<=Li21*Fz7AYz4((U_G#IWII_%9jBpz-Hd!CQ~V
z#ScyG10@)<<QNb=?leK!K)T41RU-4}<I1mUcRqQ~szNX}QXRq{>m3}LLNyw`xsJFd
zSpRm#3d{6-`?Uw@r;_Ij3=}|vPo63r@!~OXRBwNL%x#51BQChjoteMbD7Mr`n0vYL
zIBVqp1buui4N<R)$KD6Frn>GAbsm%Ut8#PJtC9{U0T6RHj#sHW9TB0PfQHvq;jPJ(
zlWj^!u--}x-gb;lCdgehwYMh&e&VKq0SfY)l$1X_GljK>p&iVkbdL*`1!`MQ2H!v$
z&$-&M<YjN|@|BohkGxs+zD>;*Rtk-04VhV#!y`uqY*L6N0C06EYl8TNMNr)r4cVps
z2UYavl-kI}x-%%MN(0_yZCPxT5wfL8OmN}Sa$(kC!5*rYoR`mmlTr&hq?=o<7YzrJ
zihVBX1nw3Vvd6~&UnQ3)2=(>!tTjFs1ozazO85WG{B1%s%BaVc%#r-^U?3X`-{GB}
z6rG@%%4m4%Tan<nQcIR8D8}vSbl_gKga{nb4jecM_<*%P#A_IydiK?Kl|5qt)cqu~
zIy%O3VaHA-BViI%1(HbmKNkAuDGtwH0yzT+UvD2fGbu3cjn5IueOR+OwZKS^6wpW>
zG1&M;K#AuVJYapRh_lY?j?%k#KcJ!DQBFy*eM8~95PX*+x*Z!0e5mn8Mv$K#_Rtf*
zTT>)|uK0-28v;`pa4U&4oYGwTStZCG$Gq?r9s<r{d%5LX?yGFko4D)cYrVNtK!gT@
z;3{|@k?v~vp=O~VvdE0%Z%;V(ROpvCPT@ZzO<D1$G>CA6n+}~k+vmYIDO*zPYEYcI
z7XN#7+>HZ&-jlXvcX|0&k+TFpe-Oy|(Aur4M4yi2sd-c1jwyJM?KuBpt>lmxcY9lQ
zIZrjfX5^54XGSzSh}g*QxyzAjw`sW>63#bssvV}s%ZQIE;WiZ<*CqhL2`+l98u^>D
zp5KO6fZL)L-KdEgSGr-X^Gx$MoE-(;xi$1~Q^x60yyU5nzVH01ryXlnlvRKx-WGT}
zStZuD2HGM@r#OFIfk51T5OHc^(){;H{-SFEydUfHa9qGO43JSzO!Gf2cCvr?v82`;
zdSUHXz1W|!PP7Qh$;olTX5R<d5)ZQd<=FnV;Tshb#<Gexe2FX86TNhoRu#RceB`np
z9Ju(jWct1psdSz2eKEi;FElP0JJqV%t5KHW!OMZ^u`gZK1KR+$Hz0SMJyg4_`vd;L
zFo}FbP-iOKeXO21ZxW`oks@BKQYlD<SJJ&{_iil2uzV{iEsdAJ+7wbG*aWTjqy`=c
zEEmW7)OllT6Mr4xk9vB07lyJGyj^drXvo!F?A^5Wka~m1PW|ueI-Ks7^uAT&#<;Z0
zgFte|yvtYRaqu4?@Su}YIOpXtQlgWOf*5kY!CoEgz3)WFzac>aFJ2uSOz{m*+d@vC
zJ*^O?f2CR<R9yjd=xX`0^eq+09*m^@9pSNY%fr?sTQ~76T_9TM5te)gsk&{tRUT7H
zt!rat^}Fb+^=K`8ho}eC?wIE2yUg3<!08c7m=&I+<K$K*7`MF`etVE>FT6M)w1BD$
zv`auU|C1L^^%&w8)|DY<Y)9|LCQ4cu*vhazLwt@`_YC3lLCTQ@H7^t8N6tJ(SY#Jp
zB)~J(`QU>%uR-l6kUN2~^!8?e0M)RKm-}7H7;0&0^SBTgBp&9_|Fw5-dq(fJ3k9`|
zUnt?0=DizZwjdube#|c_P3rq*jgF%)DjVv3N+QRg<+=3%Pw9uEZlCC_T!PP|&3VLX
zDJwU;X)Zxiw1`ZY%>*JfKcFZx!e>1G;He|&(+`rjA6NP?k({~~M^epT##Ut6d@sgK
zFHhteo&PKxBc^PG*G>Jmzd%x|sC~b@@Yw!RPP7ffPzvd>+5YhZ6)vd3fZFa+^1*fv
z;KLL-C&hLt-+5CezA&;~J7ZS3t*n^6@6cGKiJqVmOo|TuSr3wWoyd(dUqu-w=AQ!e
zF%uEtz}-7e!Zp&V<MxPA5La3#K5;v`b@(NLNjeKle-ZpFNHy5C2KX+)$N`8?K}{nD
zWwy;-w8~x4)oZm=*V_dXc`YeFe82)+q}#0iYy%S>TBUZaI_4ddazIohQi1fYpOcf5
zNxaACV!u=`A}UJ3g<r4O2-^iVrSO{ed8L8qlpxzbz>&he#MI*lZ#rR}2f?d)IDbdm
zE}F$4+nIX*{A<q&y|1AZ=FJl@=>6;vb|V7VO5<t>!)ERjXp-0%h%=OyuJpaYuf+OL
z7J#Jm-&FqiXn>K$>Rhz&b}hmSV#BowBo#aE`5Yo3sIW7k-+PsReQS&VG7Z)xK*`9Y
z6vC<!dB3NP&IZ-*a;~?Qt_RRsyFT-@7fczO%bUv5PPbwUD-04}AUOA*Kbr4)u+1bb
zEj!v}&04NAKfH6P=kOK5CzjwUa33pZ^-0a&$AIPzNOlGw09=B3tB&2X-DBAu$`r+$
zNoyrWXV;r=Kg%kuv4arIr{KBF#?b`_vt?@h_VbEZl7I;lMhxhWmc;Ze;?=_u<X>mV
z2aN|wDkg}iU0JkNAo_ag75!`?)|a+bRCv$~G~pKwU3z_m-*B2Do+n;np~A&Fe$oik
z&aO^5gjSCR%&pZn`EGuOUR(`4N4lhIYxrYJ)XVFdwODu5%7NhMD~3QYZZVVNrs1;7
z0tdu6{YQ?tE>zkC89<ti<5G3xbGM#pCN1+4XK>VXzB0?eS7I72K_s$XfRbeKl<44!
zv=b#gNjkndx*6)V#aTeJF*saP_O=&z!P+&9tM2WpWrb>fs42ZMk#8sQA6p?_uNa|W
z0#1C1An-+Zr?nva@&||jPw-V#P*5m5yG<Fxc8Q}hPBz`eh!0JsDseuYl9FI12TsZE
z%Bd_HNhRClS@IY5B;%Ax6+DyHHXpMogv1%0T?0#<Oq|OfyOO!_7i~!XMiq4VTHbv5
zs?sGB1GDPJpStux;V-}(G21#bv&}LtBD7o$Da14GzzJGKaOgo0b>ouurZDd#d>?EO
zVi<~w5WY7>Byw{R$8byCAD&-|E-Vndp~|80ue?87UM<Ja-JBlcSlgVS01)u+^2p|<
z6c_&5Z#gjDsq-jlEH12-0s|}-69|kV5OIEG?UxYtJN_vxqeLl@-Kwo!F%uJ;*Qkc;
z+>+AW1CM>P^h@;0ESW=qP>AJ{=VD3g*TQuprWhsyY^xazGoiI<V5k%kzljjvN$q)B
zDUL_86}a@c!Zb2#$hXJ-7S+<#{IU%4ctJgXmaftARw-!MVw1C!l@om}EG%;1Q|UM?
zOwp!JPBF#SteLW*D#cAf1HY)q@$PRqdL6h6zm$3cLZN-jN{9E`*%pr%v^&TpD*P_K
z1XSywGRBN68phTEZwu&8lAwti1Nkn=VA`kOXwnchw=?!8p0gE5(+c5dw&bG+o*=Wc
zf5`i%dazb{(2xzH*|!I@KEC_BYNS@3Sdej;H0kAYd;K%&R?`%x9b)$$G7vb-yI+<}
zPR+yet{VQA{=#55Ds#(H;wI_XTa9CE8Kt*=6}H0s4*$;hG!rU+`fB^|=5`qH-&3-5
z_YyTF7$3>_;G6^lp$4|dD=;$2KO^x19plAgPyjShdj)N8SIA)dZ2-~?+Uhc(tq*)%
zBA(v%<a`CN+smN80z64rAm_@*1i%^vLQ-Q8EQ5t88Ah7I#C}PKXhiL&_eeHzrAw<$
zLAyR^oV<DeFRF8Pu2;M0<*g<j2TxBrATr4wu({PZmG4;Kb+(cFOk7;=>>Mb>4Hv+t
z83~KK6(X+--+G}39&`98u!sW~JxF&#4UH2+Kn)?WscQ2PI6+0$yWl>7T#{`;DeFk3
zO-62NV7}a``Gra^7K(HSd_^$7r{Bbfybaa6k4*u`_7LyrrL}VwJ{t>ButTetl{x_8
z@85&tD^dR!koeFnP2$G#;f$e6pOu_2Y*YX9{O%Kt7GTvLGM7)6vjaIBM~|>R7wcKW
z8u6wDXO}SC68e_35sZZ$vK++Va$H_sCV1ge0SccIpX08%8!0;21&0=rxq#6f-vYr1
z@8#om3I3p%{e$b<*K%YPLtrhywVVAum$XG`9AcxFgQ`?+w~wfe!`<L4PBv>U*Ls@@
zc?!J6O|CPjKG0ug{Kj=d&GhR#z@7j@L`gaDOaXqXpN?Y@+5_$?`BeF0D1*4k-nxAN
zAc%V@Z`Z$3;Pb}S>!Wv#H5TXkL3Z&Wl-OGPD5=s6x2g0i^F5uA2V7qj$9S<C!~^-3
zT~o;?27_!YT9)TWdtvHyR{$xp-}=Ns_~O}kS=BDcG65E)P{-mxfMqqu=`Z?jJP<!j
zT*e4A0Q5JkNjDn&fHZXG82)lkZ=BWqzK`8#E=f7iB7$siWXb!jPouVw@`uzi-Wo0x
zWPe%3{xF5(J4*iup4<Ac1)GK)hzH4)rp62)SK~tiN|>vJ^mctgnoXobvHj*wF6Nb8
z0pYwnd<A<B1I=kHhYcLs;~?9TUm?@mADV&7#S~RAbMu&H?_ivX$(>;0Z5`sQoFr*M
z5vb9DGYydS+;jv*0PmdO&C;(mf|j3;3c4p<FXi2I&YwAU9Z0FyrkZ75Y<GYiJ0tG)
zY%Pr}h>l)}i~V5Z&b2_n4YndjunoAlM@stQ1M?z_Cqd;B(>!58%f&+eAcrpJJ1?m>
z%10q3@tyZ$MzubUCMXnGAHAq2O_d;Tv-=t-MGiWO6{h0P;wuqS6&e8Xrx29f1&+;k
zpZ4bWSu8jmxzLmsWJJ6e`7$|$tk()Nhq!UZQvjl`;|9z^&o=1J;8wuMzj1y9xv%^t
z%O(Xknw*l67Yz9I*ahCNiHA|~_>jGVA;$d>y9HDuhe?&o9G-!+J0yD0w*1zi-mz}^
zV`S$c#omUW(7L2Ihhv3Wj?E7dyFyjH;4a-<9r|Kv3;v2EjW~_EPIB>D-h!rvGHx-l
zLBhc+{fM(|U)E%8oE;W$1!QED5Z!d5*Eos=ngz8HIl>~`XP{dH@D)Z2my7?5{_gQx
zu(BhIKgkrsml*fG00u?iZwi{YO5(g-JnV=Q3~BeanZ@V0rtC{1X)&b;dWQDCZQUJl
zeJ-7yHej<TzBjftrb-FbbH$uDZ}>Jux=T~t+ZW~pQD_-tP3Q1w_}B}IJCcHa-{D&=
z#XqH`AbVC1!mu+@IaZuo=+J!|Iku=`A)d~n%H9I6l(94IEm2vN*cs1I7NDAmV~6my
zs9-uCjqG?2U?#wd`hkZ&2mZ6dc+CAY6|QE!P(@=?(2z9FG2Y$xmSt(HCLQNHcL^@&
z;*aD^6BmN;se#yN<=<=;8u_#Z@IFlmuPU3A*b8?z3h;H%UwM1jN!hy&Bp^rk_n_Ie
[GIT binary patch data omitted]

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 79c26522..da144876 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -456,7 +456,17 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
 
     If the data consists of different sampling rates, a separate plot is created for each sampling.
 
-    To work properly, the data handler must have the attribute `.id_class._data`.
+    .. image:: ../../../../../_source/_plots/periodogram.png
+        :width: 400
+
+    .. note::
+        This plot is not included in the default plot list. To use this plot, add "PlotPeriodogram" to the `plot_list`.
+
+    .. warning::
+        This plot is highly sensitive to the data handler structure. Therefore, this method is most likely not
+        compatible with custom data handlers. Proven data handlers are `DefaultDataHandler`,
+        `DataHandlerMixedSampling`, and `DataHandlerMixedSamplingWithFilter`. To work properly, the data handler
+        must have the attribute `.id_class._data`.
     """
 
     def __init__(self, generator: Dict[str, DataCollection], plot_folder: str = ".", plot_name="periodogram",
-- 
GitLab
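
As the note added above says, PlotPeriodogram is opt-in. A minimal sketch of requesting it, reusing the DEFAULT_PLOT_LIST from mlair/configuration/defaults.py (the exact entry point that consumes plot_list is assumed here, not taken from these patches):

    from mlair.configuration.defaults import DEFAULT_PLOT_LIST

    # extend rather than replace the defaults, then hand plot_list to the
    # experiment setup (assumed keyword; defaults.py appears in patch 082 below)
    plot_list = DEFAULT_PLOT_LIST + ["PlotPeriodogram"]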


From 26fbafb8d29403cb5b8994a22b629022f40eea9f Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 08:59:55 +0200
Subject: [PATCH 080/175] can be merged now on test pass, /close #

---
 mlair/plotting/data_insight_plotting.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index da144876..2f75b16a 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -467,6 +467,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         compatible with custom data handlers. Proven data handlers are `DefaultDataHandler`,
         `DataHandlerMixedSampling`, and `DataHandlerMixedSamplingWithFilter`. To work properly, the data handler
         must have the attribute `.id_class._data`.
+
     """
 
     def __init__(self, generator: Dict[str, DataCollection], plot_folder: str = ".", plot_name="periodogram",
-- 
GitLab


From 35b5308bec436673710b9d162a7b161ed4f79597 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 10:33:30 +0200
Subject: [PATCH 081/175] fixed legend position /close #298

---
 mlair/plotting/data_insight_plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 2f75b16a..1176621a 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -691,7 +691,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
                     upper, lower = ma.max().mean(axis=1).values.flatten(), ma.min().mean(axis=1).values.flatten()
                     ax.fill_between(self.f_index, lower, upper, color="light" + c, alpha=0.5, label=None)
             self._format_figure(ax, var)
-            ax.legend()
+            ax.legend(loc="upper center", ncol=max_iter)
             pdf_pages.savefig()
         # close all open figures / plots
         pdf_pages.close()
-- 
GitLab
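
For context, a standalone sketch of what the one-line fix above does (assuming max_iter counts the plotted groups): anchoring the legend at the upper centre with one column per entry lays the labels out in a single row instead of a tall box covering the curves.

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    labels = ["raw", "filtered 1", "filtered 2"]  # hypothetical series
    for i, label in enumerate(labels):
        ax.plot([0, 1], [i, i + 1], label=label)
    # one column per entry -> a single legend row at the top of the axes
    ax.legend(loc="upper center", ncol=len(labels))
    plt.show()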


From 1d23deffc894b005b5442f6d67df8dad978c6298 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 14:59:02 +0200
Subject: [PATCH 082/175] new plot PlotDataHistogram plots transformed inputs
 and targets

---
 mlair/configuration/defaults.py         |  2 +-
 mlair/plotting/data_insight_plotting.py | 63 +++++++++++++++++++++++++
 mlair/run_modules/post_processing.py    |  9 +++-
 3 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index a874611a..785aab88 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -48,7 +48,7 @@ DEFAULT_CREATE_NEW_BOOTSTRAPS = False
 DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
 DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
                      "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
-                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotSeparationOfScales"]
+                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram"]
 DEFAULT_SAMPLING = "daily"
 DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA",
                        "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 1176621a..f159e6fc 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -444,6 +444,59 @@ class PlotAvailabilityHistogram(AbstractPlotClass):  # pragma: no cover
         plt.tight_layout()
 
 
+class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
+
+    def __init__(self, generator: DataCollection, plot_folder: str = ".", plot_name="histogram",
+                 variables_dim="variables", time_dim="datetime", window_dim="window"):
+        super().__init__(plot_folder, plot_name)
+        self.variables_dim = variables_dim
+        self.time_dim = time_dim
+        self.window_dim = window_dim
+        self.inputs = to_list(generator[0].get_X(as_numpy=False)[0].coords[self.variables_dim].values.tolist())
+        self.targets = to_list(generator[0].get_Y(as_numpy=False).coords[self.variables_dim].values.tolist())
+
+        # normalized versions
+        self._calculate_hist(generator, self.inputs, input_data=True)
+        self._plot(add_name="input")
+        self._calculate_hist(generator, self.targets, input_data=False)
+        self._plot(add_name="target")
+
+    def _calculate_hist(self, generator, variables, input_data=True):
+        bins = {}
+        n_bins = 100
+        interval_width = None
+        bin_edges = None
+        f = lambda x: x.get_X(as_numpy=False)[0] if input_data is True else x.get_Y(as_numpy=False)
+        for gen in generator:
+            w = min(abs(f(gen).coords[self.window_dim].values))
+            data = f(gen).sel({self.window_dim: w})
+            res, interval_width, bin_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
+            for var in variables:
+                n_var = bins.get(var, np.zeros(n_bins))
+                n_var += res[var]
+                bins[var] = n_var
+        self.bins = bins
+        self.interval_width = interval_width
+        self.bin_edges = bin_edges
+
+    def _plot(self, add_name):
+        plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}_{add_name}.pdf")
+        pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+        for var in self.bins.keys():
+            fig, ax = plt.subplots()
+            hist_var = self.bins[var]
+            n_var = sum(hist_var)
+            weights = hist_var / (self.interval_width * n_var)
+            ax.hist(self.bin_edges[:-1], self.bin_edges, weights=weights)
+            ax.set_ylabel("probability density")
+            ax.set_xlabel(f"{var}")
+            ax.set_title(f"Histogram (n={int(n_var)})")
+            pdf_pages.savefig()
+        # close all open figures / plots
+        pdf_pages.close()
+        plt.close('all')
+
+
 class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
     """
     Create Lomb-Scargle periodogram in raw input and target data. The Lomb-Scargle version can deal with missing values.
@@ -719,3 +772,13 @@ def f_proc_2(g, m, pos, variables_dim, time_dim):
         var_str, f, pgram = f_proc(var, d_var)
         raw_data_single[var_str] = [(f, pgram)]
     return raw_data_single
+
+
+def f_proc_hist(data, variables, n_bins, variables_dim):
+    res = {}
+    for var in variables:
+        d = data.sel({variables_dim: var}).squeeze() if len(data.shape) > 1 else data
+        hist, bin_edges = np.histogram(d.values, n_bins, range=(-4, 4))
+        interval_width = (bin_edges[1] - bin_edges[0])
+        res[var] = hist
+    return res, interval_width, bin_edges
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 23d26fc1..fafcff5e 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -22,7 +22,7 @@ from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
     PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales
 from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
-    PlotPeriodogram
+    PlotPeriodogram, PlotDataHistogram
 from mlair.run_modules.run_environment import RunEnvironment
 
 
@@ -398,6 +398,13 @@ class PostProcessing(RunEnvironment):
         except Exception as e:
             logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}")
 
+        try:
+            if "PlotDataHistogram" in plot_list:
+                PlotDataHistogram(self.train_data, plot_folder=self.plot_path, time_dim=time_dim,
+                                  variables_dim=target_dim)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotDataHistogram due to the following error: {e}")
+
     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
 
-- 
GitLab
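
The key step in PlotDataHistogram above is that counts are accumulated across stations with np.histogram first and only re-drawn afterwards: dividing the summed counts by interval_width * n turns them into a probability density (total bar area 1), and ax.hist with weights re-plots pre-binned data without touching the raw values again. A self-contained sketch with synthetic data (illustrative names, not MLAir code):

    import numpy as np
    import matplotlib.pyplot as plt

    rng = np.random.default_rng(0)
    n_bins = 100
    counts, bin_edges = np.zeros(n_bins), None
    for _ in range(3):  # stands in for the loop over stations in _calculate_hist
        hist, bin_edges = np.histogram(rng.normal(size=500), n_bins, range=(-4, 4))
        counts += hist  # aggregate counts across stations

    interval_width = bin_edges[1] - bin_edges[0]
    weights = counts / (interval_width * counts.sum())  # density: bar areas sum to 1

    fig, ax = plt.subplots()
    ax.hist(bin_edges[:-1], bin_edges, weights=weights)  # redraw pre-binned counts
    ax.set_ylabel("probability density")
    plt.show()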


From 6f02e87b8be008e01d9bd9d1ad288944371056f5 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 16:14:28 +0200
Subject: [PATCH 083/175] PlotDataHistogram can compare subsets

---
 mlair/plotting/data_insight_plotting.py | 107 ++++++++++++++++--------
 mlair/run_modules/post_processing.py    |   4 +-
 2 files changed, 75 insertions(+), 36 deletions(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index f159e6fc..fec4ef6e 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -446,51 +446,90 @@ class PlotAvailabilityHistogram(AbstractPlotClass):  # pragma: no cover
 
 class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
 
-    def __init__(self, generator: DataCollection, plot_folder: str = ".", plot_name="histogram",
+    def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".", plot_name="histogram",
                  variables_dim="variables", time_dim="datetime", window_dim="window"):
         super().__init__(plot_folder, plot_name)
         self.variables_dim = variables_dim
         self.time_dim = time_dim
         self.window_dim = window_dim
-        self.inputs = to_list(generator[0].get_X(as_numpy=False)[0].coords[self.variables_dim].values.tolist())
-        self.targets = to_list(generator[0].get_Y(as_numpy=False).coords[self.variables_dim].values.tolist())
-
-        # normalized versions
-        self._calculate_hist(generator, self.inputs, input_data=True)
-        self._plot(add_name="input")
-        self._calculate_hist(generator, self.targets, input_data=False)
-        self._plot(add_name="target")
-
-    def _calculate_hist(self, generator, variables, input_data=True):
-        bins = {}
-        n_bins = 100
-        interval_width = None
-        bin_edges = None
-        f = lambda x: x.get_X(as_numpy=False)[0] if input_data is True else x.get_Y(as_numpy=False)
-        for gen in generator:
-            w = min(abs(f(gen).coords[self.window_dim].values))
-            data = f(gen).sel({self.window_dim: w})
-            res, interval_width, bin_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
-            for var in variables:
-                n_var = bins.get(var, np.zeros(n_bins))
-                n_var += res[var]
-                bins[var] = n_var
-        self.bins = bins
-        self.interval_width = interval_width
-        self.bin_edges = bin_edges
-
-    def _plot(self, add_name):
-        plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}_{add_name}.pdf")
+        self.inputs, self.targets = self._get_inputs_targets(generators, self.variables_dim)
+        self.bins = {}
+
+        # input plots
+        self._calculate_hist(generators, self.inputs, input_data=True)
+        for subset in generators.keys():
+            self._plot(add_name="input", subset=subset)
+        self._plot_combined(add_name="input")
+
+        # target plots
+        self._calculate_hist(generators, self.targets, input_data=False)
+        for subset in generators.keys():
+            self._plot(add_name="target", subset=subset)
+        self._plot_combined(add_name="target")
+
+    @staticmethod
+    def _get_inputs_targets(gens, dim):
+        k = list(gens.keys())[0]
+        gen = gens[k][0]
+        inputs = to_list(gen.get_X(as_numpy=False)[0].coords[dim].values.tolist())
+        targets = to_list(gen.get_Y(as_numpy=False).coords[dim].values.tolist())
+        return inputs, targets
+
+    def _calculate_hist(self, generators, variables, input_data=True):
+        for set_type, generator in generators.items():
+            bins = {}
+            n_bins = 100
+            interval_width = None
+            bin_edges = None
+            f = lambda x: x.get_X(as_numpy=False)[0] if input_data is True else x.get_Y(as_numpy=False)
+            for gen in generator:
+                w = min(abs(f(gen).coords[self.window_dim].values))
+                data = f(gen).sel({self.window_dim: w})
+                res, interval_width, bin_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
+                for var in variables:
+                    n_var = bins.get(var, np.zeros(n_bins))
+                    n_var += res[var]
+                    bins[var] = n_var
+            self.bins[set_type] = bins
+            self.interval_width = interval_width
+            self.bin_edges = bin_edges
+
+    def _plot(self, add_name, subset):
+        plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}_{subset}_{add_name}.pdf")
         pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
-        for var in self.bins.keys():
+        bins = self.bins[subset]
+        colors = self.get_dataset_colors()
+        for var in bins.keys():
             fig, ax = plt.subplots()
-            hist_var = self.bins[var]
+            hist_var = bins[var]
             n_var = sum(hist_var)
             weights = hist_var / (self.interval_width * n_var)
-            ax.hist(self.bin_edges[:-1], self.bin_edges, weights=weights)
+            ax.hist(self.bin_edges[:-1], self.bin_edges, weights=weights, color=colors[subset])
+            ax.set_ylabel("probability density")
+            ax.set_xlabel(f"values ({subset})")
+            ax.set_title(f"Histogram ({var}, n={int(n_var)})")
+            pdf_pages.savefig()
+        # close all open figures / plots
+        pdf_pages.close()
+        plt.close('all')
+
+    def _plot_combined(self, add_name):
+        plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}_{add_name}.pdf")
+        pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+        variables = self.bins[list(self.bins.keys())[0]].keys()
+        colors = self.get_dataset_colors()
+        for var in variables:
+            fig, ax = plt.subplots()
+            for subset in self.bins.keys():
+                hist_var = self.bins[subset][var]
+                n_var = sum(hist_var)
+                weights = hist_var / (self.interval_width * n_var)
+                ax.plot(self.bin_edges[:-1] + 0.5 * self.interval_width, weights, label=f"{subset}",
+                        c=colors[subset])
             ax.set_ylabel("probability density")
             ax.set_xlabel(f"{var}")
-            ax.set_title(f"Histogram (n={int(n_var)})")
+            ax.legend(loc="upper right")
+            ax.set_title(f"Histogram")
             pdf_pages.savefig()
         # close all open figures / plots
         pdf_pages.close()
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index fafcff5e..89a6f205 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -400,8 +400,8 @@ class PostProcessing(RunEnvironment):
 
         try:
             if "PlotDataHistogram" in plot_list:
-                PlotDataHistogram(self.train_data, plot_folder=self.plot_path, time_dim=time_dim,
-                                  variables_dim=target_dim)
+                gens = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
+                PlotDataHistogram(gens, plot_folder=self.plot_path, time_dim=time_dim, variables_dim=target_dim)
         except Exception as e:
             logging.error(f"Could not create plot PlotDataHistogram due to the following error: {e}")
 
-- 
GitLab
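
In the combined plot added above, each subset's density is drawn as a line through the bin centres (bin_edges[:-1] + 0.5 * interval_width) instead of overlapping bars, which keeps train/val/test readable on one axis. A hedged sketch with synthetic data and a stand-in for get_dataset_colors():

    import numpy as np
    import matplotlib.pyplot as plt

    rng = np.random.default_rng(1)
    bin_edges = np.linspace(-4, 4, 101)
    interval_width = bin_edges[1] - bin_edges[0]
    colors = {"train": "C0", "val": "C1", "test": "C2"}  # stand-in colour map

    fig, ax = plt.subplots()
    for subset, size in [("train", 5000), ("val", 1000), ("test", 1000)]:
        counts, _ = np.histogram(rng.normal(size=size), bins=bin_edges)
        weights = counts / (interval_width * counts.sum())
        # one density line per subset, drawn at the bin centres
        ax.plot(bin_edges[:-1] + 0.5 * interval_width, weights, label=subset, c=colors[subset])
    ax.set_ylabel("probability density")
    ax.legend(loc="upper right")
    plt.show()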


From 03ef63016fab945148a5c9056562d85f690d2b62 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 17:33:17 +0200
Subject: [PATCH 084/175] hist edges are now dynamically adjusted per variable

---
 mlair/plotting/data_insight_plotting.py | 67 +++++++++++++++++--------
 1 file changed, 46 insertions(+), 21 deletions(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index fec4ef6e..5a00b625 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -454,6 +454,8 @@ class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
         self.window_dim = window_dim
         self.inputs, self.targets = self._get_inputs_targets(generators, self.variables_dim)
         self.bins = {}
+        self.interval_width = {}
+        self.bin_edges = {}
 
         # input plots
         self._calculate_hist(generators, self.inputs, input_data=True)
@@ -476,38 +478,58 @@ class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
         return inputs, targets
 
     def _calculate_hist(self, generators, variables, input_data=True):
+        n_bins = 100
         for set_type, generator in generators.items():
-            bins = {}
-            n_bins = 100
-            interval_width = None
-            bin_edges = None
+            tmp_bins = {}
+            tmp_edges = {}
+            end = {}
+            start = {}
             f = lambda x: x.get_X(as_numpy=False)[0] if input_data is True else x.get_Y(as_numpy=False)
             for gen in generator:
                 w = min(abs(f(gen).coords[self.window_dim].values))
                 data = f(gen).sel({self.window_dim: w})
-                res, interval_width, bin_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
+                res, _, g_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
                 for var in variables:
-                    n_var = bins.get(var, np.zeros(n_bins))
-                    n_var += res[var]
-                    bins[var] = n_var
+                    b = tmp_bins.get(var, [])
+                    b.append(res[var])
+                    tmp_bins[var] = b
+                    e = tmp_edges.get(var, [])
+                    e.append(g_edges[var])
+                    tmp_edges[var] = e
+                    end[var] = max([end.get(var, g_edges[var].max()), g_edges[var].max()])
+                    start[var] = min([start.get(var, g_edges[var].min()), g_edges[var].min()])
+            # interpolate and aggregate
+            bins = {}
+            edges = {}
+            interval_width = {}
+            for var in variables:
+                bin_edges = np.linspace(start[var], end[var], n_bins + 1)
+                interval_width[var] = bin_edges[1] - bin_edges[0]
+                for i, e in enumerate(tmp_bins[var]):
+                    bins_interp = np.interp(bin_edges[:-1], tmp_edges[var][i][:-1], e, left=0, right=0)
+                    bins[var] = bins.get(var, np.zeros(n_bins)) + bins_interp
+                edges[var] = bin_edges
+
             self.bins[set_type] = bins
-            self.interval_width = interval_width
-            self.bin_edges = bin_edges
+            self.interval_width[set_type] = interval_width
+            self.bin_edges[set_type] = edges
 
     def _plot(self, add_name, subset):
         plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}_{subset}_{add_name}.pdf")
         pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
         bins = self.bins[subset]
+        bin_edges = self.bin_edges[subset]
+        interval_width = self.interval_width[subset]
         colors = self.get_dataset_colors()
         for var in bins.keys():
             fig, ax = plt.subplots()
             hist_var = bins[var]
             n_var = sum(hist_var)
-            weights = hist_var / (self.interval_width * n_var)
-            ax.hist(self.bin_edges[:-1], self.bin_edges, weights=weights, color=colors[subset])
+            weights = hist_var / (interval_width[var] * n_var)
+            ax.hist(bin_edges[var][:-1], bin_edges[var], weights=weights, color=colors[subset])
             ax.set_ylabel("probability density")
-            ax.set_xlabel(f"values ({subset})")
-            ax.set_title(f"Histogram ({var}, n={int(n_var)})")
+            ax.set_xlabel(f"values")
+            ax.set_title(f"histogram {var} ({subset}, n={int(n_var)})")
             pdf_pages.savefig()
         # close all open figures / plots
         pdf_pages.close()
@@ -522,14 +544,16 @@ class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
             fig, ax = plt.subplots()
             for subset in self.bins.keys():
                 hist_var = self.bins[subset][var]
+                interval_width = self.interval_width[subset][var]
+                bin_edges = self.bin_edges[subset][var]
                 n_var = sum(hist_var)
-                weights = hist_var / (self.interval_width * n_var)
-                ax.plot(self.bin_edges[:-1] + 0.5 * self.interval_width, weights, label=f"{subset}",
+                weights = hist_var / (interval_width * n_var)
+                ax.plot(bin_edges[:-1] + 0.5 * interval_width, weights, label=f"{subset}",
                         c=colors[subset])
             ax.set_ylabel("probability density")
-            ax.set_xlabel(f"{var}")
+            ax.set_xlabel("values")
             ax.legend(loc="upper right")
-            ax.set_title(f"Histogram")
+            ax.set_title(f"histogram {var}")
             pdf_pages.savefig()
         # close all open figures / plots
         pdf_pages.close()
@@ -815,9 +839,10 @@ def f_proc_2(g, m, pos, variables_dim, time_dim):
 
 def f_proc_hist(data, variables, n_bins, variables_dim):
     res = {}
+    bin_edges = {}
+    interval_width = {}
     for var in variables:
         d = data.sel({variables_dim: var}).squeeze() if len(data.shape) > 1 else data
-        hist, bin_edges = np.histogram(d.values, n_bins, range=(-4, 4))
-        interval_width = (bin_edges[1] - bin_edges[0])
-        res[var] = hist
+        res[var], bin_edges[var] = np.histogram(d.values, n_bins)
+        interval_width[var] = bin_edges[var][1] - bin_edges[var][0]
     return res, interval_width, bin_edges
-- 
GitLab
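
Patch 084 above drops the fixed (-4, 4) range, so every station is binned on its own value range first; before the counts can be summed, they are interpolated onto a common grid spanning the global min/max, and left=0/right=0 zeroes contributions outside a station's own range. A reduced sketch of that aggregation step (synthetic data, illustrative names):

    import numpy as np

    rng = np.random.default_rng(2)
    n_bins = 100
    # each station binned on its own range -> differing bin edges
    per_station = [np.histogram(rng.normal(loc=loc, size=400), n_bins) for loc in (0.0, 1.5)]

    # common grid over the global min/max of all per-station edges
    start = min(edges.min() for _, edges in per_station)
    end = max(edges.max() for _, edges in per_station)
    common_edges = np.linspace(start, end, n_bins + 1)

    total = np.zeros(n_bins)
    for counts, edges in per_station:
        # re-grid the counts; outside a station's own range they contribute 0
        total += np.interp(common_edges[:-1], edges[:-1], counts, left=0, right=0)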


From 0bc63e8f8709635c338135aeb577e45dfbce0305 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 17:39:19 +0200
Subject: [PATCH 085/175] added docstring and exemplary graph

---
 docs/_source/_plots/datahistogram.png   | Bin 0 -> 52681 bytes
 mlair/plotting/data_insight_plotting.py |  10 ++++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 docs/_source/_plots/datahistogram.png

diff --git a/docs/_source/_plots/datahistogram.png b/docs/_source/_plots/datahistogram.png
new file mode 100644
index 0000000000000000000000000000000000000000..bda9896f22583a38a3f52e2a927e276199ec742c
GIT binary patch
literal 52681
[GIT binary patch data omitted]
zLAtuQn{KfA%x9Ya>>#6+PTEWVJ{wSWwMJ_*1U29QvBH3sQFSM>+nBmMosiUr2$qVh
z4**J7N7u_zCWrIfM^D#540B)Fsk5CTw4l>LaSJhY#mzaBg$eEjNQRtIw%Liy>Oidn
zK+?nf)cMVL3WzzlUOFSXfT9+6<heD$J@G>gdfZH41z@Yl5mwt`V_Vf7YMTwXJ{;8$
z;znNxdNlUq;#(Ovl=_!X)lJ2Ikq1o50T(#9O<c-Zf*lR}=d*YZ`}IA+$v(yGyYHM9
z;oZ3>h_-a@tWoc)O;lPsl)+*(DYpW(6KdMtw9CnflcOQ~Xc)_6<!cGp;0=|J=+FY7
z%Ee|IUjPcYASc+nbf*!`rww&}X{r&}gr+=4>X+gTPbT5yaf^vT2AhJ+fg$(j9rsit
zLmT1t>?k={vOGNRo7bV#in5;h-8&BQo%dlL*6FSVy_?({c8msN=^U$0a$1XD3luzw
z*rwnMg!GeV0j6bl*r8V3=Y&$PkN8%m_@lI@+kE$EQmMc|!!^)5gt<r*-i_V^`se+t
zQUry?{Q>OuAgp%!*TCJmgB0#=vNwF=1=s<DcudmwfsJ=O9e#0@3kQuq_LVab|92VD
zRz09tho4GXFk5UE7$B7L95w<G@}cz~9CX;|-{&;ww9b2e3wl{H70+V9jr9#ODFUFC
z1(Yp30Xf7f>i{q*N|h4Rah3v0(9xUo{kZBH(%BFw_Au%MJD*fPkJ4{?9?(|-q+&U5
zEc7zK<j-0)0JIM<w;$vS!KkopdV_8)kT&W&zht*~B<TNN{S!qYcg*O}gkg4`Li(y!
zWUeQS!vk!gQoo5H;fxd9N`EwW@5<@H-X_BEhL}vEYw(Jcn17E4Pv;~b43FKAto%Lu
z<ejFzq|4+QH-+{{tizYDzz=#I%<VrQjo3HSsE;I<3GOR#t5jF!=UfH|r=mZo1<c-Y
z-!%yMJ8MUuO|Q#?BLvGAqjT)<&g2#FlzvayqRcGJq2DXsX|0I27E%StCYoWAH&Asu
zZ4h3Gb9r8g(Zb^Je^K@3@ldvJ|M=J=OC>FYx?2=U5@T0slalO9m?Ha@Z7?xZXr(M=
zr&V^c48xcyA-lwkZ7h{_Y+)?J@H<A&^Lu@t*X#b=pMUNd*LBT#9_Mkqw~LBT^CiJL
z?NqT8)0vM+S4g;e0}GF!L8EwuYcMPP5uV%QXhLEWH);{FIGp<t+4t9)hDVDl!YarD
zfO&ET;B=+K-eQMl5IJzt(gGd9r<HG0>`rr}w(1=J53$rK(dquYj3S(G)1<~P1MSuu
z4{^h}3VB#*$yY%wWx<k07nsv`ck%f9<<#LNTuY_S;5J*QMxCaE?eXGa3OiNPd3>7*
z{fhqDn6%}$dRG3mS)X*<zi*W~O{HXJ_CKtxxbFQQ8@s1dLT(#eeiE{v8^b~r03><-
zsn(*RVewK}g7F|xXR2L_{k2!53N=U%BfOCT-*mBkt*X(!ccsZuNNNZ46XAv{aCkwn
zb^M|c@_hd;S9enoIMIT8(zf=x=VYeFXaDGxc3HSRIA9Tn(t$eXi(gG<DcY#`fPTCj
zsJTIe7U!=%&5b&8j`cnEfEF|>lAuz!Fy`+eKC<6@gchG29$YQ9Jjl2>$jhu5viY8n
zPKBFuz}|}#<^hdj_})O_#}C;<&;0u}4k8@pxJ%8i)7%O&sm|EQX_arhH_zx=eK0nb
z%;}r54#v_Solsv;Q=KOrB&$xmDMZc(ZvlB95Og7jC-&-$5?Wp1v_U#Ac5@VTt$#W?
z62WJhr4>KVRt=i69PgF6u|rZ&LSSX04hJkD7P$w#4;qqvd(w7etp`4>nJ;}->1%Ke
zlelrM#k=X$maXud&Z$e79*HqukYtSlsM0eYz;nX>%$e{|HcBcA7Fn;Ph2%f%mpZoL
znXT(wU;bRD?cZSAXyhm~xDX=G)>E;*BthOdp@^^%tdbjD3(Rm@4P!|mMTV|Ew0AiK
z%Oc^tXm($tcjo(%XMDpS&SV4kOQjiMUkAIVoHZL+5iq4#)M0wl4Uu~va=~NUzrfAT
zhFlBeC{SQaoQeh;RQY!#XJd}umG-6fTQpS~c6VJ4JMl(bZmODaDnat7`s?l*9S$JV
z4uVm^_%Dl8FoV<aD9?!KAW%25Nw{Cj4d)lVFhBJ`G^QM&Yv&$jm&dWbf|sV1!MTA6
za96E=3}4VVn!5lJ8RS@C&F{O`I<UiE4VixK^&y-+{sZ%aphrjl{qOnNf!^k`wrIQB
zzDBAs#^^>$F1|HVo|E-7k0O%TkP=a&0;ClQI7$n)i&XH;&VTN`+r5GQJMn;xhF9S2
z>3D_zWnCrVSSn3|l5$vpRKso4lMY8k`6=AlQ#fjX=Byu0dFI>gox&l^0YNp%(@l|!
zpYFPUBM^{ok~29X+^{qs-)5N=&Zzu#5AC?kIcR<mfkjt--9lX5DcI>Xx2wL_T2o)1
zDA=`<ev4tBA(%CwIue(_t+x-_9ZMrH-<&Bgi*pb{!ANjkk@4f(Gfc`<)RpSJ4~Ory
zy^t7dKOtaQSGm2<%?gRT#txt9MCsPB&KF@9A(ZW}S&DC4u#ivd0X-AmmcnCD4aSpF
zdUCe|079U@3QBfxk%r&@g7<w_dCY2J9ShhXU8E>P=T0)*LKp53bctyJZ3K;dSs&Od
z`A1Zgme}=#{L14{MFN5EKfjEDp2zeEH2C|s%?f-iG`k8C;M^T7$IzLTFa^w&zYYp@
zIkPf4bwc}w&d1H}Q#*1Z%~uZT+(1}ke2e!@?oJeh!|D&hmAeN7f#HMa|GULy!?Vf;
zyV&bwYO1%K8b?8NrjEw)?;pwJ(ce6gV<52{elPn##m@C;83oxTk=0as77cm{iL_e+
z&>)@&t8opDt+(@6giA0#X_>xB6-J~nqECFBI@anS;eQ_f_fXj697iZ_rN0W9>?VKx
z;}K(O&zE|3rSvTNx`3*&RT0C5+s5Mq3uMxc82E9P#+V_@rFAL<{J`hlFk}>6CP=)c
zmmm7?`4_IJ?usc1r~Q^(>-|0tl8dghijj5Uey9Wbc|cO-W7iBaG<$sd2yXZMx>QHf
z8FZdd2YP6b&FI6pM`(fBxVF_|EqbzVuiT{iajTV53^TV)P;XxW8_bwcSIL)_HU54T
z?lC}poqd4+=)sxVc9x#~R<%h-#vEb2Ec@500$Yy{mbaR*wii3}ul`&Jx6vX7?|<u{
z<#p~|#7qa0{dyUgYOhTtm+kmF%>hVnFMc{_2~eofekJuJ0ygr187aK*ghe_>=$ZP|
zr?<@Dz7IwWylRu61v`A5A8U5~4)B}jFOxRZ2l0>;A+fJ-RF_I!*raDk7te1>NK$~_
zV0`ZtEPLHtk~xz8K^k;$0okOFZILT{p6_oZVS3TrQ@8E_*qD5N^CA&i87I_Zgo9hv
zXCL*K=XP$nv;On33~pP9Zu3o^wHU@ba**CupZ9O-@j>Smla*nS_(SKv6jk3%qpba=
zO2zB^U|r;xXyu!q)KRamGf-8%77>2&A{V()x#tlrCcPXYUy^5OyF=tMYY6IU<Z2zx
zEhOF1$yFMs9g(Hy$ld0u36%#$n46~49@^vso}g2%{c2B$abQdNtM~O2R#{t`vETJ^
zS`~+lG4pyfc}i?bmxkIE1UFQl7nctm5;5&mjJms7J*-)poxG$W9BcOQ%Zl!I^J70C
zO_Iq(zcrDK5_GD|cQ7iF>UG;A*eSg)%Qt_+uJM50E?BR!MK$H|zpv~Kfzt<C^Hqn#
z)xQ6|^xS|N-(1aa6n#3ih5F-bueD3*@Y~aKxwyqs*(8F(0XQDs^OG3HCH=%N=l{E5
zw8i@IVA70>KSe_BHS(rfE)}}IwSZpdYi`Z+Y>lq@x#^6G|3Hft-Jx*mx7yyoVDVX(
z==UsSK&%CykJw$5M^U#g|I#717MNDl(TU2c0+{avG=5b$w9xx|9%=IIo&^bXqKt6J
z6t3ns04Tqko-mh=xZJZjh=BT`pzW}K+jq;C^z77E(w=HD8NdIQol1B{@B6@)%RkKy
zAe7F>BfCTzPStKZPTt7l_b8#{i&!<}cBxq&fO_1uhwQf12hO+s+X~n-0yG9}U)jD<
zvYW?9UqC2P)}YQye}M}+CHscjvzMWwu|5$G5V%)Xq~1{l+?gzAl+a)&K=<UA;+-nE
z2yI_7UDx@?1AHwnhteZoD*IF}{>08;-5=|f+vZ)4;{s{wRk5_kQ0W-En_MY!11nAh
zEUivazVJFoSoD|9NY6-+K3-v;ee&0Gp4RyTFDHdiYZ8(M$M5SyAkQBni|tI2p1OVC
ztB_6EMzP%JRwsWee<j?d_uf=TlcW0nA?+aexu<PByo-JIiadpF>3Y+MMhh4*k~2rX
zv58&oytsUX`E|x3SS~m;pzth4esv#|yt!Wj!0?5@v4vyrChzl-Is>Az!{Ml0S47JR
zTqBO#EYXIiuN$ew+mnO9*vJ44@!dVh+Py#vvv}Mkf(_AyDm7BVzZN<jHNN4GUeC%y
zW6ev?TBtyqdl8Y^h)FINX@68GVz4Q-GV6ij{ZcsiL%R{yCV1tJuixi>PDIG_{}K|7
zf`(0&0<YaE82sz~vpefH40lqVaXT%d+)B?R0ocol+GFyTzEYIN%#jHAvwdJSQe3T8
zhX;X533#(16+P{lPt1KYJDA-qUrI5TeOZgTQ3u*U3W(gN>7yl<LKC7GHsBHBuXHme
z*Dr`aKDy)99@vUvHKg+&d<h54{2v(v?(bz6cfHM=xpInv(499%5$oIf2_Y%DFTgPc
zWHK*tC2v$M^?*5?bhxNK!n;AH_r*_q1?%WAjsn8jD^<QEkUWE~B+F&C7DQgfm)R<;
z#G&!l);YAJ%Gkntu)`p`Y^y--?N8uX7|y1V-@gw$=)*N9K@$(Tz~z4@Ae#_&<Se>)
z+M8q6yJvVckPQYt7nbof4RqQynoGCO%>;HY^1|OIcewTtl0hM-PPV*}<BaP5=?pC(
z?YrQ{F8Yd2+x$hNcd#7K(!JZEiLc!rRR<sgAGg)KA93-|J&|8wWM=Jc`K3aj!@=u<
zXz1v>xj02JvcHVC(pZSWJIyTU>maP}!jCwS7H>6M7D)~w`${1)cq%@1fTQR6v0YEi
z&vx9zCCN1<*0D@}|3N#`)MPL7q$I9}D00v!H*)GC;>6B}2L-M@Iej|Q!N*e*Q6#vh
zYnwFLdzl{yeZi;FiVCwwcJMA?wrukIFwN~c<;-uAD8Rp4oqW^K;zlzfr_;nypx{sm
z@=nHCEtq`19dd-StZu)>>iLlV569CPUNkqCkDp;Yd6gwyR5E7U9CRlWkuT`I&7|sB
z8_T;?shOG9o9L!P{CM&Cr|`38q`(a||9AJ^JwAWinDSlq0A&SVGfl1N@Vt5fq=wB7
zrrLMDtU5eLJnN)D)xxaK5Krf<KQ`|)nle19|65Q2qgSz@<<%47>qa#R*(^9$`*oVK
zjYa&vDj6og_Vxl;VG?;;MBzfjKwG4s-k@}swdu0<WAJNHOTHK?7`#8B#!oZ&7^Vm#
zS=s5cU2+e>{-<au1IKs4%`Xk(l}pY3>?d79OfNl^LSV8<8V8+djP-EB^e`;rB3q*=
zn1Fbd<GSejBCvkV_&nnAMQ2KWRJ@8+B)L>q2rjQ(Th)H**=aRBtm{1V5MbDLh3v1v
zXThbr{UKZ+UeGYJDOT}9S7Y*#rgdU=1|Vwz&7bG3Sk<lcVq%h8K^}UMP|G^IKHLfK
z8-_B0C`feIyNZ7uGroSmE>UDqnte7=-ZGWoQg+xWP$`|!0Lorq!<3y^x70+h>YvCf
z(s+r^WtpB31=eeJ$E_9$M@}B@Ni#LoO(ieD!?@?Mu@Y{wG<Ue<BtZ<MTPZ4I4gZ&}
zFcbRdz%f1irzz>lY1c-~@zwe~aqk_)Kj+j_TmDdYjrTKxOc1FcRG^XZRGu!NEuM7q
z#JfEObUaYu<So-(u=c9z0%RT;g-{}kxR)<nr{e&lpQ(5^;x;{XFzZL72jRo2r3#u9
zSnMuC3O2SZYA)aNaPx|vHw(fzWqFqWVHdj-!13A75b9cGO~5J{m}ah`JIW{-_R|Ft
z8?G|}djA#Z8Rsr}oD_YI|6lIHOsI0I=r<R80ghP?#y#P(+We%wA-##(CP@pVKgo+G
zO=A;%YnC+ABd0bSjpbPj*AM77v#d2hu3W93CcKlMDYV~u+H!wx)l}*MGZt~vdjMRL
z;2t%rQ!ltvkyoX7NQd##A^Q-52J`g&l6cu?U2}j1<T&&*ZE5FXRybzOlmH1p(*rWj
zpsjH>*<b?oUfk-=>yDON`X~c%$oI$a8`l6iH=S0qPYC|D1l8iau_r$NC!tA#<O1WQ
zCW!b6u6ZKvdA+s~BUKdTyZ^>JW~NKP%zWMkbm)xAPU^j-gxvNA-m=a{eg>ySP@C;m
zfm1*P3fPeajY!!SN_xslmLKvzST1N<q4N#dpq-HkXH}kpmxmnsn=onSY#>M4d%xr&
z43r;MUOCMBd3-1<aZbXNyUcP+Z8wV`H}4!XJ3b<2x7U>gHHVf*7H?BP@FZof=fzF=
z#;H7LtPDlA?<vk35a!pt_=X4sB1;=?$4<L!UUEhb-3yauNasl<wVcb^F<6AeN%oUd
zt+WQPE0XD89O@A~cC)+&FufT*){Oi*waDQlX48!Ky<#1$ejUM*3B107r#NA))kYpV
zI8`0)!St@(?UtSuGmh}lXLeJ9d>#q&ZVO?;YzRt|r6^&00&bOgV2$PIb5++c?YL_X
z(mH8qI4Mu>#k^_Ozjt^+QwCw<KkuP@m!xV~Voj8nYTMxod$d-Fw3ilG<@N$BsJS%I
z4kuSCex@pL%3u+t6gJ6ES)LR&qGlY~#iQzN5SkDp%$FlfE9L5Oa)FZ2H5PcWT?L4#
zQTGR@ZCyjZL4M#J?p_9JzXzhK?&mIMX^P+9kySaSrj+m|8sMtevg`7YzFSLB5$KY3
zhGZ`!tcv=AbT|@6y(Lg_DgdXO)&KV{Es&y5ti@mVJ_i-Z1d=c|l%ad{#T2rzB(j+I
z787T8)tS3w2~y`Ry*^PjSULh<!s0uJfd2?`#FXN(I<lfOnr=a5Y^WF>*k&Qkw_!k8
zS{&EJFg+Z3Xjmj46z<l`77>=8F&mosRG>Q)=YH|(?c9_K9L2)C4q9p8^1UJx#EWDe
zezCVmhtHruas94z_5^JmjyxY5DL{wV{8P(6ynGiw6}7ewSC@kv**VtH7ds<w|I`&c
z3s1JmwuyzIcPylQ5m#X|+rjz-OkU}g^y^I)l#~Nz)?hiyB8tf(dSlXgjyUgha%|up
zYyw<IM0E3!#g<s6B97|}>07n9jfkRviJ#oorzM}g_;GT7&h(%r%~wfl67lb{#q7a&
z3O<%&?FG&F=H6Ypi2g@x%Dvo7#`kA*QJbs1me$SJq*QK^>;*}>-P4a)rWDdelpv|0
z&T@MWYY0->1J4CuPyl%?m*5<n9`0z?)7w*!zyUld$zH#SVe^=8_TnEap1vJDZdMy-
zn=6!c{`ZcaHI=<K^!^$B9^8v$d4-ExeSBqBH{6>L`<eo%&AudxPXs1GkwiKo^#A<z
zn1&{g&_Ck$?_3M{$f!5~4shqfMUK=W&9O3NZyqV2<X8|i(wlE#dD63m3D}JyrwnPG
zC`Q(}C{~PP?@P*SQ&hb2St^+0-+%f%IL+<F)p0iR#;dJG?0p(98k~HI?|13#`1g#i
zgOh@O1yJmw)vtk+2cNn9f6wJ{^qW^`59#}DeXpDFsxn_K<l4><W~n^5d|e-K0wP&_
zyY$Sd-n8y@M_UElx#*E44y9}mT*VVYLa*&(sj!qD*x|L1h^RXpu-C<s&@%5+Ulx}L
zcJ*zI=Yzo=_R3KiD#tNeptQoBl5bZQ&%BqGCW%>FdVeRtkC}uP_dIfeYfo1(&^=<{
zU2XxE7?JL(ES%na=1(qrxF2+W?-Y1$Q4`~hFVTW&!5Ms4x1KI|%=%T%sGO2I6V)8~
zr*B4pWE6s|VxatvTmy|bc=oaddiFlNQ(E=!(YX4<+BCA49u4+Jm|Qs8v@Pcp0BySS
zZnGmDGo!`2(Wez){3D<1ytr}^GB#$WemELI+h)20mbDu08DBv!I~9O)aaVhUiOQ^d
z7_0*Ix1vCl3+@PkjaoIBbotO+f7<0kecP^XgIlIO?q;8o(m-*-s(l>>wo+*SEty0`
z`D~jffATmqKLrD53v7{!6G_^FBHIR$C#Ypb?b&l@0j5XER(oKxD0ofaTvcp$z+)Y*
z&SBqe!Q&c?U}x=Wk4A5><|gn@<t~r7#IPC^0SCD>3V2gLIry|W13uZ&SL_?~nX*2Z
z%IB%VH7I=$k#V>lEq>DS09&>P6`K9dzI(l5P(+c-s&#6zFmofa$G>g8J_c5{==Y$7
zV&vkm?RYJf`H`vprvU)FJDejD%u5=!ttb>>r3wH)1L}k&B?s|Yjt5p*-PNlmD8<~>
zV{LlhWf%9bpfr-Rn(xqF<k}UMWrRWd1CCQQHdT!-SnWqR<92(?p`5DtT!G1-3??B=
zD0s>H#;%G;(dVz!*Pvmgspu*?Hm+8F2o8qxE0Be_u<!tPqVkJapVx<uB~0U{lgjRq
z@z%PC6Egj|FQBmk0+eM)g)?{K%l8jgbz3x9?(Pe?(`uogjo3OpGz#YxA)_F=N>5%m
zHzn;&d;=05-RdmnSD4hj8CqJhN#oQ$I}>0xN)k_zH^`HZPa(4Tp)Y~i`rxq&ag$3l
z%@l&VWXLFm&1U30Ted4aNj0{ju48Teb#bTG7y`Z07fMtH7O$;He}kWq$wDy<cc&J*
zs|62URvP2B>}zb1AVn`kejyfo)H22Sd7f%~Q7+h(u*F^PzQm2^!CjQ*$H3dpC-aS+
zv)l(!N63O#Zy7HC<Lv*fm!_14J;v6_ThX$U2Dte8UrjCHTu4qC7+UJGNxgAO(J5>|
zK}h{q9-W`dxo@==8Y(I!A?V;PzHIl6yLyg$Pf^%l>_7C_mqb$C_Wh4oVP)dL`ThEz
zM=e^SY)!E9B?poQXjhW)2AahAo3z2hz>d3ur&V>W<25+eSkh*2Nir9OQh%=Eg8K*?
znCbz2>_p<BOu5;J=_wW;{Wnx0x#N`2`bhEm*zoox#_oZNYcFf1FRcIExRifg?{c)}
zN0}@4{`~Y|bNx75vpTN`sapjcv*%3kR~ok+hcgZb+H%{l0nkLc!=Hl~$(|dS2zq>B
z_sag_jGk1%o9K`Uk8*s*4~dfBP`wK42+iF>d*Co?2I;~9BN=gh^VO|@0CXy-I=Z3Z
zP-!nB`u{gtW{akJSfR;?nN%@P572TJEP3pm@8#2p795}}OuIp=YD4(DLuzZ6D_fVj
zu;mji88<LRZvC65#oEt}FyoT(12JxwL<BoW_ARxz<*E#sMvt_6l)vs~d|FHBJwO+W
zNxwE4eK7ebO2qgsA30oQyj$%3?jivL2Zs#M8mPR1Vp0Hm%Y=@5=bNTT@BZV8J$R=#
zjj$)!c->+`dblYdn7KG}{{0&rOqRh!N`g?G98s~BwIY&a5E$8yjMZ<#*T<UnhO>IQ
z_DV~M6xUT$zir)5@68T=dicS_>(fN~cFe{rEn@DAL*vYnWAw-tp23-GCf;um>;=Tz
z;d&yLDpoGRn2kg&heG4Q*5JjWz&|f9-J4PKo7_Vjf&=Hw+oiXvtJdE6l3JdJGU7U6
zM=(3u)+>*lSAS4|=P>}Gz<5=y+5$GJ1$+6Fx@jid5?SI4Lg_BzL^03o#gR$gNr5kV
z-{lq^BG3Q#<oq%x41IAv6JKr2DA!BHn=zCAJ*hg;SBXwm`CGzxNl8Z#Jh?QOBb{Rw
zzRa48*S^4Dpk#2OP;oi5V^A7VhSx4EhX0D|vWKDkcnj;7D7_@r8!6r17Q5u{(3AiB
zHM$wAPyKX^<r>K97B+?o|Cq`NV84rKagj)S6&dY>`|MIJSGgh$Kky_RH78?{M+0R(
zeMnADmh(@-DP+Pm&Vk!=V&TI~8KoNh`|8VI?nnJK3Bl&C;H~-BESYx(Y0e$7;B!ze
zOnL;n*7Z47pVHT;9a9azco7e(h>=ilw==4aN*d3J@?DR;B5%2`@H#LANp<781BIm;
zfs?bBf^ZtA0%S;`l&T!6=nwbjpFSTrw>p2sHjl$E7zy9q@#)yo;tao_*V~Y0i4gIv
zTsOVVI=regS4zM%6+DonKn=(o*vcjb?e065*^5fwPoEy9E`EpKsJE0WR$|gCzi4S@
z#MUIF?&n^#=)yDtEUuh`>TGEFy4X`^0x5%ul=&c=|5ZfH3DQsTphU#hp<FkdlG3`g
zwD0(Y{D>dY`=>TJ$Owy+oY{XIN_=n9q6IEVvJ}kVY+X?{AwKL*nb{;gN4nr23uk5D
z`|bemt@}Tahpdc{k~^iEy3)$a4RV8*+o?q$RQ@Ql)1)9XMYv*|-4oo)(ZwZS`=w=6
zH0tYNW!LcZ7#OpJIba>=A6=}#%C}lgQov{r!jWv3!P;o{0&})hahk7O#Gc>_TTDtX
ztj}6$*^1lhiSiV9Whu2I0rI8NOW>Arqj&vGtTfa6?pu{Q&@p~4YW}xX_M`y$p*55h
zA&>pzAkX{+q{V7&nwOj*lUfN{dAx8sfq0M;6@qXLqr&JDer3jD;LQHP)iMLDq|D!B
znZE=WEy4~5%1v~H5%qz+Vg)+{WT+;E+P$f7jkHYlG*o*n?(+?i)0|z?!{-#d!v_>^
zJoMNTnPI}5Id6S?*{57Fe*_+fk!wG@!(AQ!^Gzs~mB6rru-kWWZl$s=LHlt}5_%WC
zZ$vXc+GnZhf{wu*?elR0+x6Cu7)v%)4f_Ft|E8csq+MwB#ND?c#sztn=}h+AqKmH{
z*TXq;tZB$3tVgf;&H1ToHC%(>nZ}f#n16*S4`j9b3N!n2U^tj^f8>a}xS*cF12}*p
z3vSr?i|SmfHkQ>^SaXtsp1w-_Wi6-#qpSkR+1uB64#zcb&t_kn1C%7RQ-3oWF)0k4
zopA2}1%tUnTYsB6;N%ha9^X`eNnm1;NFrasA!8#CUk|a=VRg3bTh&G(1ld->k5T}D
z-u7Lz`ddC#lFZxMpS}+?TOIRi9Fd1IofZ?{uiUyM^5&!zHYQt$yHDtHvdPLj0b@pt
z)9<AdislO>MZ_J^5Xf8jq7mFI`((>kxZlDN3fqj#p4g{Uj;<Vp8e3T+6MNuT`yR-^
zxMEOhT=jPG1T7vc-WS|(T&Quf?^d7v+m7xDZr)a@uMf3+)+Jz3?BN~325CRUD`EG2
z&EDO7CsEMT-P89P5J!97MzGJSZ?;&*o5bwh4S$&`@GC;EJ5oK}_E!6DS)h|_rXE*3
zo%mwJGMze*-<vx(WKp5mpMOD;bcB~=uZWDPFI#0#N<QP-uO|XS&mSRDd8ai|r)|Vd
zHy43ywtmOln>VEi=4ZnSKbLQWAotH{!d$Logll+T%|2FaAfhJR#;J<hjY;cfx0+<w
zcr5#}3-0fhZG)fYh)CzHsdmprw2$i}_|zie<|Imn8wGm1uWZWLvXi~(U{5HTmQg~e
zS@y=WDHlVY+KAf{C+gS|NA#OaobnBly%4F!#3$wV<^z56BFQ`L$J}HtHx9X*0_)Z7
zpbonACK%75)xHTbP94=xuDwO*Rar#RQH0vr(#?(so=1H2PvPldnDtAj&&<V3xlv)x
zPy1hY<^XZ{X!a@BIam>jlXvBarxJz^u_Y-Y)M$abdw#Lh8$Z){i3r4g$s%@G4)!HV
zNlOX0%9bbc#cxihmVaZ|{DGTD5xOeKmC0bw&UDteC(KF(WCEi2BMzooL`=7}sFUMo
zC=PT*x!uHdq?=WKBmd9!XZs*%tIui3@?d*g4u${#MYbB-8J4h<HN>Gz0>H$QJoP+z
z$h80Wlc8M1Oy1r%?9c9lTsiO0zZ{s59j9=L(Ib{g1mjXT*;K%-Zn3nRz6NHP<vg@^
z`^$qY03-^cbiz_1a(k^Y^LO-i>QA#N$JED80=^UST4mTg`s*9~%9&qI4;J0ycIKVP
zV^>G6e8dD`CseoqO!?0=sLgs2$k>*>ul$CeOM1Qy@e(4zUCA87eO9w!D)y#>2d%Rk
zU8CC4dqg9(<<q7hEw*#<p08t*Y96fjq3V2gut3q6C5F>(3J3Gyybv*}IR@X6bPO!(
zfd>V8`x+t2!aS6shG0o;Mys?~fjP;e`5*-kxL#nxsxPWu%f=y5lb=j3(3PCLf|K%|
z7Clca^D==(&iiYKNC7Zi|G3{JNI-^J=nS|Qi6t-5N$<ecxEgLNm9rdXqm!slz?6G6
zO6V!iJvlu>PV4u@5$x~b^J8Fb)*>Lm3@osG?jC>kX|qP7l|zQY({~)0`LVsfTm>5;
zM3Y)|ByCAz*&IHfC1gJktcGDrI+1z7kig4Qwj4(lfk#dss=NqzwI7^ZML_M+8rgQd
z=(89g1{Iw*%J3&<=AlhrSV4Pq=ATy27rM6gSo8E>sh+ezJ!1Q8Wq*~ekkmddEPCJX
z@$%$>lI@+Bqk*s<wquh)QS;c8s`G^dnfE=8@A7^C|FYcyDT3c`nfMW0Fuh4=JO*Kj
z!$_0tOY$_$-lSz#y%|Zp)YmI5_Mfw+S7^-hw<<)#{(%zEhxcNHd)ob?V>n~G@VN-J
zH%gE+ClR$8ZlePMrtpsnOU3viR@mK}y+G<-{Fg34nQlVx!RN-J0e>I*Zir7sW&XEw
z<l(z?h^_ObCLftD%gY6GC~*5Vz<22){QhjMT8Z2=jm4C&Q{=h=ISQ*?pG#bK$Vva~
zHYehuf8(F`Nu}GY7>4i)+1Ivaozd?zt@|m^1l(Z}no7I9nfb`B5%52sxov_yoLRMG
zV@VO4(W-uY@kS-c$?Jc;J|!FdUWff;!3w))p@>z{9Z}diHyl3~*>usj*7Uz<XONN9
zMsSXNo{@VsNuC;zB?{fPzk~sUXp$^2j)a$J6r6J*?_d@s?8{qBTb>lC?U()wrUA;G
zegi9ev-Y*Y>_BdjTQ8fW3w~O7=t&Vl;e2P2lUqp#-~`vIpkN?P6<Ln0{YZq+umFB~
zD%JN(IY$B5z0n_S?#d}F^ED3p?2cbGj3Q6@%3Z&y(wIs*!<*ad_5ytG?4cnde$BKM
z33%Kn=*!qAiMW|_wEJ}i-HIfrYq8%3WsY0DFT4V<gpU1IpHlXFvbGzkdh{bkxD90~
z!K0x-ErOQ&iM!K@Vq;kLUCCgCWHVclCP9jd`K>M$_Y-j)j+A4Bb(3imMS-cPyY~wZ
z(Z5VudJ+$q!H{D~YcS{Gca26&a#~9D1#;vSU7Edp{}g<Ydi2j2vTsqth(K+W`1&WP
zEaOcWiI=Ab0##=hZZVhZuukHb{mf)m><6o5dUp8n>7Hxx&>X9{EtOnEe}Tk2c+|>w
z&+z8-?s9Z!&KJJNTgEyQL3at39>acHXt6u%OW!3<TpJ^&ZD=4p`AxL!#Brl-cNNzr
z^S!(Z?d-YC5<MO$0sYo4G$Gk>AO)O7H&t{gZ2Ii9Qg+y{d)xv2z48%qTCUQM#ICtF
zWt9?c__nFqBZqc3GOZSjLNGvM;DxiMyF7gN7D%1cAK5hSlmijH(vx-#w9P#pvFC1m
zNE(>2M&k0>UcJ+d7oS!IPs&QjN6-K(+PV1^M<_XNV%RUz%DI&X5XTiz@t8aqx+hvL
z<w={PyO70*#_dl0_}4|22RcQG6~bBWQw0(VK`|Pkp`QgvEr~WnCMy!}$xsTr)gv6E
z;=|N?P^qCSM0K1qv;Vw*duWnxb}jZUz<Q??GvT0B6j0S@t9i`Ppa||R22gWk4(LrT
z$S-ZgvBGM&qL36b?xY?OSLfF5x3xJvY>l^q<J+N1udZ4W4q9^As~t*z#!+Ct0trq3
zQt-Ay<BW(G+NMpWd-~@320@)Qt++&od^G~&H?f#(N)}f>;S{Wz@1!PzihZQQ+mnk}
zEgt7dDtb@=X4nrrnd}^mH#W=m@b}(z*YKx+tJwG-%Y=qa$Kce?cB$vKDMQ*VOu?B2
zYgFdik+VrKnZ?+d0(`F#+WkP(Ocj`Zm|XZt9NiGXx_b_a&BPUE@)35+!<+K(|8`ad
zO(tXbY1kWA`rX-~1Bo^Q%M-ht2rB(5hv)yJJWGW%kw7^-<=xW02C-oAd?W1D=$u@K
zrr(a(i^l@A^v5MGuegUDs!!x@R%Lybg^C(LU{XI`0-QG}nAK(8EM<N^a63%ETY39b
ztJ(Or$zj*u!)^geI2!vV5SVoH?UI2~nbPU;b+v(1;b7VWHbRBdSHk)gx^}}P<2_xy
z#q;gT?||Ni?LF?-?x)@Wz(oy_F@n|7PGyD|PV(>{?+#x%sxS~t0=|~20FBjt(1bw(
z6LN`Ahg=JW98GHAu-4dNCLXi;p?097u=jN{q$)_0juJoy;yX~~nukRDY{nEFFn;(I
zR+Og<Scp=bQG&blQ`!7MU<jp0>^?8_nfw0+ma@XuCIm~y-~MQrd<Zh<xC!iB{KWL~
z9W;WBm8_^1AWgD8iyO&BfDi!m9}vNm;Oz>^LIIXdjimrCP(svLQV8n*2jGj<A`Y3F
zhL7B<p7|EX@*B`w#xtm^tpmw^y{XL_A@E!P3{FPHc7X1IO!u%!GXkB;fUS1w)C&U$
zF>C-dz%upS562ceM~@n$P11sa%9oxM!>o|0pnd}_EocJb(94}HLE@EZ5G+tl>>U!P
z*E_t)w$_y@hr!fi$gH{In3-K{L4!Y9I*nZVoCljry2>?z^LR7fEmWx*RW<dTl_7F3
z<#)=fF$2zydjp2*5;pCWqaRZ$+FF4P-nESE+Y{Ug4zM>(emd0`U!-7Z3Ti#SOJl0M
z8OJ(}*B&1kaX!?X&z$Nqb^(<w#~zvC?~$VPXU}vJH`+dAdAao4YC~cR4>E?p3*P!#
zUd~-k)F=g?bMA$8$8k?4b_X-1n{`)I`|JK#R_xo-cAE=9`c<8;u{N<BdXT(xbOoVq
zQ=d}iMHW2J+|6dLCsvGh+*NRjt;_vERmDhuWnE(Lf#%3XM<_MITVgE{CraBbve?Y*
zmqDBe!f1C|$%^7D;o_9hr;zU;>CuVnnCb)kBM7m`Pkhqv{k^A-!v7hrO~z<n&x#VQ
zOe#Opk^=l<GAd__huFe$htiT)>(~ol*C~lr=Je^_18l>)L!JsGw%l#x{W?PNBJaaI
zpm~9A{>6!5FdabQ5FqkJIOqCS>OD#U*J)f4I6zqxkn!+TrTg_Es@~SqCT$UZo%q)d
z_SqtN_(mo{iIu9<z3py(29FsJf9y4c-?`g`hUtVT%gh2d&5hgH#an|sTz_*SNbSo4
z#Av&cGpoi}nRB-~`|`3Ny{48x?Nx0SMdcitM_-SIz^NBYUCy?m3r2EW!s)K;1%?7v
zj!zFd-1j=h{(ePqE?48odk9;u0?1FLI=?4r1Q!k4-uwV$2i5TjK#I?_9rd1Di#<lV
zU<V6bk)5S>^5hJ77#65NO6Q3<JJHknC)m1j@>CBfZ`<-`O<vpOzn5mS@x#)cs)J#{
z_V<-wu2JzFN`U@5`W_D|Ai}0oTzT4h5j>-g*jne4S|OSFeG}n>FujrQD)#%5SbLsN
z(kbttuG8@J<fM^<6!tG<RcAo|X*t!(N+6t3Iiv$kx--Y`#Ij@o67YCrBP*vWk<N<s
z*NYB0;XovhWmDgE4^*^C5afeYM$4Y)n}LKFH<yI(K30<QhZB0E=QWm0StW$l-*3XI
z-Xr`{+YklgK9Ge8kh&SJCMwuqNPlVyp>9jX&E!g*js}X75O*5zYfzcCkab8`yn51M
zBfUB_m7|ir_1(aNMk&4oii1%|=qX-O&E$rq-ViRHUKh+#)wogdol8|eGp^5z2_lYK
zN(Ik8EYX~^>%D;nQw~^>`WitI0vI9q8^9l{(mGyzc2%I2-NXVY69E#07)|GCro&`1
z!LsO#PTh>3rWvt>Jlndyv-uZ=+sp)$wvLaD>0i3U1E+F9W-@;E8J@ey<>3D&Sw_ms
zkecHTk_+~O4d^vwXlsNNTCj4GAw1bzE5wO+>V{X~f1}DCOXntzu<bc&!j*yg9c}~7
z<2zPH9xG@>D8gApi{YYDM?JNOs8VAdq-hc-@~e|;b(d?LvmoV3_NCvh9>Mx8l~?bx
zRCOD5imeYv4W?ACf)O3{7o?vl%U^`aZhzkqNAgw`>0)fk8VgS_<@VQ&;sjn++Ps7C
z=2jvmTcY1G1EQ6`>(phP((>Wua_-QAho@WwCpnxbp!?GYm(hnV#-mw)eB0qpueRWA
zW*C=`L46Kg09f=~r;Wlx^}v~3<c-^%U@Odh_F1?oLW*`TN<r&e0#Kh0{Qev-Bwa!T
z{`%j9B0UBH!8y)v_*hU@K{x%GAb^a;f9h00a5R#$>I;c%o2Z4DsF}=TdLBOTOy;;a
zBwmlWiC%Z`(tMWLcN;DLo(I27W!<&#)0&(YG-}q-0o%DSOQKV2m~=y<Qz175<#G>H
zUk}oOz!M3zwHqm}YWT6yG0_4Zn_+Nj&w(7-2pd{bT4H|=_pyk=3g(Nn@=L-Nbf<D^
zfZ~xQlYR;TL)2|RJFz0LpI`L8KVeYnG^ZJ{_Z3rLoEs4v13RQ*sxd<@$U%p#%M~kV
zN*UH@>8l-^PIwya?h)h@f_O&Hud=_KIfOg5GhXshC(wnwB(Q2i4~3%#_{AdlcRo-g
zNq_=K%0I9-k@q8DS5bTIp5AfC9IEe`r!+!hlo6H{ud<!bd#><6Up#)&S`)064E?%W
zCZP$Okf-_jFc^p7A8-(F4nq}`rvb?}mz8E@MqpGP{w9V3q{8y1V`{0k_xoFx{k9lj
z)!E`F<dmL3M>cVtZh<5CdrKn`fUP?~eEEepT!Cyr%#}dTn-j7UYO?xjU;4a2J3Rdy
zXY-$vipDrUvs-LtGyU;ZIg3q3UzEl|e*A!las7SLFkqvM<2iCZvz7VRk3Z(B&OgLl
z+Pd-Dmk=<W=)5?4OGzm;tS6lyInl<w83jsG9kN=Y<&K?r@8I)&3#ojnwdkFOs6V5?
z{id1+b$wBM=_GTn*pvBHg1(rKvG$?SjsFR@m*Duj@QeG_VZ#-O4IuCes0W!rJ0M^(
z?dOr79pBy?xqeJaOzb$Kbt9N4A7q|+RcRp*x0JQOiGqt{b<#N;!5sO!EZLMSKUKe(
zwD|rb&+tkxa>Sj-mV1!UJ3aWvR@?HE03K=#p=tl|%`Tc;So0876xNgZLLi714+;Xy
zI)TpO^4H+KRH^TJkt`Ij%lScGUvw_JgFAK$=fBVBSB09^2!5SQLTL2(UV%P{JOy*z
zMBs?TQee<YQhi-`N549;l`i)4web!4-w>JlAn#L)-u=b(4aC21xPe&I3va2bu=XGQ
z6P4TG0ldGPWUz_w%yJN|taN0Xl%!by&6=d?J05U4?>POSVAHs7mQfk=W*)%xYB?s}
zmjIj*bYg9}kIiX|jQt+I1%IF4VdT)jZjau!iv|QM^qOY2D`dv5UUs)>w#o{xc5(m1
z%{UFU)wRH0ZJ3~w@3~%{x~4{L%p_gU@e7_`>azr6)O&$VAwh_5y@zTsR=)*4>azHX
zK0<uMlpm-&iP$>)*1_j8*mLjZ%XM%I{JWzU<>8bZ*Jfh5O&Omne8TeJyvOZxx5Bix
z;nG|VsU3GzW7~5_IE2TWT$JS2HnRWtB0KDLSX<8%WZNK}p*3msO&CxoR6^gU73BJJ
zq2zk=4aYpTkFZt}iaKBahWy31J?TF|C;9IWJ_q^xAAvf+MSe~XQ$x4AQ1defwvhNq
z=Tx{*Isv`u%|iEKSa5m%FGIxaf{Ji5C}EQ@-lVv+Q6jcokQ?o~WVO_YI38QnQbmEY
z2zC*8m7h|1&x-6}NdrO@e^IMVqaiJ9j@vFDiAgKWlVp(0LF@bG%siX&_xelbDxf4;
zLRZ0(5+p&3`R|U{Jsa|7{feiHxNnJAZ~&K>`BZ^UcJmnUi<&9aQ*64G_nK2F5p;`N
zC2ZGQOdo!JP;jdFUy?+D5}w;jrgCUP^&nz2jXee5ojG*1bV8*2v*Ju42tPcsj}{DV
zXyK{HUgno*b-D6?g%rF^mqL#>c|8z!t^p@GYv*x}5+^`-sW#oT+6PLlPMcUUhulh)
zzUH;Ix@1lbg`R3&sD<$Gul1K@9C?mVCobX$0Yik4pAW1OA*!e#%bUGe5>zu=F^G5k
z{#r-pPH8Rh?vZUP`roG?Zc_$kJ0SITsoQ5E53p>HxpgZlP{!~r@IX0cQoHAgy~ZH3
z^_ByPtWz-ae|`pS`wNXZ?5hG_6D!3-a*-PW(xiS_5V1b<`i+z&IM~$^Zg!$P!oL0Q
z{ybAfC)mf+O+p|Z5p3PI#@MY~gS$QS%3lKjXD@KF;heJ3YwzAg^(kgtD&qg&)(ioH
zJhR)?VbKJ)mc??QTei5dglaAaGS}bUTxM2bD7Yjr(`uo(Z?SQrfEaA3jI%1@!b1$T
zZ`$7bDRd%O=mXf#LV`u_`n`=vQ9bnry(&FEZ*kAzSFR%QXCZ=tRdNCm5)GI%96l~N
z)n}k=wZx-+oqy-F=O@;F|H$=kBPrSoR?Ttuv{cDNPcDvv7fCGs10=6!5sy+fZ06+E
z_McNz&0!^lZC=QeN*4^FcTa-skCxysh8GcnMx7%9P5sCX5JN!X*U0;KuKp1y6<xl$
zDQ~bO9wxK2!*ByxfGP)6=Oa57p9w<TNU;SnEvegj_OJb_0xSSE2c1wDq$fjWr@4~x
z_16DWF?or8dPTYA5O`(Gl->F2@JFq8Ybzy8%%<3=d7C<5?*H>&ih6AId840n^;*_O
zExM1U^-06vBm$7*IU&;eg8_K)Lx7|$BxI;vWLZ7{u%wMNS1y%GT5HW%3sUKtt%cpr
zU7&z~S2;|Bt*daa@`GPj``t)c&mfBfwd3DsvTOUe>pcWKV1NZn_RNg8f^0zb=ki6<
zx6Hjsx<My5Ed7hv)n8w6K5;?pe?GA@glD&UEq|#ExnjXroE7?-Rm)LTcez;0c78Va
z@}Xf+ZO}qwaOXE4aH+H&Ko4dq$hp-Z4=q+KueCo6(%nt+1X>=i3ll_<D{IY8OR7_?
z5cw0y_Y;aM+U5O(2}W*^WgCRAM-^!tz(d2u0(%WM6QYlXLHT&1It}9$-s?9PbBM0F
zRBE$cFWzIKX2*%@Nei`e4z?`3Y+EpR7vSS&Q4FH>Lv-SF5$-%ImZHzc08S6D+l85%
zS==TU%JAf+MF{cAI3huj@~J1d`P<hzrzuf{;AuC%Cmchs7Jw=cKP~>yxI)UJ!#m0^
zrGdBhO!+%j9_FrG2fK@~(!rD-i({u*ZP1o3T1Q`$OJ=FuvD;ppxMKmypDK0A5KO%4
zU6*HJ&F?B&KdvLgnY4;Z677*R1OsLJ1FHuAaMTB$ikvpJHER6yy+2vd-{_rW6^+e&
z21eh~Xm3nfrRtmOC5mQeeTrQ)+o}>7D_m(VzZ%T1-Bh7jUpGQe{9p_iQ`)W#AlCkL
z-lZ)b%-N_pay$2_2mgps@HLn@RA`~%2j!AIH;75lR4z2zvI67uPAYMM4;MjttM2Rj
z6~7L+5c+z5;q*^dY%+K}KLjUIe^T-pNsziU7G_9xtI(QNObM8&^De#W6^=^i9C(Fq
zNOmeaQ_gEjk@dgY`n~CGY7v~4d`cy~8*+Xrzw92coQbFjiOc1;C}Q{1ruKIDLu6zr
zt!KizU$jYZYVUOipTErq2n)YV7qWU5&NWN<f*{h(NiB1NEC|BU4iHCt75D;Yr-+lI
z!6;qFrQ&tOBKhzA^vPjM)q*}D>{rQ0mTs-Kki*uQH2Cn{<ljPI8IJbEq)z0I53Ku}
z6h0xo<5^JDX)hRwFzhsS?<8kd(1ogeb$>hkW(ys7Em~wMnV0|bU2W1jyrLj7@L~k?
z6q1OlAc(*JzIdtY86aAX-C|`9hX`Y%nUJvs_s42>`jzIs^1}3+GjZ}Cc*M%3CMRk;
zQ1i{VM<UmBb=n_%kv#fhJ-y#1$P5f5(RL97>O(#Ivfxg<DC>Sg?{kz|BdtnQVmw2e
z-)qLZ{b@q-yBvTi7EpQvTbvpaPSxj?p67Zs=Z<R^>X$l(!fwA1u+}M0V}L2%iyNhK
zU^xvLd>yU0_CzfBDGS#d=q0#OByTemlz74FGUOI&?Yza^5!)pCY8a;^xp~%JtEpID
z93uDv2pM~bY4KAx5H;aK{)_wQjk~zXNUW1H7NRb`ole7ID8%;aT8n-bF_TUOHCz$>
z<$OtKb8Q779<#cmCtci9gBA)|F2;ccPh*~cX1$aGanw;grSP0n(8=_OC2|GjQia|z
zCDjf4m9WI?7E#ykeC$vNxUr$ziES@@S(m%Y(iF6<$4lrrbOZ#o=c0VA(>fxay`ZJ2
zUZd~kL^)x2wKzeDRx)+4)p>BddfE-Tz#OiN=9`(ce1Y)#nojGL+5SK@GuQmsljrMs
z!G$9MEKSr+WHwB?9X9;QogWHkoGz_yyu~x?a*=(OTHDYZ^mj|{h3vSWe866fL9W1b
zkX!SCQq8X)f9HfD11k3Kst}1b1)?0v$dW2xqcZDUP|pHb)tcs!EuGO>N-!`;fgcF%
zA1Um-k-?+&3C4CHw8jS(tbJzGN@<!-l)bP#Vs#E1<2K4Vy}Apt$)cZOiear2FjSO$
zMe}spxHcdvWj$y`w(1(V$$Ue?J8j{24!oqJ;711|q4jmElmhm#d2MVaYaD~GcdA@1
z27tY9bwbci+YRi>1EamLS^(oWk2_MA(Ic8egPaR{x1bKtsDsu!;6_0vyfnj8PEGiy
z?%}Rvw*W{;EL`8<Zs3Aw#?4m_O@<F{stJqVw|Ynqdjk-3vwy+^R8ugc9M@4@Y*t|G
zHr8s(@2|>-m|O<HXRW^}RpS{S@(du8#Cu6pDZ*VRhgmZMO^U34lNWd&hjUg^s=71r
z;lC>p5R4Q)DE72wj+^YSUiGJ7LOmb3rpgL!nr8>yLzoAqR3gy#SEVl#n-UNe&f(2S
zqnpAGXm@tf`v@&lW$su$>j%csyFvHfw*Ksz%)o{=%an3rh{CH>lKtOK@=fVuIz}Nk
zUO9%I_-`$++Wq*hd4Yl&%?$&|t-yC8kLGoF6BRMKAXrzaTMHgJ8<aOq$z?N*-gPrS
z{(8{K7eD!A0SENu_Fzjgr;r3_8@uarcQCECom%8fou>VH_yBQpx@i_DK6n)aC}`l{
ztQ?(fv|_`bi~NYAwQ$@{hsLs^h;61MI$2a^l$N({nkMoSyxquAr?p!_^%Ls{4BeQm
zP{pj=YhkvWk=B_=3Z@d)(AA5oeH)lcokDLLL(mmf*<=u$M&DNDLoi*7g$*2P0ru#y
zOe4u~<A5V))jCn>H{00VX5o!Er3dSN-{oQty~8yK;kQUn4#eQ@dmEr?lX*76+5K`a
zm{4|UY#a4iQbDJ;08%u@f+f8ZtMhB^BdCoVMy=!PBuj?th}q&mTmRE5eW{E-u$+19
zIYEZa@dFNI$g~-QLZM<=g-FuD6fqANfGp%cfoq_!Dt8&tJzV@w`_#vZ3?96Zn^9Qk
z^GH6=R77%=dEsHOq?z=@nAU9#x49%==Ra^J$#}rdxt%YU<zw;BV5%5kBP=3_`4C{*
z!kJ1YH}sX-c`Sp+Tb3~j>C7E0QQpAvn4e1&3Q`ROzd#I(bo~HFaOEweZ;X;7<egkV
zv>=~7d36V5oN$EX9BYaM@LScd)57>iO{rEYU_in*i!65&+^@!_v^;ZO^W$39ckCfB
zZSikCf(EyPD?nzs1jaQ1(g+jVG~U-$buMMQ{T6#IRIHdq`tHnF80!#6|I}yyHW8Zx
zh&so4449KntlEB}HEL@tG!1Z?>K}<S(q6R<JXl<ZZq7F^SCrAg83auar=|FJ6ClEb
zA~zX`OQq7Mdqw`pz}qa|K>N~%9D_iz<i%@<r@P@Ch-f^VG1{9-(B*))aCMs8&j)q~
z&4ZHPvXv8+Ba@N>ag%@KjzA6;Y)M_KJ9;v+ko)ehp7J*(Ry}Yi_G93vR&mPS=i>{J
zKdWqtfVu+Te5XNCbJkI*pC0(v14)dREYxaIdx-Up1&s(uA#+@p&(s0is9Aw3e|i_9
zvKcT!o~za$5;;?fBt31UXd!12wT#(KDPMv@@)pEVh5m^dZHzbN$3@X8Y%L1{*`$X+
zJBy>NSH<P7mm;T%`>#p`%8fIdh0I!qayF<I$HbFqXpekq4Axv<*+_Sp>k2DrruW0k
z<kaGIiLqmCY@c@?xZZV52to$nUWKF7=N^^1QztM3<3BT0&LRrfR*bP~_e$C3!mWT7
z0-VA{EJn_u{|<GdYba_0#K3kkx*SL!belv-TyjD_f|&nHs=aT`4X{&V(cTXTGmVi5
z6JR|rrx|b!{liqzOu5aw!~gj4<4t6wAPnxYFK=fN1&bKJky{#b)~yZSRpmYC#%fzE
zWr7CsEk+QpT?j%bx`N`y1I#l+X59PI1c@cDW{L1~?AeUed|#qsWga|sz~2}rFh{u*
z5E*7X&rW^a=?Jy$<?U=@Hv(Q}hvB&eK))G>Yh@=$G$mtWE9%CgBA(HS3f*)fPn;Vz
zPt7xV_g1qDL(Z!+Y#9PQ`yLu+foXQ9YfByn%XFgFQ~ead?y1s<9O|x2k$s6k==y>x
zc;~VZF`HFb>W!3UPkdy^tO$Mey2|Pa7frB|9f}$!m29p(nO1r4>A2qPtLuoPuR%yY
zO}_me3%(t>z-jUlttRi?duo0r#2~Z(7J83fktFn<!x{0ikzI@}U{dBKXlBY)o}R3*
z@zAt-fb4VI3<~+nxS&(sIRGW<%S8Sktu2l6PLS*-zfj=Y=fM^Hsc?GIAOQd}c>RI|
zt;`NTz(<VU6V8znvrT;iea7=b6)PFXVl+(xB@Tb>jugC{M;c^^NlR*2R*n+(wh7cx
zV~|V1|H%d-o(uaopE<4r2ivQnnVEBI+z{N}`$}2%X<GBXpN2ot_&W7ZE`j<K7+Zgb
zqa1E~2Mv>5ms0{*9=AgkOt24=dRTfLBC^sYL|0{dPoLo9vt~zk^M8@v4<5z?ND%zI
zo-ff#0v98^G2Z9Rs$(H{2z_d;c|3V`Joh`}ZWb{T_HGAsK!!|uc<(*9glqI8r{9W-
zx2}n6$W`R7{Pab>@3nD;<9R}GpD9Ejs)BvqWNWbsf)*4?b6<vl8#A#HlgnUj#&Hx_
zwTfPEjXRA{d$0f_05EJT8<L!ZUc&zuGd-7WL&tULTYbX2_P>@p!+{`2cwl-`pYA;q
zGe&UCT}i<nRQr@qRJEg;T%{w$LLRsyf`jZXf)QhVg6_?rHXX!6JZu*zuHSAPD6lyw
z*Ay}PFSA(09eeds;<Pc?870(|)F!P#!Uu2sBI}jmTNoJI^^Fbb>xdK7)jH_HpNobK
z7}-dP@9&y*3|)jQM3pZ;E#nTyvm;n$LGBjisX8u>S=wta_w4+>EFwOEiMu1;X>$y{
zjf|EH(*DsAkvq$xDdD|FZ9w^fhM}ya)Z^EjIq;8oxqEGa)*0+TUOv%}M-oY*4dM^!
ziLC?I#-oEYinuJ^3P2kB=U4T1kgWhGFGy@e;2F_s)KA!4_Dju(*8-s_ZLK*oxwq0V
zo&dN#aKs~hRk+0$zCo;uz_Blk4HsXG&HZyxG;v{4F4dE=a;5!MY0UJU48FC}M5}4H
z-$m7;L@PEw@&+IZIk}gsm0Aam30GlxVxzn1Fn6-~3wwT*qupgg_#sjk2?TwSF10m)
z-QVk{U)+8WAy>+vsn^=+c?CPTo!KqHNJ(yZX(QsVykfY-XuIk+*7@ZOwngj~!ex6%
zQVz|Eqh){2q$R@usOoNjMQrY}KsM5mAAoMJ9x2jaeWSAZ-K@xoeg@i-=DypK>6_O2
z384~`to25hk!_0|p#G)+v=j9~0spF{^y<GtpFu0u;-Ee3t{+N~+8^f!hS{*nRW{-K
ztic)O27T+qU(>oPc;=-8{S*0nHRpqbAiHb2^e`)zQh$b8)ta@gWOQ8)&q!iQe8z;j
zXD14MCJLuEnO8f2^WXH`OwqJ%OB2uFg9+1jAGZ)ms@nxcBpw<!9BYFrRk|UlLp_9G
z4SS?Sm{=ng{&eyN4Sley@7-9dbj;b%0Q8P87X&K(xl!=)ptmmdxh8aU=^P`Pm=QiP
znbZ&N(Ln5AhHAmm%<(mQ2FKk_5L!IX`pQwTg+yYYKz?kIe(l5h{sKsM*1VilP9Q25
z+$a>-fyfC3s!26yJQ={l1vnVI&b@kr**{#m1}~EfoS1m1*-0+AZ8HN{Lv~F8ur{)_
zj0zAjKrobww$lzm_joJJmT+QTOQG<5j!F_IhkY46M93JVNKUB^3%-R0QwDO);pGs4
zb=ileJfkotjott)x_d1EQgr#S`|~X21axr}ISThE&W?}g0i&~;_-@K$)zF1-GNgo`
z)a}c9bsj8CmmO<YJ^`P<(XRt9@qm*DRj9W%$eS(xYFlL`!j)xGhQT)TPFVl8#LfY*
z<cL=|duO#k$0UP*lNF!)1giEDt08%@{`Z^iM8ZC!)-hB|x>y}690~j{80yWZOS=;;
z72B@9rqY&9qmMh^E((y)s&vT+u3(}e^ksx;jmQZ?yc)?S$@*4kV0oHjeNGjFd>Kxd
z&ZKl1PO)9GZ2-{0)yk|TzHWrKeg0&8)vXR(>ns!r2qwN7&B)Ws`A%W68Fb>(OuUB;
zbZf=zOSIHo=Vz=_B&h-VDU_mW7kGCHE3{<EtPWm!315|$c>^+tE8I<^qf2IP&P8l_
zS(^?g=8oV{-%o_@)G_zqvRBNCtYTV-b}j_8vS`8cDM6-u;qSb>OIKxCA#JdrG|Jh(
zEcm9;-68hW)J}HxX(|B=bT?GbxHb2y9!Y@*R>M2Yc#^MPv!G$4V3o}f<Q<H$`PVnT
z6DF5Jzk`-RbV~IO)r6XuI~%N_KnzAEw76UOxQ*mp2)aDPEJx&A2k*XOHvydLH5+NU
zTf&SXgya3~EaaE1ERa84Iaph_L6H;Qt7>+3#^kZK)5bT~{`86+#;76#3rYU5?~%^W
zt(8AOOjW-UBWI>{in_WKB+COyNs!<)?WQ-(w7M+h=kzBGs6_zYf0PPoWxu)<QOq>W
zoA^7ph94^IzD4UhUM$`K6NOe_z&A!P;C;YXE=!7-)ip_bwvOq4A!c8x>EpR~BCe_T
zB}C+dk5U;ju6oyh&#)X)%hSzfM<)ZbzYhPZ%(!jwBx|y}anyI>tHOk6<x4}5Zobg9
z;+lr>Q~CoeFlveCrlR}$^7U-!Vjv{zh}a3S#GRK1Y`K2Iat!Ei62!|sXR$*NO;4``
z{LCUQ9!o*%)-T+v>2?kEtBN?M)w<jI%7_tG^8c&sO`xF+|M%gMq>WZ7Lc2=Ymk=s#
zDv>qGmXLklm(fNg(JDLH*RhUuW=JJjCSmM`WE~7Kh8Z*SexAPH|9j5=ocH%T=bcl>
z89Zj5d+z(QUZ3k4G9_|a>QwBueU9T45$`V6?BuGn)#;3rI43L*1+Ai{L}>N)&rKIF
zY{=O+7}&}W6ctO?$~$xMB``eY1p$p0Fl@bIjqPK#MzhX8_GK*I%c}!$8DpvHeDGSV
z=|6Qy*a=D(1JNH2t2)Sc#s5TdKPX$YRdX<mH!2I<%!X)-UT=$=v<jd~1OVGu&{bI|
zWWt)yEnZ(_E-RL4*owqYwz3N|yz;Ua2Z8erk)Z991n$<Kc5{v!kCD%#P-mL7E}l0)
z@GWb^t@)m|Tgnzwb8#a5EssPFT<5FjU!-avhCBXcMQ%JML)jvuV!vt1V!61Kchryj
z4tmhmY^-KmU~DwbJb+{gJ+N5){p5T_<S2CENeDk@$HWHOl;>llej#ud8booGP8F!Q
zgDX-P+8r?`ud=z0aB2YDgmD~sPg9U)p!bTn5Mx}%w||Fy$Y#e;Zo3&zWt9i4x&0b$
zUvk&2vS#2+(5k50OvqPYtEqG5q(=mM4nUX!J!R{`@kA5d%|iuUwP@aTP{Qt!=j!3d
zJ|Bv}JO2!!pa+>h@;A>Ed{p^R&A$TkC4Uk+S_VYsY{Xa8f>aGu_UQAp3AW)V_6QIc
zq-Hi}Lq*onc)TPq_*nGdTP5&>EA1Ag_!w1*H;i6t=hS%e2CqdO?f}=5rcJD7!m5<=
zh;RRuq}Ek@%gCKKyKxiIz%)Xy?l?3AYmW#bBMCFcC+berb9tkw2<o8GuhaUd{Cv0U
zwq2p&9waIO|7Dx-7WZWlU+JxoE1-JP*TSM#%FoAlRl^vTKUIQvwcoWPB1DpU#71kg
z@nD8-%D0Uki}kL`hQs$v%QVXJ6el9d;Pm@wudWPG0HQE0QQ_cdnDP6HMoyv9?2DQD
zVvK2&#@_EY-GGcb@&Vf}512An>h_V<f_LF!^2u57I(;trkA3b`YtUH-)%?8=i4XhE
z<aR-(8kuTbw_(&waU>`-fN+b){5cF7bO|~CL=aZ5mggV++b5A0j-^UJ1G+g#x9Wmh
z>h%0&Ibbm0dwYG~jj?=;&qexvy)!-cL^=#&_<SP@)`9#XLkI0hc+PAIj%0uwascR&
z8acW_9l0DxAq&t@(*<s%>|FN<+I}c5s;Dt7$_R7|N>xPf@+=HJ0KOPgU=29{h2hzk
z0qGA^5%a-i;U~Tfa2BBwP@%Cg`{Z<x255;UD0H9{`5m{Dme$Db^t~JopvSM9d+0q=
zexkFpBc*ucCv`ol8Rixz`{&MTBuu{b*Yto1i`C&k)!i{2M;^G29sG5B48OzFX^R(B
z2n<<3@szdFt~0&jE9LNKxnuwawmGA8(e9_=Ay@RUn`fTPLw6$Jfor1bQ3jMJ5M4f5
zg^>{59XV?04SZtmgtm&I+7un9D&uWb=aL~^5-&HvzDYuCJrEaw<JD-fA}5l+YFHu5
z!vD|;zG1xdcTCW`YC)`~k*u+*&}9>b)ZC!7xO?MM8kd$Sft4R-H%=txofhkM#nZbB
zd^G6|Zy;^}RYaU9P$LBp57O-bF<w=C?TSw)V#gitDTYSw4XvmWdlV~R=^5Jq%GWRa
z)IXXcu>eA#FB{o3bg|7%-BJx{2Q(XGd-o>%q!b|S$VDd`852cHD2_Q6ko8vbTh11=
zm6=BG@_8*i{jR||XQU2g$`3qMWV~9ivd^5O3QVFx;Ccr$_yJfn#*pRem*AKn42THd
zAV6BOpnT#i0CKMgc4}6m@V^2#w5*c%1DQY7-lPjUsM`~kYbb~>$weC0FlGj5Yghv>
zrwL+J<Jy$&WlXw9gA-s)L5$X<0+|ycZ7+Cb*4qqfTNEG+Mz$H_cVNYo)#zg&#fj-X
z|1KeEiN*9eiq(8GWB(J-1*DdB$ac1AbJB#aWN<~+ytxm_b=4WI@4~gh7Ew?(Y`RB&
z=e>-x4Sx2sy_%?IXB#b=Eb>7fO!6yNpzxjrDib*nIxR~g?k=pBVA{$inni(1vNhWO
zTX3n}9NGlr0FmiFiIO(@AuU$}kzbZ$E;v0_bYOqDkJ0hJg~U}*IBP0|pb8vCY{Irn
z2`RJBAjO4AOp?saVHRBsub*6nX?O15pqtb}5U&+G^T9*IE4+&T_PxD(5I2<5Q}09Q
ze{s@=*UCSyrcIm<Fs;1JP8;GQry)#qy!SU~JKvqg-OR8VFS;+(QvnF=8ak&#tB%ou
zfQ^1PP~sp0ilS8_K|0`~5d0M!!MWM4dUvwYN9c4Nvcs9SCS@D?D-r~?R7yvgB-$%A
z(?xbI@X;0LsU&Srfc&jo{fbbW*SK2Lin{T2v!e|Kp|azr(iXyoGeU+d_d?-0Z*z;!
zRVdll9UI}pg4nH*p;ByI?Vv;8QL3=u;!2PJP@dQG0S$9-DT2}iz?;NNfl~g;QfhgE
znoFYFq;nd(JeXPFDXL0W7z|jvG^j?mrc_2_YhpB*a&!tJ-U+mJhJh<~1*2^GGSi#(
zng}ACxAH!rm;d@F;{IyV1E2;@pflo0j*IhZs`(Su3L<}l_C}-EE}t>TvqLXDaovbW
zBa7E1``5VyZz%G=D1`0hhz`0uekE^YA|fWywZUw#Z^Rrz%$#*deh`0tu6l^2ocsgm
zWyWjR-Bw|Va2E+D=OIiPEYG&ha6b(C69FYAJmwEX++efrlIpRd!kZ$aMC|i_iPcgL
z8wo=kl=6yIflecdh4k=$tmseJmTN&+9K9kBJsC*MJ+QV=qdWngsgWOT03a^gK{_#O
zDD`?=^0`3*Q^_OjmYUB>n!jeDui4*E7v`0Sw;M<|VfNELmA5E%o!i?9a-0N-Y$YKQ
z3izRd+1{3ngGd+phk=U?$ckPxKm5{PB6jHKy+WqfD_?G`T~P<T{ci90GGAQm>amus
zPoJKwgK9~c(gOO9#st9KN%3lnd}iq~+0a&FMdXT2rp(~HIK2Z>+5Y3MB_pzaF`x$}
z_b}^dt1J06M6);zhSHqO5NQO!3jhQ?5}AEjTvn_D=%YF8=PaCRL-yJ+8PN0Clkl@v
zcOy2$F3#r;c~@v_6Z3p5nT&gi%`i23w()6!;v@v43L;47=iI)(a}<hiZ$Ey9PXUsi
z0!5?6o?RM#p9A#YLdY!8_?{8FtiEu`nHfch@wGHipHS-;>oT&_bPASc>f8kCiSnC0
zYkiIEUxi*!@at@PEY=Bp^>9OcIQO>Az20i*Y@~HNIlV7&)7gT{HiU2tB_-rR?Lo&)
zvm~S}G$>SzpDVALVUR1danYr%?XWNb_q$a~8uM<^jn@tbGh4e0x(8N=t!s#NIpHs|
z*?V?9f`O=7N(h&e<T&qR+pJr~6a?J=LEx4BV?>Tw(G(>*zYPm87b(|>GP>e~vJOzj
z`VLaB89hv-Wws4{8#?`L&prI>Yi+eTboSSnf$g)-Tb8jR%%|jJvR+!FzD0ZIsP{BF
zC_W7DePfWwk)yb)HkOKQoFP5K?<g|OBH!3n8Ke17DEKdqb8TeX7sRs!GJ!s^gv)aV
zgKRjO241^~`fWgZ?TWTW`N@`sQZChZ>d|}|L#Y<fry!9oHgBp2Sz2jVo4s}8_k6a;
za?Cq<xk)?U0R@t$D03yx`d`lcG$srQ;ZC2}2t=ow>X462Ih<TC?33}!@9=Nu$JmBB
zJP-K`)XxKWMG6cAdW6p{@{v7qi=j3e>67>iCO*to$1!&t1IZt&p>x9?=w2Ex=VS(Q
z4<-RN^eN+CrLZ&ov!U-)40WySUcW{j`fQ*+pLI+@qZvP*Sd9lp$iONX#Sb2Cu&DT)
zT_uG%f)iTthh!Y_DGvT#S2g@6IM#z`m;s^Lwry&tC#-U_&JUa9eRc}dE8V%vKEI?d
zyH)8E#K;Ke<w9C=8R-Id&mXSJ6P%qwi;pXTM=Q^B900En`aYr27X;0PkbrpAXJWUW
z?*XRO@Bd7Y_*p?yHMC@FT~fV~azSgWYQ$zjNJV2?aBFo4n9Rw^51|5kud4}E>w<J2
z9t4C29Grk^IMO?r1Sx0YzGCqc1Q|84yWw^jQb7|D2r4elw$#e&{%|AYi!j&f?>O8w
z)Tg<b$Ki;Uj#5y2ZrioRKuX+51@ySWmK!_&PDTrfok@fGmv7)<t!p-!`8fa9AD)0q
zNR|)!62Kl*IE<gLgE^b?G|IMh#PQm>kklnPx<W407@+St)gD-Jic-EQ+`)UT?sctL
zE)3~-(BBZ-GUh{od;TGzPc0W>AUu(dR7&QUns|ZHTCSeS%-8AAf3KBB%zoVZ&VKIn
zJvFbqWr)=Ixd{oab~Rp4(b4Ye0H80x4;U8oEE7-a#S6QE>FUx<O`aBe)n8!4Ss{cN
z!HQh_YtIC$v}p7`5%&@S^+4Aj@9Gkwv6xc*sBwS-buXjIwUVI>M~AX?^Nzt7N^c`M
z^{o_JoKkg2!L_20GbO^gsUMF2hnVYEU&{~CV~W!8zO|!*<q%~!T<_P)<I{;F%k{**
z3+n#hG)cp*#&TByQ$YCy758a8%1rkkK=_1~ScgFHaviaz9tMd9V0sMllY(Y`0!sH8
z?rmK(jUMZ(nhr_Waq+c)#~xs45NZZid-LH-v%Tj31aRd3#r?MrLyxQj=IdW1GVctr
zyki7Hd0lpI1>cyN(2t{_nc}wIP%fw#n;HH<uQ8>aAqM^(=2ZabAdrr$XU=3XiLK?+
z1aFS`AY&0H<UjJ|O-0K5&A6m()ZPP0FkIg(6=cG#BL-QH#KZz(Z_#<IcXuxIp4Tg{
z&PNkkUDVi4;~(dennL!v156=U(ep+_>`dSAl|09cb2wV%^Ewd~RW@D0b0S>6mX`g^
zuGDpMElk^AbG}qeIhDX!zN(mzC1=DeH`#%oSXLQ>p`ZRkv-yHPpXVVvQDN0#PA&>_
z^fU<!qDj;iOYK-gOIN|DeJ()gP9g^bEJ@wdBJJOQSU{uOAI3EjSifMIg16ZU0KC2P
zjG-kI1e`8I$_&Tr%cDSi;S+#FJL($1I6Op5YQ0sx8teVTt^E7D@O^3y1r@OWm)LGM
zXzxt{uc&TJ4{7B~;vAVzpsk=2SD&s%`;Fxzb`ibSX@Vr+b7FRN&r7k*%qBF9lE~RJ
z{uBQEy5qan+DtA}ZwJN%0rt7O76FaB7?i$j{=mLxIs{J4lJGY1{A9ZSEXl0{M{+lo
z0GkTA2`(=nkLo|OFZQ2ig*TC^SKPnZKT1v$v&!uKy(_zw_E|MVEe+1&O0sZWwUv*U
zKm84z|BqY)b%HG7!|eeS@93bi6Ly%tNv~sTXMYV5#3h7F#N=<v{@XC|tx!invin5Y
zs-1z`_xvtc`B1&C_9w%l5h~FG+aK`Tz8M#HV<g5vEq9x|dODdvJ;(~u#C3TD>11C>
z4qy^ZQ~&z68k<|>gk06gUK>kLl7gwc{}!Zz%L5IdL*Y%-ni)toIj>$l5nV%=Ib2g>
zMYEsn2I`|Y^73z(7R$v<J!ZIe+j)F;h5XI;cPZVD-8cWlJNWw8n&%w<%W013LaljD
zt0)@6IEAaZAh6NqL$JX;bTmm>=d{6e@wWKYKq9*Mp%99+h00qrPM!+kUICKX#1<*7
zsG*um7C}@+3U%mSH@B<3>XbPmf(XT~+B0B9dd`l}w@4i=_>o!htLj&Rcb?1k7P(fx
zDK6#<5ys!G6)^BYo4IoHII{1M==4#J^21W~y+uPWpXAN`8t6P9>6?rtt-zpIp+0r(
z62&p7m_cm|?_>}K={Sqz&UVg(l=n=$UH-NW%TLxZ8W;p4u)_$(_ft2=m*UPb<Sm22
zH*D~V2@Eu;c(dOO-SU+eUkk}CsK<%Ii1dbwEc%YXPoCEwL>-wTYAgo|on=%XlkLr2
zWT`s9$ZU9l(!Fu2F8;@5b=yksN0<s#t776Nl%r^<npj<cMv8)_8;nY#J<58O*5VpJ
z_GA4`vlbUzp3A$(qPDJ+2DLnBCnYo4(dz7$0-lDM-nj}7c5#AhxTr<qe1BlO!&Ix_
zi<pM39jl*RQ@3rbrQ`zRSt^mBnjly7hipN_7rSSQ1Zs{+=)Y$W$n6>RKW2mmcr)>t
z|7VIfU}8z%K|8SmKeRl}%>UsG+O6-Gx(#Sb2qNhxxU&#RR$MuCc2q&c1*jzPUfK^k
ziF>vZ_^Er&H^suJD%2wols%xG%5sa_L_ffUc&%poX57O?&nWlW(xV`k`C%WH%QEeF
z8UKo3zes?eYa+C|$Ahvz62s3jKcH)#DNer7?djVIqqyaNdUr#pS{E1i<92huf3O~m
z>rJVO{DtcOU+neMWuld^yV&&=A-U-Jf@DdDEnx{0O&xQ0e<)5q-C!x;0rrzJ;60P;
z-5-pez+a}cHf>4$h&QA61ER9zEw9F8VLKm;IVkMKejarVnx{_f3ZEl2l6|fdVpl)0
z7w0>wr5V~8vb}0fhs(pgocb|Ssdaq8hv%ON_=t#Yxi2vY^QU<*_Ksi+R+VOyu7qQW
zs|c0AW|s}i#l21|<O&-M;?Mk4ta0OjeJ*Z`@-^!VLYW8f^sI)~yQ21>oaDy|nnP#`
zWISpxIuV>(kUF_-75q-Y5BCXL)2Cw>0xK>eog+(B#um%RO1mNd`3j*!MW4yF5KU>M
zZADu4F;8x>r_hY3c!&Ivr7(-E5}=fvocVf?#+gsHFQdEzjJnjc{{7Nlj{F`_JhkHE
zHEF9-BKqTD(l+F#+~nZCxRSP8u=kK`GoYn|mfXFK%FrlK+5<&oRZ9xAZ}JZ5yYhTf
z{Y=k>m`Xw2?J{R<f}IA{aJ9D}I)8s)bNxteO_|hMd_bTjI}lKFdDZ2bP}xBMV4V3x
zbT`ACJ=5qq<w+JRadFIM<*=Ki1`OvX6uU3P_aAO;Yvary3P`W$Mq;8k-|II+&1M43
zT%XcuuVZ<5ExGE#HrDZwxTFjlcjneXLP;U#<_ulb!cHTYWMomG=5Vq-P{wXcyj~!{
z%E<i_-3<Pk_hxpLZ}#1R*%rIt7c(~&X<G;+PomLu(~t=gUqP6O9B&u4{W<WJ$WsJ`
zSaUS0g4KK1R~6047~>u0*<hJSP%h9(7WT@LQbprKro!0P^mtM+fy9Z{(^#pbZAgVk
z$)aG*m*mcIUsm!|e{u$3qHq?<ecb@*&=$^5V&er4Ga6^TjkX7+O-Hk5hw|dqYU~xz
zGmmP>E&#mfTW!<-*WK<iQ^C%5%d2@;&<2$BIA)yRxYwWB;^pq<X>`ReTD@3JMI;nl
z0~evb>!nErJjaL5LZ>iW>gs*eo=`sFP{G^0wYvcUMxp+OsO(bfUuYK}%|$}++mE1Q
z3IVp#JXB%qTo)D@Uj>+bZR@SVOC>vmqj84ypm_c}Bp|4EmR}<OtiShX=Zt{yRnV5h
zjDGFYaX3`eRXxkux4#Qx&JYe}J=N%}>j;N_MN3~&qZ13UzR<L~L(z%(o0)<LE1|tx
zSV^s?-vi8sv<O^SOIM_#veEZQYQAmBS>%oO7kaPFHhv?d)YwTn<SLD92-HHLt-u_l
zvy}Q=qZl8B6jD=9L+TqvX88(izTf{;%rBlbo(^Y@;yVU~zW}!@o^2}9S)VwNY>d@5
zDfi5W4vD=oVeY!FNBnb1ZD~2w(8gktuMer#CHpZ|_pFI5%4t-2<>43FXZ^c*q|$Z!
z<-x>U9%EHx8)%BPx+E-k9m_IpjOx((vthW|t^l0{?UK+(32rc)eFNDf!f5YnNKIVj
zSZrJ;1f|A*D1mw9ZF@Eik&bty&U!K?(7{GXxeKrshR{d>E2nHV7ds)=os8?U8VsS|
zP`rC{J$AZA+rkY4<cBJ+S3f3^Lg)neWLQhjaV`;Mt5%^MhxQM8N;9n<W?g_;fGMMo
zyh-Z$uLW=3=IE-L@<P$}0)g|ZD9L}+wIJG=Ulf2tOIMOcj8^x5^snDs32k#Y@)S}s
zwd;F?_(14dU1#^-uwf)&Ii#wtnr}Q5(4@jfN<!P#-xOUNo;`4TOm*wmoIjl9Dz1N_
zfS>*CK^C9U_nS7EOTWLr{qcm|cc<UfX(Ky^inZ{hx|JRN#vto83wi)Nm#3&6WJ>f5
zU{YG~fv5j>ePi(#0e7KjM57AtN`<a%Q3{CkAP4OYInrddIE%yJAuAC4c{R+Ey-eK%
zYxr&<XxPZ~)omjH`wI#eovlvO<IMG;-Mages^<kOG_}3k6y=BJ`vGrB%~>IM+1V&w
zWFOew@nJH0-4)IS@sl%ZHX8BDnLrHzc|4@<9m&BH>E130ZqsJMv?V$xb_1+mJX+}I
z&x%)7jxUj!1t8zhB%jT!GWfe}bg9>k1ay~jg0twgR+Jy&IrfVcR((R8BP(ZOKm)i8
zIs+x}Y2;w?L1)hUJW|%5Hi`>=h+|3A@l<zmyHG@r{Qk7vbsAXx=9DKve20O127|4+
zB$b0bnBOWuEV9VB-=<#Wdc%T*06AgKXYj-L*oIq+31g0E$@hTya;3=ki$UT9?Ozz2
zdeYC1<y<~9sJ$hpAAR>dU+<Vq*?A~C9l*dThMdTYLZpvx4FN^uU=!-!-~=*B18FZg
z(6jsRO&cdJVZX;E)xIfsLgG0_yRM`$RoDQ7u-VW^!mkWDyQDmW{>1MuZ%K<VcvpQe
z|L6K2Nlg)m!()w`YUI%`e>OT3z=%vyACyb6BBFlm=$gDZZaqg6FJ$%L_CuK&0>n(F
z3N7L>N5h%#{^PaM?vOt<Aw91FUkz5ld-;m%c+dk~;mYPgb)>Kei40PZ6rl{t3DTY`
zi5fU9`O*NH%eZp*7K`-9oh3%~Gb6CBqb+=&P;UVz6OyDs)@nVQILYP_*_;OwztI1V
zR$5<!@t%%j=y$)np<OMq&X(3KOlm#10t~OT#w+o_OITg|IK?41{kk=|BjT{m$@j4(
zVRuen!ebUw&B`U%oR0u#07VRxlN<1^da{Fws}(+8ZtaidpylUEP&!_+*@?da=2;G|
zN7lL<Vjg7YKq8ChL<cE)wp7n{ly}c=pE21oQ?MO?&jU0pLI^n|j?cCxpG`yR5(u_P
zao}OrHVS0{4xtxzr!OS3hBoB4F6VB9C}sZfweHhb%|&xYRA~s0itbn6B_}d@CCpQN
z$&YhTp#dXLZevMVu&U!6OVMg-i;R!#-;s{^iJ3QdmYA`;3ZI133T8Ce+jv}W3k(t7
zHicM>yT5`2OyB~ic_1wy_^eKi)9k@==T)AH#FcTKK<{yZz%8jglbpCk#^CDru7uNv
zT9_@)uhLh~A8Moj5qwmzjr(P?RD0lwEJom3-44g#j}`}lp-c0xj1FBJk~kYY3akFH
zb^v*>LSu4%MzePJ>N-0-jgaxjcs7Ga2%-38p5<D3;jIQ{vDh`qo6{;~6fAAYV#1i^
zNtf$KM#<7zLO8eF;c#M|!zMr{ZOux(VKGp*7H??&JF>oat`UhCBI(KPRqZAs%<<S&
z^vjbK)A-&9L~m%&Vj;@Q8B1a{QhTFXFk-XGAG@-MtHwUO6Xxv|ym*^IY?i}ij`K3;
zS7kh}GYU?fI_iru1O}93v7m}+*#>1I%uXVdiHy>z8U%3mmmP#8r-1YY=aWk2+&Txy
zkmgN<OmgLfwU8J(-1X`!F;+SPYykX4bp?A6X~&UTY}6D;6qjPpWAkN@hikAcy3ZrY
zGkE*Y^?O(gBnd`F&9p#Y`v~?ROl%;XP8RI}j;(Edh#7^xbjMdIYO>3;l|)}5%rXjn
zwyLM1-@mNVh&mGd;J40F@q$!keRV3UIKgNl2!T^gYyWXNIx=q*lKAts1;?7Q22!@9
ztTkh?)w}hmnJmZ$Odkmtbq)Eq-TTi(>o2H6wQWj;Gzu%gLAUN(v0;3>Qn47bJ(Q2Q
zwE}@nYB03uc-X6>A<{x{kVBTbU(Eekz-LYN7{>cj=@+QZFt*{${9Xz{+#)-HKH(S^
zRr}D}vKk{NzGkF&(d!D+dhWn;{Rw<Ju(AiB&tyWz>8wwVzMTA%96RgN|0&UD9F!dE
zS&X=oLpfKS2?57S1xANrPqG|YkJ$FZ{=qeK75P)mXuFv$4g*+Y2<HqHF6L9cA?>H*
znf>4qpU@NlTXT<Z93aNEA-+TxWO#r8LG9gH*U9Esi=Ps^%v}7aLf_{wOfhrx9gZ7O
zlux^bx8UmRL6+a|qpTq!;R@Vr=iB^_)ntR3hFIHjQ?Y`iZ9iCV(HT*#eaS&oSO44L
zjbdIETN$2O#<mv%%0ePO|7TblnmPu@=OY+_tsE&zanupC89^cFc9#Hs3Ms}^a3UP3
zovgQm)p{{}1g1W5YGf(Ku#{5?XgP}|jlcDo%a5Ymhul6MT!G5jaRcwGpwlv%q(L>(
zjR4*Nbo^hB$F7Pq<3?7-9xLya9tc+Rql6K46eU!=;m|IrvsT_bz^%HcJUkd8JrMhy
zV~~*^Bb>)AE>}Q!mg7ntdOVuTulkB(6!qR{TeIV)mm)=9g`C6<D$57@qIu7JkG@d+
z_PMY9f%GZ)9Zh0dT5|hh-`(hVxI0!u<%xW3Ps@GTgEnD<@|!Btxuj%Vj(^Pg^L*x}
z^{(2Dnyjlw3Z9&5x}%0gKMVAx4rV0<8nZ)e_tyRzl$gb-&=bg@4%cDZR%}(akC8-n
zZwiMx&BN<`_YW1aV=iDZUGn8Q)j6&ERmwPjc6<$|c)#TwlbaX8)LzHDWL3=7t~|oW
zCxl3@G&PVr1u>hC2P{h+)#mvqfZq}@Lg(11*PPVq?jA@lF762MUCZAfUrwY>6L)7<
z8Z`zdD^%3f-n*MzJ9-66#+1lpKQx}}xb2#?!62vR&2m#tU&TNwHggs`5pmenXUesj
zklA$cuHl>C4N}ueO*>7mE-D$u;s)L<mC6yPGsk>mF{Y9=h7}lwwc5N#@;z?vznM9{
z+<5KHD~eOhBw;U4vx*_f{`6NPmlMU3bn9LZzM>yls%s8bQ3|<n<+pg2(6(lmj`XY_
z1LGe?sFQ+eX#x7yZ_M?zr#$2?ZVUWC`I;*g+ix`WNWCJ&AjT~4<g6TS@vGkl6-8;6
z*pDq)B)<L`M>gKBu;IPn1@EdWU1OsCa-;~EwO&T)OFL{VYD?XlO?4k)Q>-83#?S62
z3I7Vg=rGkQ1HUQB`xfMtXou2Ta#9nn|JtX94<=F%tZz{8+pza|QAnc-){tUq*gxR1
zA=~fXMUKbJY3CeVD|UC}5mw-=5)J?BT0z&+A0rKxCLx3!kQ{a_Gmy%k$jrER*i1#{
zEbii~ZvC7nhGw5BJFGCbC*^c`#nfm_unFdL;KhcdDJ$(7Qr3p5zYJT2f!@@(OvS<X
z1I`;BXfeuOP01yS8LnkStD7r{xTpk@iw7gw-6h+!!%2MsJi<yjHnehV?9<5SiCYEU
zMh2{uhxE5}{^~3_dsF-kI-N@AJWZlhot!AWX8g_!(=U9-TXRs-%&@qAEg)zu;pR46
zikM@-zKoTp+;`MM&d!FY;}<p;;O3He_y}P&N~Fp9f@bH`V($j*yII?QX3gIGde8pc
z7cX~NpMGLS$?fVK?!p=hl$z`BMcow)6~R?AbwsuN&z`7d$zf9xFqs=LvnsQ9<<yNg
zV#CZ1uV1_YaNfG3wnd&%&Y2T%w_SskL~Luf1*>4l;ys?%^|r!`75Y?1l66NtvD8S4
z$^H?frr4hMHch{IdVh`E?a{*=`w-VJ`dJpM=yc&}pA*CV?{*f&3)Py4n=_35HmGF2
zAE!{f&C%4=^7I@ekfLWhGsVQS9{uSbbL>=5#92^OaL(HO0!{<+EdMY$jO$&qx%S3F
z$@Y<l;$D>;dz@N;?1W5Iq}No}LpwbzE#RaxaDAu@1=lB7HoGPBQ}XcC?A$N-tzY2A
z`^pP*iyoF>)sL>PDW?0XJh|Dbl4$Qy_LPro6W~Vi2)6MwYdaW((gi@x4yVMlM{t^C
zvi7a{-kGjSElxLA3zQ+$o!<zJs?fzMGN>i8;~TKfNoUlv@mrRE<l96Nu`io95tFkQ
zxXB6@uK7c`w}`Tf-ra8DtJJ7>3GEdztlN&w#ECUl;a#6w7QJ_47MO`RYR^g1SjMl8
zq~z?I9q&i(@bw(~q>@4t(#`m6yjI|B(e@KdeO%LeZ7t~c=^S})S<D9o^<cZe$7}{S
zUmv}GP$k*#I)?vj8pVhsd9;jo`p430<{j2Bskt+YAC^ZC`B?)?M?yVBa&|)1XT<FX
zy|D09w=p$D%Xo<489e$;!s&c%xqYkzyRa_~8{g13Fu-&iu=EcK#EFv4iJp}xX&qUf
z0XVGVLHb^d;`kH9@v0*`5AJpOeCp@*)jzV$369!Zzjb<AMmSHFa>k34N8axUNi1a*
z(&&_S;cm*>{1Z{XD&qbC1NiV<rPR~Xf(NT{sa-QoB5{-5o;ZEVoSB(nTANN;6P0HF
z;8YH;`CKU<U*V~@Y~P~m6TAN1BG?A+nOY6!&==yD%f@;eohYoZ3UZ$1=rOaIR}YqM
z7^K;y#qE6=TPh_iY6>H1d+G6S<xT7gu`l?R9jFVMe*=RV2BX@H<@TIZBW*tO**nJX
zOFlR{6&bT%oRnMRi##eIyjS1b|M}^=vr~R;dj?qM$@t1S(Zt}2p|eSFff;`7iL)M{
z(|ku=OmAywoJ}27d;ZSK%Ej_V*alQ(*%^wmeXd=l^#p6N1OzJY78UR$IL4}Hd8Jm|
z^NuhY2#8)*-dL9sTGa)9qW+ayT;=w+jTGZ92B|it>UDY&x2cmL8*E9JVucP*%tLoI
z_qD-y6L(=~;%d0>tUULSTviyZ^ZAx>y0Jmz<ohJ+zT4$pYj+xl7hgnr!NN@qH^Qn#
zs=ymNY<ERLUNPsKSrxV*_iB!OJO$HPA2k>wdn?RSbwY1TSn<%!$fW1fxoH!f8YT?$
z;e3O$?lRWdEnha4wGFGrTUNpX@78&lUB$juTH*iTl>Ly~1C!DCxcd%8-3iL21@<+w
zP-{UA9RD?#@iSGtwHxem)+3AL3X`?)Z*LgCQbqf<M|mqGligK+z)-jdR~Zp<V4jvs
zBT9pAiC{?#ogZk|_~gC!)IMu{;e@@zh3BwhS7Qx6S<n0PU`$@`tP>3>AS)hDD|l40
z#2y!xJ<1e%bor~jnS^w}xGy#~j=P*T-Q$lYy-*k&d|*uurk^dFoN#lnZNV$6@$#TT
zg-u+SGHbZh#S(CJ+g!}lM}<3gJ(gjy$)s4!bw}>%wYOOhl3@_+>g%f)Utj$aB(imb
zz}5{Bww&EXt4cC^_17|VNp^<UYD45UMs52hPwLOPf5>GyOr^rPvQ$%00z(3TW?Xa0
z-knVmD*Ly=;kpTL;oVNzM9tL1RAt3q8E{<6sNV)lbI%kYP8pu+g{h0x&f*i!%PRKd
zd29F|8KW%a0{@ZpK|fB@;mzha`=JI*JA37C>J+vP%$3OmU(=V~6P6-7t{|}W*N<I~
z@BXpEayxT2E2a7;Gb{A;OZXgVYJ`U60hj09u7PU$V31TsIZs4YRiaA_qGWRP5BbYD
zK8s8eZQs?%tN&<`$eCZsPhNe_w|vrTQp;4QNWQDzk&hs^si-Z<Mhg~{^T?nDCVZ`w
zCr-loXT6+g_;}+BEj&42u3zt{IYF{N*x}ZPT}MzB)gO6OqTHsprlsnBCwWpbSr$LS
zEXWwx#a_$dqBLBQ=HtXb`85XHKi0%;=|<<CkihHR(@re2JMR7G&Dc%)x%LkF&$}_M
zM>X=f?<8A{)Xa%3bwr?(WcR&4vKr(%?Z)XCXJC76ot;cZOAB};nwAHsPJ0{T5*soL
zMwE0-F<6R599=z;=}<^`1AIK^URQsWS?Ic9`$Vz-f;qvVqO5=2f@Py9zvQ0fW9!xG
zUUC0k=y_y+X8%p$8;NAkW_AZFKO^MD72-vOo+@#>pQkadMp{qFiY$DiBYJiXzayX5
zJa+RNjKNSI*<hxVG8A>=X<fB!Fy20Powp74>$n(ge`l5gPSMrg`P>+-^<*C3@#3I7
zw)LbAEzeWngl3Gf(fgB?GC5Ie9@Vnn<?G+Q_rCCAgT-?f0@gyQFo;}x!>^e0=k>eZ
z=!~^6b<?lA4&M=675CJT^OpowhRP^U$v0vH0loPoBj20+eQtJFt_3>f*zg_N%he;0
zk1LH`m#t9Ow)FG#5<BJ^m$2HduDU$B{XOYS@JmBor*m%)etKbe`eXrL4_26wj5|T>
z0u<h_@_>xL=D6t^<A$WM6J(4eeUem{qE4ewd-3d_eH((ENB8v4{M1RTOqrZ|T}GV2
z<}XamMJfbjQv|V_(*E+reLL0ku==lwp?a^^GCmDC`=Z0W>NUbsbHCVcuXNr;A0>js
zj_w~MP@H#UjZ+}*@S=Z<t0kjD{4Pl3UCX-Mhm*$L|E*`f=Q--GpUwVtf#+I}?%f*K
z|MWuYKH4wIC3eIh649#Cd)Cc@sf-xhVK$Sbax9~q%P`+ThA?;k#yJHzkHtwh`@s$8
z#Sa#Fq*W-H&ad@02?b*pb}_8z#1;&Cv_KSR){8;oogDlV1$Cm*w|alb3<%aB8{|Qt
z^ZWUZSq=aDD!VawMXEVz+Jl%7v+>6lzYoY+1Yc1)SH-93qF3a;OHdf3$U@-~Bcu0m
zv+<KtJDg1o!M2*T4ewV|`vkU@Y<i{g62zMdYmClmfi$~M)jc9rM?PG-z~yq`C%2RG
zUjHThct?b3Hp`CT|McuvYg}U7{zyBs!`*&E4V9CvM%w!Ex#Yk--PKgMsiSR|X;q$R
zC8^|`>VY(H_99fR^!zSU<6tA5TIKR(bMGk+9Kj)vWN4OkzvPtj{9sk#kGp4wJ{Gt4
zF`K2WP^e+ssbix~4)YHExtmKPtp}HT^&7DK56_v!imv|L{9jky>n}rYJ`_pwakwXa
zOpAw)w1nCf8X<`y-mm#OL=fI`o4iHUNv-<wcC320Opph<FZP{dT%WwMQ1@hSwtxM#
zEQjDchqykchO~|<N)?$nRl0uqhgY*uvde8ldwv<8w$S5RG2L5pO6ejHz#g0p$*)%e
zZ_$?IyL(FAHs11DT-^QX&N<%WAA#Ai3m=rNG<v+kl(Zr*uNb?qLs~1~kEX=#Y>D_7
zo7XQ>RN?bpsQ<X?>S$UP-H^AbZoTEY`TcaUh_AaNGPZ?-n80yAE>%wFi+xhN40`u=
zmW=zDrX=otX`in832fxiwu=g}AEylqon455Z=Qgw`AD1iu9(nRwKwRP<}Q|s!mo^;
zt1-$oo{CGbdqeMS7v9?q?$+zi%;d|V?vig=@WxscNw6Eiz?Jv}wHSiwdhbi$5emcI
zB-Gkdsi5=B<=N(H?SncCy<+1U`}F;TOC93-OZj7b@?guTdH=r9ry4)8%XMnDeHS?F
zv2x5gXWXO6ZaJ;pFl2vnB3yjw_tfir9PV4%{Th;CR$^I4HRIa_6I$=*^2q=9h7nrg
z%w!&a>JW}c%JQ3BWz)rhAW^^H7h3keIi!u@GA_rLXLCaCagRc7QR!drYBJg}UG3{l
z#SoH9o<!>xxxbAQE;J}w2;gy`oOlV^HxKwCGj5_B&R!c2l01{Ray_-|ZDO0j*Xcd)
zN$U;tMq+#oBYrq=D~rV*hW)XrP7;{B&FhZsa_>(!ynf!v@-7_d)|$A5FV9p0hngx`
zH%PyL&Gq=P#=xDql2T2|a%CNNKU8^X{yh5ca)faO6<T|qS(|e0%=#&tJEh$Wnx*t)
z9}C!XQJ|l)&xG4fovX{4$%{O{|84t&39crYwKLsxPug&sH>Y&<RKh*@Nu8R><7NMm
zpWF$T5U;4xG5<&R_<eT8_WxYi!mn1x|N82YqaQasu78)>6w7a_uJ*;Vm-F(bq%Z6^
z6pH>ts&8VwWd+Awh3E14rwt>uLow7FM7drK9^kk@9Y9(8{D6135B=_6gmAImQW|5`
zw91wt`f!lzLwi$s{Z1s`os;H*XJtgIdpl1d-$#Ij$OB=Kg2%rbMsDoiGlX#n9?*zF
zzWgJafWHJd%Kzz`=ZcSyZa)<a9Atpug_HeeLhIODE&hg`YE`1!c#%Kmw(rnwSccjs
z1-FYYyeacvwB-+rxR`beo<3UqE$dpl5LR7N)78_H?O0e9#{IiQ_ORCJ)2H`u{jv40
z+tTDM8jW_#_HHHyqjM!nG^3@mvc+krw%18R>JRaPRXg|AQ$DGSex1L5eR|Bh%|rU`
zePWVYUyU!OudlCjXsC_L-6ZyPrRmMs^LU?`p412O*J9mcAIRs5sQT#I-o0zfFM9TS
zWZHGqTg!hp{=Y4QWxEV0Eq(vgp1l@Ah>MR`-YdBiIp1Fq%DIi}&m54HklWdO;FfJP
zrmgKJhzb38+5BbH-P^}U4|yfHVvNpIKJecKe<7xs^6w@8w-2Cb5<cS4noEavERyRj
zqq+6!JT27(!`#lafGBa5maqAv`U7KMpLR<ZT1QxjR$jive~!ha`r-V#yQg+<&RB0L
zBo`6%ddbhjrNRG1Q1ACACkx#of83Y|_bgeldE{s?(xz2C->jU<hCOjEX(VEEY0*So
zz?Cy*V@+daMK*^!szz!n^W2+OZ?9OpyyJCAsivr03cktW^0nM3Km2<G-)7?8&F@{#
z7#!JIoS3D1{NiKV*RWB`CcgF6jnwWgoY~8n2*A1};AY!4)ZE(ha&~W@No7g2q~${f
zcSMv{5h_OM!GrwD(nn=s*1N}RkGySq*+}lQOszNh=(&qOy!3-ui$(D3Icidgu3yvr
zUq*Z2?x4}P_2tk`p;vQ_${rK$6Sc#7TkEFSb5p_Vvo&DDzBsgYt!2o>DN+Y@+#vXM
zLHo_eqOiL|9UI>5$lgf!>Wx2R_jTXa(CbxBs1$bPZkbJchCfG05<gTk*NQ}on8hKA
zs89wlFQsh!EMS#;=Hw*_m)n&Ur`%mos7?ge>RDz-Ofn%TZQqz(UaoNTw#<_M9PEit
z?UVUrGHk;SA3nS!`x-s@rW|qc&|kl^`GS?_Y%ag)>+fH5sg*vZh6W0GCY@?gB+HHZ
zx;rFC1NNM3PmM1%LFi>0s^#az>XtfBWu<ScO-IaZ9u!EMNV!@4xPj;D>Gc~4yBNad
zcd&UYYPa<YOednaKQ?Z#@bvWTn~$IJu8#1^l=jKlvVNVXiW11ge4}m(Z-6)O=Hrg)
z=!4I{HP5WD*q5Z_Q0G=I9rS;bU{hx(I23ft%gpOwuoTME{6TG~Y{|z%ub(-uzm+MR
zwjWJ5%PUEg(nVV$$87iQ!h^&%x0dbpq2IO)=WpBlaGfq1EW>VX?qjCeW3P&9+9%T&
z*6pt5FTVMGB46ir$L<tlU!iOb&f8Wm)V+K?XJ7lIVI*Ja@PiepLZ^vGw@z;n42^ug
z^3vMD%qrCJT}|`4F<AMCaJ$flhpKfuxbOB{`_<dy6_V&#`s{toSnRhT-L5M7nw!fw
z=g#{%m9GjvM7%d5mpnE8d=M47^lOlIbZwoRup`E`nqqKf|F{qjikiy17xmXN_*<LM
zt9?Q|b<W)<Dc&y}cZ0_9ztb0UAPfeb;qkwn{Qnxj|8L9rKbq46QmMLEMWe33zf|34
z+`vBaKmIDbK4OkzULY<)so%Nat%))RwofSocj)!iHZu6_%w%h)(KTENPQ0J_k}hj6
zh)VhR&V3{BZSx0y<2<gn{5J3CWH)%sI9mSEJ=xfV1?oGAv#J&D?76B(#S8uoBW|!s
zD1KZh-BXiX8&GvSggj}_gc2UMuiQDa-$*vr00d`xZHohMB#tO`P(MgMFhv_v;FWBv
zwZH?I!VW;4OX50XD&<%$?c@}5<Jj1OpGO$wsU^$5ZOAUIyL)3bDam!h&O2nt%YqwK
zH;cT=0qtz<ZhfG^C*CJVd}ZvDWd%7#+gz1HS>~j{=MqXsHiTOF>iXdu#F;yUJ)57(
zF7K7PUr7`n7Q)rFi7tRm!ASjcKy`Ze_+F3>)>j4Sdus1B+}QRrksA1tIB|AQn=mRu
z<Tn=zaau(ixq42T&0iOcPQ+zD-G48^l~~|i<95c0J=8{4+xh(a=)!goq(yA*3uTn4
zX3`>g>QTDfTIQA~PebrN*n?fGFN@^1n#SB(2e<HXJ@C2gKj?Q%Gk&mBIL=WI(>p`F
z&3}SyllC7Use6&@Om`ols-=5=;>JP64!odj`mfO~|9LCxx#w?ONR~@)^Y%F9IDAv^
zUg>PVvZE)~Su-_SnH#&HvU_u@gEms7cwq;d@%whs#1Sd|pPrJ*_iykkSl|jt!L42Z
zy;GvtHDSdZiQd?FWSZ_=aSF0@TkB=awzv8+SGPVNar3*$)w$YIC^wMF42s3toj8Iz
zC$k9V^azM$hpvvyC?pbx+E;sgtGCBHYLxl|*XVz}&gLfve<Hj8|9>>_-_87ghX{3V
z<~i@*7idR!$w82XI_h=lw%2_tFB=tW4;%P`l9iT~IV~-JT1LS@MoC3ZUPV?$LRwZu
zT3Uv0?alvnfwSv<dt3ki`GRx07bW3>BgiYb-M3Y_YOiPGdBN4&<-Wa(-33=al)R#}
z<QYFXc?C&D326ywsl)*g96;^<KfiBe<Kb!V>Vnd}c=@!9r2K*XS5*Aq=P0d9H!l`m
Iuzd9Y0LhiAzyJUM

literal 0
HcmV?d00001

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 5a00b625..1ea899d2 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -445,6 +445,16 @@ class PlotAvailabilityHistogram(AbstractPlotClass):  # pragma: no cover
 
 
 class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
+    """
+    Plot histogram on transformed input and target data. This data is the same that the model sees during training. No
+    plots are create for the original values space (raw / unformatted data). This plot method will create a histogram
+    for input and target each comparing the subsets train, val and test, as well as a distinct one for the three
+    subsets.
+
+    .. image:: ../../../../../_source/_plots/datahistogram.png
+        :width: 400
+
+    """
 
     def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".", plot_name="histogram",
                  variables_dim="variables", time_dim="datetime", window_dim="window"):
-- 
GitLab
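
As an aside on the docstring added in this patch: the point of the plot is that all subsets are binned on a common grid so their distributions are directly comparable. The following standalone sketch (plain numpy/matplotlib with synthetic data, not MLAir code) illustrates that idea:

```python
# Standalone sketch: compare the value distributions of the train/val/test
# subsets in one histogram, using shared bin edges so the subsets are
# directly comparable -- the idea behind PlotDataHistogram.
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
subsets = {"train": rng.normal(0.0, 1.0, 5000),
           "val": rng.normal(0.1, 1.1, 1000),
           "test": rng.normal(-0.1, 0.9, 1000)}

# compute one common set of bin edges over all subsets
all_values = np.concatenate(list(subsets.values()))
bin_edges = np.histogram_bin_edges(all_values, bins=40)

for label, values in subsets.items():
    plt.hist(values, bins=bin_edges, density=True, histtype="step", label=label)
plt.legend()
plt.savefig("histogram_sketch.png")
```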


From e4dce9fb99fb72310bd7cab20075191fc5113448 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Apr 2021 17:41:43 +0200
Subject: [PATCH 086/175] updated defaults test, /close #299 on test success

---
 mlair/plotting/data_insight_plotting.py  | 6 +++---
 test/test_configuration/test_defaults.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 1ea899d2..aca5e3c5 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -824,14 +824,14 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         plt.close('all')
 
 
-def f_proc(var, d_var):
+def f_proc(var, d_var):  # pragma: no cover
     var_str = str(var)
     t = (d_var.datetime - d_var.datetime[0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
     f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1).autopower()
     return var_str, f, pgram
 
 
-def f_proc_2(g, m, pos, variables_dim, time_dim):
+def f_proc_2(g, m, pos, variables_dim, time_dim):  # pragma: no cover
     raw_data_single = dict()
     if m == 0:
         d = g.id_class._data
@@ -847,7 +847,7 @@ def f_proc_2(g, m, pos, variables_dim, time_dim):
     return raw_data_single
 
 
-def f_proc_hist(data, variables, n_bins, variables_dim):
+def f_proc_hist(data, variables, n_bins, variables_dim):  # pragma: no cover
     res = {}
     bin_edges = {}
     interval_width = {}
diff --git a/test/test_configuration/test_defaults.py b/test/test_configuration/test_defaults.py
index 90227ed2..16606d8f 100644
--- a/test/test_configuration/test_defaults.py
+++ b/test/test_configuration/test_defaults.py
@@ -68,4 +68,4 @@ class TestAllDefaults:
         assert DEFAULT_PLOT_LIST == ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore",
                                      "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore",
                                      "PlotConditionalQuantiles", "PlotAvailability", "PlotAvailabilityHistogram",
-                                     "PlotSeparationOfScales"]
+                                     "PlotDataHistogram"]
-- 
GitLab
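
For context on `f_proc` in this patch: it computes a Lomb-Scargle periodogram via astropy, which works on unevenly sampled (gappy) series and therefore suits raw observational data. A minimal standalone sketch with synthetic data (the weekly period is purely illustrative):

```python
# Standalone sketch of the Lomb-Scargle computation used by f_proc.
import numpy as np
from astropy.timeseries import LombScargle

rng = np.random.default_rng(0)
t = np.sort(rng.uniform(0, 100, 500))  # irregular sampling times in days
y = np.sin(2 * np.pi * t / 7.0) + 0.3 * rng.normal(size=t.size)  # weekly cycle plus noise

frequency, power = LombScargle(t, y, nterms=1).autopower()
print("dominant period [days]:", 1 / frequency[np.argmax(power)])  # ~7
```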


From 52d6fd13dab4ced4b1f0992c5fb8a6b648114a85 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 16 Apr 2021 10:35:32 +0200
Subject: [PATCH 087/175] additionally include time measurement

---
 mlair/plotting/data_insight_plotting.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index aca5e3c5..45f5d3d6 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -444,6 +444,7 @@ class PlotAvailabilityHistogram(AbstractPlotClass):  # pragma: no cover
         plt.tight_layout()
 
 
+@TimeTrackingWrapper
 class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
     """
     Plot histograms of the transformed input and target data. This data is the same the model sees during training.
@@ -570,6 +571,7 @@ class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
         plt.close('all')
 
 
+@TimeTrackingWrapper
 class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
     """
     Create a Lomb-Scargle periodogram from the raw input and target data. The Lomb-Scargle version can deal with missing values.
-- 
GitLab
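
`TimeTrackingWrapper` comes from MLAir's helpers; applied to a plot class, it roughly times the constructor call, which is where the plotting work happens. As an illustration of the pattern only (a sketch, not MLAir's actual implementation), a timing decorator might look like this:

```python
# Minimal timing decorator in the spirit of TimeTrackingWrapper; the real
# implementation lives in mlair.helpers and may differ in detail.
import functools
import logging
import time

logging.basicConfig(level=logging.INFO)


def time_tracking(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # log the elapsed time even if the wrapped call raises
            elapsed = time.perf_counter() - start
            logging.info(f"{func.__name__} finished after {elapsed:.2f}s")
    return wrapper


@time_tracking
def expensive_plot():
    time.sleep(0.5)  # stands in for the actual plotting work


expensive_plot()  # logs: expensive_plot finished after 0.50s
```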


From 8ec9b05fe6ab984b6852d416780367a2aa5ef3d7 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 19 Apr 2021 10:19:56 +0200
Subject: [PATCH 088/175] TrackParameter now wraps the entire method to get
 the return value

---
 mlair/helpers/datastore.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/mlair/helpers/datastore.py b/mlair/helpers/datastore.py
index d6c977c7..b08c6f45 100644
--- a/mlair/helpers/datastore.py
+++ b/mlair/helpers/datastore.py
@@ -111,8 +111,15 @@ class TrackParameter:
         """
         Call method of decorator.
         """
-        self.track(*args)
-        return self.__wrapped__(*args, **kwargs)
+        name, obj, scope = self.track(*args)
+        f_name = self.__wrapped__.__name__
+        try:
+            res = self.__wrapped__(*args, **kwargs)
+            logging.debug(f"{f_name}: {name}({scope})={res if obj is None else obj}")
+        except Exception:
+            logging.debug(f"{f_name}: {name}({scope})={obj}")
+            raise
+        return res
 
     def __get__(self, instance, cls):
         """Create bound method object and supply self argument to the decorated method. <Python Cookbook, p.347>"""
@@ -127,6 +134,7 @@ class TrackParameter:
             tracker[name].append(new_entry)
         else:
             tracker[name] = [new_entry]
+        return name, obj, scope
 
     @staticmethod
     def _decrypt_args(*args):
-- 
GitLab
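
The change above moves the wrapped call inside the decorator so the tracked parameter can be logged together with the call's outcome, while exceptions still propagate. Stripped of the datastore specifics, the pattern is (illustrative names, not MLAir's API):

```python
# Generic version of the pattern from this patch: log the return value on
# success, log the failure case, and re-raise so callers still see the error.
import functools
import logging

logging.basicConfig(level=logging.DEBUG)


def log_result(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            res = func(*args, **kwargs)
            logging.debug(f"{func.__name__} returned {res!r}")
        except Exception:
            logging.debug(f"{func.__name__} raised before returning")
            raise
        return res
    return wrapper


@log_result
def get_value():
    return 42


print(get_value())  # logs "get_value returned 42", then prints 42
```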


From 19e3aab7aab4ab5d68b66ba14b8dcb88a6ffab65 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 19 Apr 2021 10:35:16 +0200
Subject: [PATCH 089/175] removed duplicated logging, /close #304

---
 mlair/helpers/datastore.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlair/helpers/datastore.py b/mlair/helpers/datastore.py
index b08c6f45..85de021e 100644
--- a/mlair/helpers/datastore.py
+++ b/mlair/helpers/datastore.py
@@ -127,7 +127,6 @@ class TrackParameter:
 
     def track(self, tracker_obj, *args):
         name, obj, scope = self._decrypt_args(*args)
-        logging.debug(f"{self.__wrapped__.__name__}: {name}({scope})={obj}")
         tracker = tracker_obj.tracker[-1]
         new_entry = {"method": self.__wrapped__.__name__, "scope": scope}
         if name in tracker:
-- 
GitLab


From 5f9fdcebcc381870417832fee0183d32d5715dc5 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 19 Apr 2021 11:50:01 +0200
Subject: [PATCH 090/175] min max scaler can now read parameter feature_range
 to set normalization interval, default is [0, 1], /close #302 on pipeline
 pass

---
 .../data_handler_single_station.py            | 13 +++++----
 mlair/data_handler/default_data_handler.py    |  4 +++
 mlair/helpers/statistics.py                   | 28 +++++++++++--------
 3 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index e9db27a9..89aafa2c 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -183,13 +183,14 @@ class DataHandlerSingleStation(AbstractDataHandler):
                 #. data: Standardised data
         """
 
-        def f(data, method="standardise"):
+        def f(data, method="standardise", feature_range=None):
             if method == "standardise":
                 return statistics.standardise(data, dim)
             elif method == "centre":
                 return statistics.centre(data, dim)
             elif method == "min_max":
-                return statistics.min_max(data, dim)
+                kwargs = {"feature_range": feature_range} if feature_range is not None else {}
+                return statistics.min_max(data, dim, **kwargs)
             elif method == "log":
                 return statistics.log(data, dim)
             else:
@@ -205,13 +206,15 @@ class DataHandlerSingleStation(AbstractDataHandler):
             std = kwargs.pop('std', None)
             min = kwargs.pop('min', None)
             max = kwargs.pop('max', None)
+            feature_range = kwargs.pop('feature_range', None)
 
             if method == "standardise":
                 return statistics.standardise_apply(data, mean, std), {"mean": mean, "std": std, "method": method}
             elif method == "centre":
                 return statistics.centre_apply(data, mean), {"mean": mean, "method": method}
             elif method == "min_max":
-                return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method}
+                return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method,
+                                                                  "feature_range": feature_range}
             elif method == "log":
                 return statistics.log_apply(data, mean, std), {"mean": mean, "std": std, "method": method}
             else:
@@ -658,13 +661,13 @@ class DataHandlerSingleStation(AbstractDataHandler):
         current data is not transformed.
         """
 
-        def f_inverse(data, method, mean=None, std=None, min=None, max=None):
+        def f_inverse(data, method, mean=None, std=None, min=None, max=None, feature_range=None):
             if method == "standardise":
                 return statistics.standardise_inverse(data, mean, std)
             elif method == "centre":
                 return statistics.centre_inverse(data, mean)
             elif method == "min_max":
-                return statistics.min_max_inverse(data, min, max)
+                return statistics.min_max_inverse(data, min, max, feature_range)
             elif method == "log":
                 return statistics.log_inverse(data, mean, std)
             else:
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 3a57d9fe..11461ad7 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -287,6 +287,8 @@ class DefaultDataHandler(AbstractDataHandler):
                             old = transformation_dict[i][var].get(k, None)
                             new = opts.get(k)
                             transformation_dict[i][var][k] = new if old is None else old.combine_first(new)
+                        if "feature_range" in opts.keys():
+                            transformation_dict[i][var]["feature_range"] = opts.get("feature_range", None)
 
         if multiprocessing.cpu_count() > 1 and kwargs.get("use_multiprocessing", True) is True:  # parallel solution
             logging.info("use parallel transformation approach")
@@ -320,6 +322,8 @@ class DefaultDataHandler(AbstractDataHandler):
                         transformation_dict[i][k]["min"] = transformation[k]["min"].min(iter_dim)
                     if transformation[k]["max"] is not None:
                         transformation_dict[i][k]["max"] = transformation[k]["max"].max(iter_dim)
+                    if "feature_range" in transformation[k].keys():
+                        transformation_dict[i][k]["feature_range"] = transformation[k]["feature_range"]
                 except KeyError:
                     pop_list.append((i, k))
         for (i, k) in pop_list:
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 3e99357c..30391998 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -20,7 +20,7 @@ Data = Union[xr.DataArray, pd.DataFrame]
 
 
 def apply_inverse_transformation(data: Data, method: str = "standardise", mean: Data = None, std: Data = None,
-                                 max: Data = None, min: Data = None) -> Data:
+                                 max: Data = None, min: Data = None, feature_range: Data = None) -> Data:
     """
     Apply inverse transformation for given statistics.
 
@@ -38,7 +38,7 @@ def apply_inverse_transformation(data: Data, method: str = "standardise", mean:
     elif method == 'centre':  # pragma: no branch
         return centre_inverse(data, mean)
     elif method == 'min_max':  # pragma: no branch
-        return min_max_inverse(data, min, max)
+        return min_max_inverse(data, min, max, feature_range)
     elif method == "log":
         return log_inverse(data, mean, std)
     else:
@@ -119,41 +119,45 @@ def centre_apply(data: Data, mean: Data) -> Data:
     return data - mean
 
 
-def min_max(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data)]]:
+def min_max(data: Data, dim: Union[str, int], feature_range: Tuple = (0, 1)) -> Tuple[Data, Dict[(str, Data)]]:
     """
     Apply min/max scaling using (x - x_min) / (x_max - x_min), rescaled to feature_range (default [0, 1]).
 
     :param data: data to transform
     :param dim: name (xarray) or axis (pandas) of dimension which should be centred
+    :param feature_range: interval to scale the data to; default is [0, 1].
     :return: transformed data, and dictionary with keys method, min, and max
     """
     d_max = data.max(dim)
     d_min = data.min(dim)
-    return (data - d_min) / (d_max - d_min), {"min": d_min, "max": d_max, "method": "min_max"}
+    d_scaled = (data - d_min) / (d_max - d_min) * (max(feature_range) - min(feature_range)) + min(feature_range)
+    return d_scaled, {"min": d_min, "max": d_max, "method": "min_max", "feature_range": feature_range}
 
 
-def min_max_inverse(data: Data, min: Data, max: Data) -> Data:
+def min_max_inverse(data: Data, _min: Data, _max: Data, feature_range: Tuple = (0, 1)) -> Data:
     """
     Apply inverse transformation of `min_max` scaling.
 
     :param data: data to apply inverse scaling
-    :param min: minimum value to use for min/max scaling
-    :param max: maximum value to use for min/max scaling
+    :param _min: minimum value to use for min/max scaling
+    :param _max: maximum value to use for min/max scaling
+    :param feature_range: interval to scale the data to; default is [0, 1].
     :return: inverted min/max scaled data
     """
-    return data * (max - min) + min
+    return (data - min(feature_range)) / (max(feature_range) - min(feature_range)) * (_max - _min) + _min
 
 
-def min_max_apply(data: Data, min: Data, max: Data) -> Data:
+def min_max_apply(data: Data, _min: Data, _max: Data, feature_range: Tuple = (0, 1)) -> Data:
     """
     Apply `min_max` scaling with given minimum and maximum.
 
     :param data: data to apply scaling
-    :param min: minimum value to use for min/max scaling
-    :param max: maximum value to use for min/max scaling
+    :param _min: minimum value to use for min/max scaling
+    :param _max: maximum value to use for min/max scaling
+    :param feature_range: interval to scale the data to; default is [0, 1].
     :return: min/max scaled data
     """
-    return (data - min) / (max - min)
+    return (data - _min) / (_max - _min) * (max(feature_range) - min(feature_range)) + min(feature_range)
 
 
 def log(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data)]]:
-- 
GitLab


From e67e16c33218bc6577e3e09be000325676365ed9 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 19 Apr 2021 14:11:45 +0200
Subject: [PATCH 091/175] offset is implemented in data handler, /close #305

---
 mlair/data_handler/data_handler_mixed_sampling.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
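
A short sketch of the index arithmetic in this patch (standalone toy values; in the handler,
offset comes from window_history_offset and delta from the kz cutoff periods): the window
labels move by offset, and the shift applied to the data compensates, plus an extra move of
offset raw time steps relative to the anchor.

    window, offset, delta = -3, 2, 4
    start, end = window, 1
    labels = [w + offset for w in range(start, end)]             # [-1, 0, 1, 2]
    # per relabelled step w, the delta-scaled lag uses the original index
    # (w - offset); the trailing "- offset" moves the whole window by
    # `offset` raw steps
    shifts = [-(w - offset) * delta - offset for w in labels]    # [10, 6, 2, -2]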

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index e2516257..a1036433 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -205,9 +205,9 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
         """
         window = -abs(window)
         data = self.input_data
-        self.history = self.stride(data, dim_name_of_shift, window)
+        self.history = self.stride(data, dim_name_of_shift, window, offset=self.window_history_offset)
 
-    def stride(self, data: xr.DataArray, dim: str, window: int) -> xr.DataArray:
+    def stride(self, data: xr.DataArray, dim: str, window: int, offset: int = 0) -> xr.DataArray:
 
         # this is just a code snippet to check the results of the kz filter
         # import matplotlib
@@ -218,12 +218,13 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
         time_deltas = np.round(self.time_delta(self.cutoff_period)).astype(int)
         start, end = window, 1
         res = []
-        window_array = self.create_index_array(self.window_dim, range(start, end), squeeze_dim=self.target_dim)
+        _range = list(map(lambda x: x + offset, range(start, end)))
+        window_array = self.create_index_array(self.window_dim, _range, squeeze_dim=self.target_dim)
         for delta, filter_name in zip(np.append(time_deltas, 1), data.coords["filter"]):
             res_filter = []
             data_filter = data.sel({"filter": filter_name})
-            for w in range(start, end):
-                res_filter.append(data_filter.shift({dim: -w * delta}))
+            for w in _range:
+                res_filter.append(data_filter.shift({dim: -(w - offset) * delta - offset}))
             res_filter = xr.concat(res_filter, dim=window_array).chunk()
             res.append(res_filter)
         res = xr.concat(res, dim="filter").compute()
-- 
GitLab


From 47813afab5a9ac345c47f1828aff8a0e85d05515 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 27 Apr 2021 16:30:46 +0200
Subject: [PATCH 092/175] index array was missing

---
 mlair/plotting/data_insight_plotting.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)
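
The reason f_index must be passed into the workers: every periodogram is evaluated on one
shared frequency grid, so spectra from different stations can be summed and averaged. A
minimal sketch with astropy (synthetic data; the grid matches the hourly case below):

    import numpy as np
    from astropy.timeseries import LombScargle

    f_index = np.logspace(-3, np.log10(24), 1000)    # shared grid in 1/day
    t = np.arange(0, 365, 1 / 24)                    # time in days, hourly sampling
    y = np.sin(2 * np.pi * t) + 0.1 * np.random.randn(t.size)
    # evaluate the PSD-normalised periodogram on the common grid
    pgram = LombScargle(t, y, nterms=1, normalization="psd").power(f_index)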

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 1176621a..4e7a1db3 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -515,9 +515,9 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
             plot_data_single = dict()
             plot_data_raw_single = dict()
             plot_data_mean_single = dict()
+            self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000)
             raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing)
             # raw_data_single = self._prepare_pgram_parallel_var(generator, m, pos, use_multiprocessing)
-            self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000)
             for var in raw_data_single.keys():
                 pgram_com = []
                 pgram_mean = 0
@@ -577,14 +577,14 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
             pool = multiprocessing.Pool(
                 min([psutil.cpu_count(logical=False), len(generator), 16]))  # use only physical cpus
             output = [
-                pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim))
+                pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index))
                 for g in generator]
             for i, p in enumerate(output):
                 res.append(p.get())
             pool.close()
         else:
             for g in generator:
-                res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim))
+                res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index))
         for res_dict in res:
             for k, v in res_dict.items():
                 if k not in raw_data_single.keys():
@@ -607,7 +607,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         """
         ax.set_yscale('log')
         ax.set_xscale('log')
-        ax.set_ylabel("power", fontsize='x-large')
+        ax.set_ylabel("power spectral density", fontsize='x-large')  # unit depends on variable: [unit^2 day^-1]
         ax.set_xlabel("frequency $[day^{-1}$]", fontsize='x-large')
         lims = ax.get_ylim()
         self._add_annotation_line(ax, [1, 2, 3], 365.25, lims, "yr")  # per year
@@ -698,14 +698,15 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         plt.close('all')
 
 
-def f_proc(var, d_var):
+def f_proc(var, d_var, f_index):
     var_str = str(var)
     t = (d_var.datetime - d_var.datetime[0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
-    f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1).autopower()
-    return var_str, f, pgram
+    pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").power(f_index)
+    # f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").autopower()
+    return var_str, f_index, pgram
 
 
-def f_proc_2(g, m, pos, variables_dim, time_dim):
+def f_proc_2(g, m, pos, variables_dim, time_dim, f_index):
     raw_data_single = dict()
     if m == 0:
         d = g.id_class._data
@@ -716,6 +717,6 @@ def f_proc_2(g, m, pos, variables_dim, time_dim):
     d = d[pos] if isinstance(d, tuple) else d
     for var in d[variables_dim].values:
         d_var = d.loc[{variables_dim: var}].squeeze().dropna(time_dim)
-        var_str, f, pgram = f_proc(var, d_var)
+        var_str, f, pgram = f_proc(var, d_var, f_index)
         raw_data_single[var_str] = [(f, pgram)]
     return raw_data_single
-- 
GitLab


From 22ab59c81019f4d598d4da4b7c2f8cd823dee445 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 10:40:10 +0200
Subject: [PATCH 093/175] kz filter has moved to a new location together with
 the new FIR filter class, first implementation of FIR data handler

---
 mlair/data_handler/data_handler_kz_filter.py  | 114 -------
 .../data_handler_mixed_sampling.py            |   2 +-
 .../data_handler/data_handler_with_filter.py  | 258 ++++++++++++++++
 mlair/helpers/filter.py                       | 284 ++++++++++++++++++
 mlair/helpers/statistics.py                   | 214 -------------
 .../test_data_handler_mixed_sampling.py       |   2 +-
 6 files changed, 544 insertions(+), 330 deletions(-)
 delete mode 100644 mlair/data_handler/data_handler_kz_filter.py
 create mode 100644 mlair/data_handler/data_handler_with_filter.py
 create mode 100644 mlair/helpers/filter.py
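
The core of the new FIRFilter class is scipy's window-method filter design plus causal
filtering. A minimal sketch following the conventions of the scratch notes kept in the new
module (frequencies in 1/day rather than Hz; values illustrative):

    import numpy as np
    from scipy import signal

    fs = 24                     # sampling frequency in 1/day (hourly data)
    order = int(14 * fs) + 1    # odd tap count -> linear-phase type I filter
    cutoff = 1 / 14             # low-pass cutoff in 1/day (14-day period)

    h = signal.firwin(order, cutoff, pass_zero="lowpass", window=("kaiser", 5), fs=fs)
    t = np.arange(0, 100, 1 / fs)
    x = np.sin(2 * np.pi * t / 30) + np.sin(2 * np.pi * t)    # slow + fast component
    y = signal.lfilter(h, 1.0, x)    # causal: delays the signal by 0.5 * (order - 1) / fs days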

diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py
deleted file mode 100644
index 539712b3..00000000
--- a/mlair/data_handler/data_handler_kz_filter.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""Data Handler using kz-filtered data."""
-
-__author__ = 'Lukas Leufen'
-__date__ = '2020-08-26'
-
-import inspect
-import numpy as np
-import pandas as pd
-import xarray as xr
-from typing import List, Union, Tuple, Optional
-from functools import partial
-
-from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
-from mlair.data_handler import DefaultDataHandler
-from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
-from mlair.helpers.statistics import KolmogorovZurbenkoFilterMovingWindow as KZFilter
-
-# define a more general date type for type hinting
-str_or_list = Union[str, List[str]]
-
-
-class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
-    """Data handler for a single station to be used by a superior data handler. Inputs are kz filtered."""
-
-    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
-    _hash = DataHandlerSingleStation._hash + ["kz_filter_length", "kz_filter_iter", "filter_dim"]
-
-    DEFAULT_FILTER_DIM = "filter"
-
-    def __init__(self, *args, kz_filter_length, kz_filter_iter, filter_dim=DEFAULT_FILTER_DIM, **kwargs):
-        self._check_sampling(**kwargs)
-        # self.original_data = None  # ToDo: implement here something to store unfiltered data
-        self.kz_filter_length = to_list(kz_filter_length)
-        self.kz_filter_iter = to_list(kz_filter_iter)
-        self.filter_dim = filter_dim
-        self.cutoff_period = None
-        self.cutoff_period_days = None
-        super().__init__(*args, **kwargs)
-
-    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
-        """
-        Adjust setup of transformation because kfz filtered data will have negative values which is not compatible with
-        the log transformation. Therefore, replace all log transformation methods by a default standardization. This is
-        only applied on input side.
-        """
-        transformation = super(__class__, self).setup_transformation(transformation)
-        if transformation[0] is not None:
-            for k, v in transformation[0].items():
-                if v["method"] == "log":
-                    transformation[0][k]["method"] = "standardise"
-        return transformation
-
-    def _check_sampling(self, **kwargs):
-        assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution
-
-    def make_input_target(self):
-        data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
-                                         self.station_type, self.network, self.store_data_locally, self.data_origin)
-        self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
-                                      limit=self.interpolation_limit)
-        self.set_inputs_and_targets()
-        self.apply_kz_filter()
-        # this is just a code snippet to check the results of the kz filter
-        # import matplotlib
-        # matplotlib.use("TkAgg")
-        # import matplotlib.pyplot as plt
-        # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot()
-        # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
-
-    @TimeTrackingWrapper
-    def apply_kz_filter(self):
-        """Apply kolmogorov zurbenko filter only on inputs."""
-        kz = KZFilter(self.input_data, wl=self.kz_filter_length, itr=self.kz_filter_iter, filter_dim=self.time_dim)
-        filtered_data: List[xr.DataArray] = kz.run()
-        self.cutoff_period = kz.period_null()
-        self.cutoff_period_days = kz.period_null_days()
-        self.input_data = xr.concat(filtered_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
-
-    def create_filter_index(self) -> pd.Index:
-        """
-        Round cut off periods in days and append 'res' for residuum index.
-
-        Round small numbers (<10) to single decimal, and higher numbers to int. Transform as list of str and append
-        'res' for residuum index.
-        """
-        index = np.round(self.cutoff_period_days, 1)
-        f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1)
-        index = list(map(f, index.tolist()))
-        index = list(map(lambda x: str(x) + "d", index)) + ["res"]
-        return pd.Index(index, name=self.filter_dim)
-
-    def get_transposed_history(self) -> xr.DataArray:
-        """Return history.
-
-        :return: history with dimensions datetime, window, Stations, variables, filter.
-        """
-        return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
-                                      self.filter_dim).copy()
-
-    def _create_lazy_data(self):
-        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
-
-    def _extract_lazy(self, lazy_data):
-        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
-        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
-        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
-
-
-class DataHandlerKzFilter(DefaultDataHandler):
-    """Data handler using kz filtered data."""
-
-    data_handler = DataHandlerKzFilterSingleStation
-    data_handler_transformation = DataHandlerKzFilterSingleStation
-    _requirements = data_handler.requirements()
diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index a1036433..4c84866b 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -2,7 +2,7 @@ __author__ = 'Lukas Leufen'
 __date__ = '2020-11-05'
 
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
-from mlair.data_handler.data_handler_kz_filter import DataHandlerKzFilterSingleStation
+from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation
 from mlair.data_handler import DefaultDataHandler
 from mlair import helpers
 from mlair.helpers import remove_items
diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
new file mode 100644
index 00000000..6a37a447
--- /dev/null
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -0,0 +1,258 @@
+"""Data Handler using kz-filtered data."""
+
+__author__ = 'Lukas Leufen'
+__date__ = '2020-08-26'
+
+import inspect
+import numpy as np
+import pandas as pd
+import xarray as xr
+from typing import List, Union, Tuple, Optional
+from functools import partial
+
+from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
+from mlair.data_handler import DefaultDataHandler
+from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
+from mlair.helpers.filter import KolmogorovZurbenkoFilterMovingWindow as KZFilter
+from mlair.helpers.filter import FIRFilter
+
+# define a more general date type for type hinting
+str_or_list = Union[str, List[str]]
+
+
+# cutoff_p = [(None, 14), (8, 6), (2, 0.8), (0.8, None)]
+# cutoff = list(map(lambda x: (1. / x[0] if x[0] is not None else None, 1. / x[1] if x[1] is not None else None), cutoff_p))
+# fs = 24.
+# # order = int(60 * fs) + 1
+# order = np.array([int(14 * fs) + 1, int(14 * fs) + 1, int(4 * fs) + 1, int(2 * fs) + 1])
+# print("cutoff period", cutoff_p)
+# print("cutoff", cutoff)
+# print("fs", fs)
+# print("order", order)
+# print("delay", 0.5 * (order-1) / fs)
+# window = ("kaiser", 5)
+# # low pass
+# y, h = fir_filter(station_data.values.flatten(), fs, order[0], cutoff_low = cutoff[0][0], cutoff_high = cutoff[0][1], window=window)
+# filtered = xr.ones_like(station_data) * y.reshape(station_data.values.shape)
+
+class DataHandlerFirFilterSingleStation(DataHandlerSingleStation):
+    """Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered."""
+
+    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
+    _hash = DataHandlerSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type",
+                                              "filter_dim", "filter_add_unfiltered"]
+
+    DEFAULT_FILTER_DIM = "filter"
+    DEFAULT_WINDOW_TYPE = ("kaiser", 5)
+    DEFAULT_ADD_UNFILTERED = False
+
+    def __init__(self, *args, filter_cutoff_period, filter_order, filter_window_type=DEFAULT_WINDOW_TYPE,
+                 filter_dim=DEFAULT_FILTER_DIM, filter_add_unfiltered=DEFAULT_ADD_UNFILTERED, **kwargs):
+        # self._check_sampling(**kwargs)
+        # self.original_data = None  # ToDo: implement here something to store unfiltered data
+        self.filter_cutoff_period = (lambda x: [x] if isinstance(x, tuple) else to_list(x))(filter_cutoff_period)
+        self.filter_cutoff_freq = self._period_to_freq(self.filter_cutoff_period)
+        assert len(self.filter_cutoff_period) == len(filter_order)
+        self.filter_order = filter_order
+        self.filter_window_type = filter_window_type
+        self.filter_dim = filter_dim
+        self._add_unfiltered = filter_add_unfiltered
+        self.fs = self._get_fs(**kwargs)
+
+        super().__init__(*args, **kwargs)
+
+    @staticmethod
+    def _period_to_freq(cutoff_p):
+        return list(map(lambda x: (1. / x[0] if x[0] is not None else None, 1. / x[1] if x[1] is not None else None),
+                        cutoff_p))
+
+    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
+        """
+        Adjust setup of transformation because kfz filtered data will have negative values which is not compatible with
+        the log transformation. Therefore, replace all log transformation methods by a default standardization. This is
+        only applied on input side.
+        """
+        transformation = super(__class__, self).setup_transformation(transformation)
+        if transformation[0] is not None:
+            for k, v in transformation[0].items():
+                if v["method"] == "log":
+                    transformation[0][k]["method"] = "standardise"
+        return transformation
+
+    @staticmethod
+    def _get_fs(**kwargs):
+        """Return frequency in 1/day (not Hz)"""
+        sampling = kwargs.get("sampling")
+        if sampling == "daily":
+            return 1
+        elif sampling == "hourly":
+            return 24
+        else:
+            raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.")
+
+    def _check_sampling(self, **kwargs):
+        assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution, does it?
+
+    def make_input_target(self):
+        data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
+                                         self.station_type, self.network, self.store_data_locally, self.data_origin)
+        self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
+                                      limit=self.interpolation_limit)
+        self.set_inputs_and_targets()
+        self.apply_fir_filter()
+        # this is just a code snippet to check the results of the kz filter
+        # import matplotlib
+        # matplotlib.use("TkAgg")
+        # import matplotlib.pyplot as plt
+        # self.input_data.sel(filter="low", variables="temp", Stations="DEBW107").plot()
+        # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
+
+    @TimeTrackingWrapper
+    def apply_fir_filter(self):
+        """Apply FIR filter only on inputs."""
+        fir = FIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq, self.filter_window_type,
+                        self.target_dim)
+        self.fir_coeff = fir.filter_coefficients()
+        fir_data = fir.filtered_data()
+        if self._add_unfiltered is True:
+            fir_data.append(self.input_data)
+        self.input_data = xr.concat(fir_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
+
+    def create_filter_index(self) -> pd.Index:
+        """
+        Create name for filter dimension. Use 'high' or 'low' for high/low pass data and 'bandi' for band pass data
+        with increasing number i (starting from 1). If 1 low, 2 band, and 1 high pass filters are used, the filter
+        index becomes ['low', 'band1', 'band2', 'high'].
+        """
+        index = []
+        band_num = 1
+        for (low, high) in self.filter_cutoff_period:
+            if low is None:
+                index.append("low")
+            elif high is None:
+                index.append("high")
+            else:
+                index.append(f"band{band_num}")
+                band_num += 1
+        if self._add_unfiltered:
+            index.append("unfiltered")
+        return pd.Index(index, name=self.filter_dim)
+
+    def get_transposed_history(self) -> xr.DataArray:
+        """Return history.
+
+        :return: history with dimensions datetime, window, Stations, variables, filter.
+        """
+        return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
+                                      self.filter_dim).copy()
+
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+
+    def _extract_lazy(self, lazy_data):
+        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
+        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
+
+
+class DataHandlerFirFilter(DefaultDataHandler):
+    """Data handler using FIR filtered data."""
+
+    data_handler = DataHandlerFirFilterSingleStation
+    data_handler_transformation = DataHandlerFirFilterSingleStation
+    _requirements = data_handler.requirements()
+
+
+class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
+    """Data handler for a single station to be used by a superior data handler. Inputs are kz filtered."""
+
+    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
+    _hash = DataHandlerSingleStation._hash + ["kz_filter_length", "kz_filter_iter", "filter_dim"]
+
+    DEFAULT_FILTER_DIM = "filter"
+
+    def __init__(self, *args, kz_filter_length, kz_filter_iter, filter_dim=DEFAULT_FILTER_DIM, **kwargs):
+        self._check_sampling(**kwargs)
+        # self.original_data = None  # ToDo: implement here something to store unfiltered data
+        self.kz_filter_length = to_list(kz_filter_length)
+        self.kz_filter_iter = to_list(kz_filter_iter)
+        self.filter_dim = filter_dim
+        self.cutoff_period = None
+        self.cutoff_period_days = None
+        super().__init__(*args, **kwargs)
+
+    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
+        """
+        Adjust setup of transformation because kfz filtered data will have negative values which is not compatible with
+        the log transformation. Therefore, replace all log transformation methods by a default standardization. This is
+        only applied on input side.
+        """
+        transformation = super(__class__, self).setup_transformation(transformation)
+        if transformation[0] is not None:
+            for k, v in transformation[0].items():
+                if v["method"] == "log":
+                    transformation[0][k]["method"] = "standardise"
+        return transformation
+
+    def _check_sampling(self, **kwargs):
+        assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution
+
+    def make_input_target(self):
+        data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
+                                         self.station_type, self.network, self.store_data_locally, self.data_origin)
+        self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
+                                      limit=self.interpolation_limit)
+        self.set_inputs_and_targets()
+        self.apply_kz_filter()
+        # this is just a code snippet to check the results of the kz filter
+        # import matplotlib
+        # matplotlib.use("TkAgg")
+        # import matplotlib.pyplot as plt
+        # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot()
+        # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
+
+    @TimeTrackingWrapper
+    def apply_kz_filter(self):
+        """Apply kolmogorov zurbenko filter only on inputs."""
+        kz = KZFilter(self.input_data, wl=self.kz_filter_length, itr=self.kz_filter_iter, filter_dim=self.time_dim)
+        filtered_data: List[xr.DataArray] = kz.run()
+        self.cutoff_period = kz.period_null()
+        self.cutoff_period_days = kz.period_null_days()
+        self.input_data = xr.concat(filtered_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
+
+    def create_filter_index(self) -> pd.Index:
+        """
+        Round cutoff periods in days and append 'res' for the residuum index.
+
+        Round small numbers (<10) to a single decimal and higher numbers to int. Transform to a list of str and
+        append 'res' for the residuum index.
+        """
+        index = np.round(self.cutoff_period_days, 1)
+        f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1)
+        index = list(map(f, index.tolist()))
+        index = list(map(lambda x: str(x) + "d", index)) + ["res"]
+        return pd.Index(index, name=self.filter_dim)
+
+    def get_transposed_history(self) -> xr.DataArray:
+        """Return history.
+
+        :return: history with dimensions datetime, window, Stations, variables, filter.
+        """
+        return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
+                                      self.filter_dim).copy()
+
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+
+    def _extract_lazy(self, lazy_data):
+        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
+        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
+
+
+class DataHandlerKzFilter(DefaultDataHandler):
+    """Data handler using kz filtered data."""
+
+    data_handler = DataHandlerKzFilterSingleStation
+    data_handler_transformation = DataHandlerKzFilterSingleStation
+    _requirements = data_handler.requirements()
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
new file mode 100644
index 00000000..ad2fd12d
--- /dev/null
+++ b/mlair/helpers/filter.py
@@ -0,0 +1,284 @@
+import gc
+import warnings
+from typing import Union
+
+import numpy as np
+from matplotlib import pyplot as plt
+from scipy import signal
+import xarray as xr
+
+from mlair.helpers import to_list, TimeTrackingWrapper
+
+
+class FIRFilter:
+
+    def __init__(self, data, fs, order, cutoff, window, dim):
+
+        filtered = []
+        h = []
+        for i in range(len(order)):
+            fi, hi = self.apply_fir_filter(data, fs, order[i], cutoff_low=cutoff[i][0], cutoff_high=cutoff[i][1],
+                                           window=window, dim=dim)
+            filtered.append(fi)
+            h.append(hi)
+
+        self._filtered = filtered
+        self._h = h
+
+    def filter_coefficients(self):
+        return self._h
+
+    def filtered_data(self):
+        return self._filtered
+        #
+        # y, h = fir_filter(station_data.values.flatten(), fs, order[0], cutoff_low=cutoff[0][0], cutoff_high=cutoff[0][1],
+        #                   window=window)
+        # filtered = xr.ones_like(station_data) * y.reshape(station_data.values.shape)
+        # # band pass
+        # y_band, h_band = fir_filter(station_data.values.flatten(), fs, order[1], cutoff_low=cutoff[1][0],
+        #                             cutoff_high=cutoff[1][1], window=window)
+        # filtered_band = xr.ones_like(station_data) * y_band.reshape(station_data.values.shape)
+        # # band pass 2
+        # y_band_2, h_band_2 = fir_filter(station_data.values.flatten(), fs, order[2], cutoff_low=cutoff[2][0],
+        #                                 cutoff_high=cutoff[2][1], window=window)
+        # filtered_band_2 = xr.ones_like(station_data) * y_band_2.reshape(station_data.values.shape)
+        # # high pass
+        # y_high, h_high = fir_filter(station_data.values.flatten(), fs, order[3], cutoff_low=cutoff[3][0],
+        #                             cutoff_high=cutoff[3][1], window=window)
+        # filtered_high = xr.ones_like(station_data) * y_high.reshape(station_data.values.shape)
+
+    def apply_fir_filter(self, data, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", dim="variables"):
+
+        # create fir filter coeffs
+        cutoff = []
+        if cutoff_low is not None:
+            cutoff += [cutoff_low]
+        if cutoff_high is not None:
+            cutoff += [cutoff_high]
+        if len(cutoff) == 2:
+            filter_type = "bandpass"
+        elif len(cutoff) == 1 and cutoff_low is not None:
+            filter_type = "highpass"
+        elif len(cutoff) == 1 and cutoff_high is not None:
+            filter_type = "lowpass"
+        else:
+            raise ValueError("Please provide either cutoff_low or cutoff_high.")
+        h = signal.firwin(order, cutoff, pass_zero=filter_type, fs=fs, window=window)
+
+        # filter data
+        filtered = xr.ones_like(data)
+        for var in data.coords[dim]:
+            d = data.sel({dim: var}).values.flatten()
+            y = signal.lfilter(h, 1., d)
+            filtered.loc[{dim: var}] = y
+        return filtered, h
+
+
+class KolmogorovZurbenkoBaseClass:
+
+    def __init__(self, df, wl, itr, is_child=False, filter_dim="window"):
+        """
+        It creates the variables associated with the Kolmogorov-Zurbenko filter.
+
+        Args:
+            df(pd.DataFrame, None): time series of a variable
+            wl(list of int): window length
+            itr(list of int): number of iterations
+        """
+        self.df = df
+        self.filter_dim = filter_dim
+        self.wl = to_list(wl)
+        self.itr = to_list(itr)
+        if abs(len(self.wl) - len(self.itr)) > 0:
+            raise ValueError("Length of lists for wl and itr must agree!")
+        self._isChild = is_child
+        self.child = self.set_child()
+        self.type = type(self).__name__
+
+    def set_child(self):
+        if len(self.wl) > 1:
+            return KolmogorovZurbenkoBaseClass(None, self.wl[1:], self.itr[1:], True, self.filter_dim)
+        else:
+            return None
+
+    def kz_filter(self, df, m, k):
+        pass
+
+    def spectral_calc(self):
+        df_start = self.df
+        kz = self.kz_filter(df_start, self.wl[0], self.itr[0])
+        filtered = self.subtract(df_start, kz)
+        # case I: no child avail -> return kz and remaining
+        if self.child is None:
+            return [kz, filtered]
+        # case II: has child -> return current kz and all child results
+        else:
+            self.child.df = filtered
+            kz_next = self.child.spectral_calc()
+            return [kz] + kz_next
+
+    @staticmethod
+    def subtract(minuend, subtrahend):
+        try:  # pandas implementation
+            return minuend.sub(subtrahend, axis=0)
+        except AttributeError:  # general implementation
+            return minuend - subtrahend
+
+    def run(self):
+        return self.spectral_calc()
+
+    def transfer_function(self):
+        m = self.wl[0]
+        k = self.itr[0]
+        omega = np.linspace(0.00001, 0.15, 5000)
+        return omega, (np.sin(m * np.pi * omega) / (m * np.sin(np.pi * omega))) ** (2 * k)
+
+    def omega_null(self, alpha=0.5):
+        a = np.sqrt(6) / np.pi
+        b = 1 / (2 * np.array(self.itr))
+        c = 1 - alpha ** b
+        d = np.array(self.wl) ** 2 - alpha ** b
+        return a * np.sqrt(c / d)
+
+    def period_null(self, alpha=0.5):
+        return 1. / self.omega_null(alpha)
+
+    def period_null_days(self, alpha=0.5):
+        return self.period_null(alpha) / 24.
+
+    def plot_transfer_function(self, fig=None, name=None):
+        if fig is None:
+            fig = plt.figure()
+        omega, transfer_function = self.transfer_function()
+        if self.child is not None:
+            transfer_function_child = self.child.plot_transfer_function(fig)
+        else:
+            transfer_function_child = transfer_function * 0
+        plt.semilogx(omega, transfer_function - transfer_function_child,
+                     label="m={:3.0f}, k={:3.0f}, T={:6.2f}d".format(self.wl[0],
+                                                                     self.itr[0],
+                                                                     self.period_null_days()))
+        plt.axvline(x=self.omega_null())
+        if not self._isChild:
+            locs, labels = plt.xticks()
+            plt.xticks(locs, np.round(1. / (locs * 24), 1))
+            plt.xlim([0.00001, 0.15])
+            plt.legend()
+            if name is None:
+                plt.show()
+            else:
+                plt.savefig(name)
+        else:
+            return transfer_function
+
+
+class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
+
+    def __init__(self, df, wl: Union[list, int], itr: Union[list, int], is_child=False, filter_dim="window",
+                 method="mean", percentile=0.5):
+        """
+        It creates the variables associated with the KolmogorovZurbenkoFilterMovingWindow class.
+
+        Args:
+            df(pd.DataFrame, xr.DataArray): time series of a variable
+            wl: window length
+            itr: number of iteration
+        """
+        self.valid_methods = ["mean", "percentile", "median", "max", "min"]
+        if method not in self.valid_methods:
+            raise ValueError("Method '{}' is not supported. Please select from [{}].".format(
+                method, ", ".join(self.valid_methods)))
+        else:
+            self.method = method
+            if percentile > 1 or percentile < 0:
+                raise ValueError("Percentile must be in range [0, 1]. Given was {}!".format(percentile))
+            else:
+                self.percentile = percentile
+        super().__init__(df, wl, itr, is_child, filter_dim)
+
+    def set_child(self):
+        if len(self.wl) > 1:
+            return KolmogorovZurbenkoFilterMovingWindow(self.df, self.wl[1:], self.itr[1:], is_child=True,
+                                                        filter_dim=self.filter_dim, method=self.method,
+                                                        percentile=self.percentile)
+        else:
+            return None
+
+    @TimeTrackingWrapper
+    def kz_filter_new(self, df, wl, itr):
+        """
+        Apply the low-pass filter and return the low-frequency time series.
+
+        If the filter method is one of mean, max, or min, this method calls construct and rechunk before the actual
+        calculation to improve performance. If the filter method is median or percentile, this approach is not
+        applicable and, depending on the data and window size, this method can become slow.
+
+        Args:
+             wl(int): a window length
+             itr(int): a number of iteration
+        """
+        warnings.filterwarnings("ignore")
+        df_itr = df.__deepcopy__()
+        try:
+            kwargs = {"min_periods": int(0.7 * wl),
+                      "center": True,
+                      self.filter_dim: wl}
+            for i in np.arange(0, itr):
+                print(i)
+                rolling = df_itr.chunk().rolling(**kwargs)
+                if self.method not in ["percentile", "median"]:
+                    rolling = rolling.construct("construct").chunk("auto")
+                if self.method == "median":
+                    df_mv_avg_tmp = rolling.median()
+                elif self.method == "percentile":
+                    df_mv_avg_tmp = rolling.quantile(self.percentile)
+                elif self.method == "max":
+                    df_mv_avg_tmp = rolling.max("construct")
+                elif self.method == "min":
+                    df_mv_avg_tmp = rolling.min("construct")
+                else:
+                    df_mv_avg_tmp = rolling.mean("construct")
+                df_itr = df_mv_avg_tmp.compute()
+                del df_mv_avg_tmp, rolling
+                gc.collect()
+            return df_itr
+        except ValueError:
+            raise  # re-raise with the original message and traceback
+
+    @TimeTrackingWrapper
+    def kz_filter(self, df, wl, itr):
+        """
+        Apply the low-pass filter and return the low-frequency time series.
+
+        Args:
+             wl(int): window length
+             itr(int): number of iterations
+        """
+        import warnings
+        warnings.filterwarnings("ignore")
+        df_itr = df.__deepcopy__()
+        try:
+            kwargs = {"min_periods": int(0.7 * wl),
+                      "center": True,
+                      self.filter_dim: wl}
+            iter_vars = df_itr.coords["variables"].values
+            for var in iter_vars:
+                df_itr_var = df_itr.sel(variables=[var])
+                for _ in np.arange(0, itr):
+                    df_itr_var = df_itr_var.chunk()
+                    rolling = df_itr_var.rolling(**kwargs)
+                    if self.method == "median":
+                        df_mv_avg_tmp = rolling.median()
+                    elif self.method == "percentile":
+                        df_mv_avg_tmp = rolling.quantile(self.percentile)
+                    elif self.method == "max":
+                        df_mv_avg_tmp = rolling.max()
+                    elif self.method == "min":
+                        df_mv_avg_tmp = rolling.min()
+                    else:
+                        df_mv_avg_tmp = rolling.mean()
+                    df_itr_var = df_mv_avg_tmp.compute()
+                df_itr.loc[{"variables": [var]}] = df_itr_var
+            return df_itr
+        except ValueError:
+            raise  # re-raise with the original message and traceback
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 30391998..0ee95098 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -9,12 +9,7 @@ import numpy as np
 import xarray as xr
 import pandas as pd
 from typing import Union, Tuple, Dict, List
-from matplotlib import pyplot as plt
 import itertools
-import gc
-import warnings
-
-from mlair.helpers import to_list, TimeTracking, TimeTrackingWrapper
 
 Data = Union[xr.DataArray, pd.DataFrame]
 
@@ -483,212 +478,3 @@ class SkillScores:
 
         return monthly_mean
 
-
-class KolmogorovZurbenkoBaseClass:
-
-    def __init__(self, df, wl, itr, is_child=False, filter_dim="window"):
-        """
-        It create the variables associate with the Kolmogorov-Zurbenko-filter.
-
-        Args:
-            df(pd.DataFrame, None): time series of a variable
-            wl(list of int): window length
-            itr(list of int): number of iteration
-        """
-        self.df = df
-        self.filter_dim = filter_dim
-        self.wl = to_list(wl)
-        self.itr = to_list(itr)
-        if abs(len(self.wl) - len(self.itr)) > 0:
-            raise ValueError("Length of lists for wl and itr must agree!")
-        self._isChild = is_child
-        self.child = self.set_child()
-        self.type = type(self).__name__
-
-    def set_child(self):
-        if len(self.wl) > 1:
-            return KolmogorovZurbenkoBaseClass(None, self.wl[1:], self.itr[1:], True, self.filter_dim)
-        else:
-            return None
-
-    def kz_filter(self, df, m, k):
-        pass
-
-    def spectral_calc(self):
-        df_start = self.df
-        kz = self.kz_filter(df_start, self.wl[0], self.itr[0])
-        filtered = self.subtract(df_start, kz)
-        # case I: no child avail -> return kz and remaining
-        if self.child is None:
-            return [kz, filtered]
-        # case II: has child -> return current kz and all child results
-        else:
-            self.child.df = filtered
-            kz_next = self.child.spectral_calc()
-            return [kz] + kz_next
-
-    @staticmethod
-    def subtract(minuend, subtrahend):
-        try:  # pandas implementation
-            return minuend.sub(subtrahend, axis=0)
-        except AttributeError:  # general implementation
-            return minuend - subtrahend
-
-    def run(self):
-        return self.spectral_calc()
-
-    def transfer_function(self):
-        m = self.wl[0]
-        k = self.itr[0]
-        omega = np.linspace(0.00001, 0.15, 5000)
-        return omega, (np.sin(m * np.pi * omega) / (m * np.sin(np.pi * omega))) ** (2 * k)
-
-    def omega_null(self, alpha=0.5):
-        a = np.sqrt(6) / np.pi
-        b = 1 / (2 * np.array(self.itr))
-        c = 1 - alpha ** b
-        d = np.array(self.wl) ** 2 - alpha ** b
-        return a * np.sqrt(c / d)
-
-    def period_null(self, alpha=0.5):
-        return 1. / self.omega_null(alpha)
-
-    def period_null_days(self, alpha=0.5):
-        return self.period_null(alpha) / 24.
-
-    def plot_transfer_function(self, fig=None, name=None):
-        if fig is None:
-            fig = plt.figure()
-        omega, transfer_function = self.transfer_function()
-        if self.child is not None:
-            transfer_function_child = self.child.plot_transfer_function(fig)
-        else:
-            transfer_function_child = transfer_function * 0
-        plt.semilogx(omega, transfer_function - transfer_function_child,
-                     label="m={:3.0f}, k={:3.0f}, T={:6.2f}d".format(self.wl[0],
-                                                                     self.itr[0],
-                                                                     self.period_null_days()))
-        plt.axvline(x=self.omega_null())
-        if not self._isChild:
-            locs, labels = plt.xticks()
-            plt.xticks(locs, np.round(1. / (locs * 24), 1))
-            plt.xlim([0.00001, 0.15])
-            plt.legend()
-            if name is None:
-                plt.show()
-            else:
-                plt.savefig(name)
-        else:
-            return transfer_function
-
-
-class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
-
-    def __init__(self, df, wl: Union[list, int], itr: Union[list, int], is_child=False, filter_dim="window",
-                 method="mean", percentile=0.5):
-        """
-        It create the variables associate with the KolmogorovZurbenkoFilterMovingWindow class.
-
-        Args:
-            df(pd.DataFrame, xr.DataArray): time series of a variable
-            wl: window length
-            itr: number of iteration
-        """
-        self.valid_methods = ["mean", "percentile", "median", "max", "min"]
-        if method not in self.valid_methods:
-            raise ValueError("Method '{}' is not supported. Please select from [{}].".format(
-                method, ", ".join(self.valid_methods)))
-        else:
-            self.method = method
-            if percentile > 1 or percentile < 0:
-                raise ValueError("Percentile must be in range [0, 1]. Given was {}!".format(percentile))
-            else:
-                self.percentile = percentile
-        super().__init__(df, wl, itr, is_child, filter_dim)
-
-    def set_child(self):
-        if len(self.wl) > 1:
-            return KolmogorovZurbenkoFilterMovingWindow(self.df, self.wl[1:], self.itr[1:], is_child=True,
-                                                        filter_dim=self.filter_dim, method=self.method,
-                                                        percentile=self.percentile)
-        else:
-            return None
-
-    @TimeTrackingWrapper
-    def kz_filter_new(self, df, wl, itr):
-        """
-        It passes the low frequency time series.
-
-        If filter method is from mean, max, min this method will call construct and rechunk before the actual
-        calculation to improve performance. If filter method is either median or percentile this approach is not
-        applicable and depending on the data and window size, this method can become slow.
-
-        Args:
-             wl(int): a window length
-             itr(int): a number of iteration
-        """
-        warnings.filterwarnings("ignore")
-        df_itr = df.__deepcopy__()
-        try:
-            kwargs = {"min_periods": int(0.7 * wl),
-                      "center": True,
-                      self.filter_dim: wl}
-            for i in np.arange(0, itr):
-                print(i)
-                rolling = df_itr.chunk().rolling(**kwargs)
-                if self.method not in ["percentile", "median"]:
-                    rolling = rolling.construct("construct").chunk("auto")
-                if self.method == "median":
-                    df_mv_avg_tmp = rolling.median()
-                elif self.method == "percentile":
-                    df_mv_avg_tmp = rolling.quantile(self.percentile)
-                elif self.method == "max":
-                    df_mv_avg_tmp = rolling.max("construct")
-                elif self.method == "min":
-                    df_mv_avg_tmp = rolling.min("construct")
-                else:
-                    df_mv_avg_tmp = rolling.mean("construct")
-                df_itr = df_mv_avg_tmp.compute()
-                del df_mv_avg_tmp, rolling
-                gc.collect()
-            return df_itr
-        except ValueError:
-            raise ValueError
-
-    @TimeTrackingWrapper
-    def kz_filter(self, df, wl, itr):
-        """
-        It passes the low frequency time series.
-
-        Args:
-             wl(int): a window length
-             itr(int): a number of iteration
-        """
-        import warnings
-        warnings.filterwarnings("ignore")
-        df_itr = df.__deepcopy__()
-        try:
-            kwargs = {"min_periods": int(0.7 * wl),
-                      "center": True,
-                      self.filter_dim: wl}
-            iter_vars = df_itr.coords["variables"].values
-            for var in iter_vars:
-                df_itr_var = df_itr.sel(variables=[var])
-                for _ in np.arange(0, itr):
-                    df_itr_var = df_itr_var.chunk()
-                    rolling = df_itr_var.rolling(**kwargs)
-                    if self.method == "median":
-                        df_mv_avg_tmp = rolling.median()
-                    elif self.method == "percentile":
-                        df_mv_avg_tmp = rolling.quantile(self.percentile)
-                    elif self.method == "max":
-                        df_mv_avg_tmp = rolling.max()
-                    elif self.method == "min":
-                        df_mv_avg_tmp = rolling.min()
-                    else:
-                        df_mv_avg_tmp = rolling.mean()
-                    df_itr_var = df_mv_avg_tmp.compute()
-                df_itr.loc[{"variables": [var]}] = df_itr_var
-            return df_itr
-        except ValueError:
-            raise ValueError
diff --git a/test/test_data_handler/test_data_handler_mixed_sampling.py b/test/test_data_handler/test_data_handler_mixed_sampling.py
index 2a6553b7..19899a77 100644
--- a/test/test_data_handler/test_data_handler_mixed_sampling.py
+++ b/test/test_data_handler/test_data_handler_mixed_sampling.py
@@ -5,7 +5,7 @@ from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampl
     DataHandlerMixedSamplingSingleStation, DataHandlerMixedSamplingWithFilter, \
     DataHandlerMixedSamplingWithFilterSingleStation, DataHandlerSeparationOfScales, \
     DataHandlerSeparationOfScalesSingleStation
-from mlair.data_handler.data_handler_kz_filter import DataHandlerKzFilterSingleStation
+from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.helpers import remove_items
 from mlair.configuration.defaults import DEFAULT_INTERPOLATION_METHOD
-- 
GitLab
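
As a usage note for the KZ classes moved above: omega_null solves the transfer function
(sin(m * pi * w) / (m * sin(pi * w)))**(2k) = alpha for the cutoff frequency. A small sketch
of the same formula (hypothetical helper name, assuming hourly steps):

    import numpy as np

    def kz_cutoff_period_days(wl, itr, alpha=0.5, steps_per_day=24):
        # frequency at which the KZ(wl, itr) transfer function drops to alpha
        b = 1 / (2 * np.array(itr))
        omega_0 = np.sqrt(6) / np.pi * np.sqrt(
            (1 - alpha ** b) / (np.array(wl) ** 2 - alpha ** b))
        return 1. / omega_0 / steps_per_day    # period in days

    print(kz_cutoff_period_days([361], [3]))   # cutoff period of a KZ(361, 3) filter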


From b0637556bf06b1c3629413f22408735a332d137e Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 11:58:56 +0200
Subject: [PATCH 094/175] datahandler kz and fir now inherit from a general
 filter data handler

---
 .../data_handler/data_handler_with_filter.py  | 198 ++++++++----------
 1 file changed, 91 insertions(+), 107 deletions(-)
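
The refactoring pattern applied here, as a minimal sketch (class and method names are
illustrative, not the mlair API): the base handler owns the common pipeline and dispatches
only the filter step to its subclasses.

    class FilterHandlerBase:
        def make_input_target(self):
            # ... load data, interpolate, set inputs and targets ...
            self.apply_filter()    # dispatch to subclass implementation

        def apply_filter(self):
            raise NotImplementedError

    class FirHandler(FilterHandlerBase):
        def apply_filter(self):
            print("FIR filtering inputs")

    class KzHandler(FilterHandlerBase):
        def apply_filter(self):
            print("KZ filtering inputs")

    for handler in (FirHandler(), KzHandler()):
        handler.make_input_target()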

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 6a37a447..0757e528 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -35,40 +35,24 @@ str_or_list = Union[str, List[str]]
 # y, h = fir_filter(station_data.values.flatten(), fs, order[0], cutoff_low = cutoff[0][0], cutoff_high = cutoff[0][1], window=window)
 # filtered = xr.ones_like(station_data) * y.reshape(station_data.values.shape)
 
-class DataHandlerFirFilterSingleStation(DataHandlerSingleStation):
-    """Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered."""
 
-    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
-    _hash = DataHandlerSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type",
-                                              "filter_dim", "filter_add_unfiltered"]
+class DataHandlerFilterSingleStation(DataHandlerSingleStation):
+    """General data handler for a single station to be used by a superior data handler."""
+
+    # _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
+    _requirements = DataHandlerSingleStation.requirements()
+    _hash = DataHandlerSingleStation._hash + ["filter_dim"]
 
     DEFAULT_FILTER_DIM = "filter"
-    DEFAULT_WINDOW_TYPE = ("kaiser", 5)
-    DEFAULT_ADD_UNFILTERED = False
 
-    def __init__(self, *args, filter_cutoff_period, filter_order, filter_window_type=DEFAULT_WINDOW_TYPE,
-                 filter_dim=DEFAULT_FILTER_DIM, filter_add_unfiltered=DEFAULT_ADD_UNFILTERED, **kwargs):
-        # self._check_sampling(**kwargs)
+    def __init__(self, *args, filter_dim=DEFAULT_FILTER_DIM, **kwargs):
         # self.original_data = None  # ToDo: implement here something to store unfiltered data
-        self.filter_cutoff_period = (lambda x: [x] if isinstance(x, tuple) else to_list(x))(filter_cutoff_period)
-        self.filter_cutoff_freq = self._period_to_freq(self.filter_cutoff_period)
-        assert len(self.filter_cutoff_period) == len(filter_order)
-        self.filter_order = filter_order
-        self.filter_window_type = filter_window_type
         self.filter_dim = filter_dim
-        self._add_unfiltered = filter_add_unfiltered
-        self.fs = self._get_fs(**kwargs)
-
         super().__init__(*args, **kwargs)
 
-    @staticmethod
-    def _period_to_freq(cutoff_p):
-        return list(map(lambda x: (1. / x[0] if x[0] is not None else None, 1. / x[1] if x[1] is not None else None),
-                        cutoff_p))
-
     def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
         """
-        Adjust setup of transformation because kfz filtered data will have negative values which is not compatible with
+        Adjust setup of transformation because filtered data will have negative values, which are not compatible with
         the log transformation. Therefore, replace all log transformation methods by a default standardization. This is
         only applied on input side.
         """
@@ -79,17 +63,6 @@ class DataHandlerFirFilterSingleStation(DataHandlerSingleStation):
                     transformation[0][k]["method"] = "standardise"
         return transformation
 
-    @staticmethod
-    def _get_fs(**kwargs):
-        """Return frequency in 1/day (not Hz)"""
-        sampling = kwargs.get("sampling")
-        if sampling == "daily":
-            return 1
-        elif sampling == "hourly":
-            return 24
-        else:
-            raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.")
-
     def _check_sampling(self, **kwargs):
         assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution, does it?
 
@@ -99,7 +72,7 @@ class DataHandlerFirFilterSingleStation(DataHandlerSingleStation):
         self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
                                       limit=self.interpolation_limit)
         self.set_inputs_and_targets()
-        self.apply_fir_filter()
+        self.apply_filter()
         # this is just a code snippet to check the results of the kz filter
         # import matplotlib
         # matplotlib.use("TkAgg")
@@ -107,8 +80,72 @@ class DataHandlerFirFilterSingleStation(DataHandlerSingleStation):
         # self.input_data.sel(filter="low", variables="temp", Stations="DEBW107").plot()
         # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
 
+    def apply_filter(self):
+        raise NotImplementedError
+
+    def create_filter_index(self) -> pd.Index:
+        """Create name for filter dimension."""
+        raise NotImplementedError
+
+    def get_transposed_history(self) -> xr.DataArray:
+        """Return history.
+
+        :return: history with dimensions datetime, window, Stations, variables, filter.
+        """
+        return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
+                                      self.filter_dim).copy()
+
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+
+    def _extract_lazy(self, lazy_data):
+        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
+        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
+
+
+class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
+    """Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered."""
+
+    # _requirements = remove_items(inspect.getfullargspec(DataHandlerFilterSingleStation).args, ["self", "station"])
+    _requirements = DataHandlerFilterSingleStation.requirements()
+    _hash = DataHandlerFilterSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type",
+                                                    "filter_add_unfiltered"]
+
+    DEFAULT_WINDOW_TYPE = ("kaiser", 5)
+    DEFAULT_ADD_UNFILTERED = False
+
+    def __init__(self, *args, filter_cutoff_period, filter_order, filter_window_type=DEFAULT_WINDOW_TYPE,
+                 filter_add_unfiltered=DEFAULT_ADD_UNFILTERED, **kwargs):
+        # self._check_sampling(**kwargs)
+        # self.original_data = None  # ToDo: implement here something to store unfiltered data
+        self.filter_cutoff_period = (lambda x: [x] if isinstance(x, tuple) else to_list(x))(filter_cutoff_period)
+        self.filter_cutoff_freq = self._period_to_freq(self.filter_cutoff_period)
+        assert len(self.filter_cutoff_period) == len(filter_order)
+        self.filter_order = filter_order
+        self.filter_window_type = filter_window_type
+        self._add_unfiltered = filter_add_unfiltered
+        self.fs = self._get_fs(**kwargs)
+        super().__init__(*args, **kwargs)
+
+    @staticmethod
+    def _period_to_freq(cutoff_p):
+        return list(map(lambda x: (1. / x[0] if x[0] is not None else None, 1. / x[1] if x[1] is not None else None),
+                        cutoff_p))
+
+    @staticmethod
+    def _get_fs(**kwargs):
+        """Return frequency in 1/day (not Hz)"""
+        sampling = kwargs.get("sampling")
+        if sampling == "daily":
+            return 1
+        elif sampling == "hourly":
+            return 24
+        else:
+            raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.")
+
     @TimeTrackingWrapper
-    def apply_fir_filter(self):
+    def apply_filter(self):
         """Apply FIR filter only on inputs."""
         fir = FIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq, self.filter_window_type,
                         self.target_dim)
@@ -117,6 +154,12 @@ class DataHandlerFirFilterSingleStation(DataHandlerSingleStation):
         if self._add_unfiltered is True:
             fir_data.append(self.input_data)
         self.input_data = xr.concat(fir_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
+        # this is just a code snippet to check the results of the kz filter
+        # import matplotlib
+        # matplotlib.use("TkAgg")
+        # import matplotlib.pyplot as plt
+        # self.input_data.sel(filter="low", variables="temp", Stations="DEBW107").plot()
+        # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
 
     def create_filter_index(self) -> pd.Index:
         """
@@ -138,22 +181,6 @@ class DataHandlerFirFilterSingleStation(DataHandlerSingleStation):
             index.append("unfiltered")
         return pd.Index(index, name=self.filter_dim)
 
-    def get_transposed_history(self) -> xr.DataArray:
-        """Return history.
-
-        :return: history with dimensions datetime, window, Stations, variables, filter.
-        """
-        return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
-                                      self.filter_dim).copy()
-
-    def _create_lazy_data(self):
-        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
-
-    def _extract_lazy(self, lazy_data):
-        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
-        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
-        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
-
 
 class DataHandlerFirFilter(DefaultDataHandler):
     """Data handler using FIR filtered data."""
@@ -163,62 +190,35 @@ class DataHandlerFirFilter(DefaultDataHandler):
     _requirements = data_handler.requirements()
 
 
-class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
+class DataHandlerKzFilterSingleStation(DataHandlerFilterSingleStation):
     """Data handler for a single station to be used by a superior data handler. Inputs are kz filtered."""
 
-    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
-    _hash = DataHandlerSingleStation._hash + ["kz_filter_length", "kz_filter_iter", "filter_dim"]
+    _requirements = remove_items(inspect.getfullargspec(DataHandlerFilterSingleStation).args, ["self", "station"])
+    _hash = DataHandlerFilterSingleStation._hash + ["kz_filter_length", "kz_filter_iter"]
 
-    DEFAULT_FILTER_DIM = "filter"
-
-    def __init__(self, *args, kz_filter_length, kz_filter_iter, filter_dim=DEFAULT_FILTER_DIM, **kwargs):
+    def __init__(self, *args, kz_filter_length, kz_filter_iter, **kwargs):
         self._check_sampling(**kwargs)
         # self.original_data = None  # ToDo: implement here something to store unfiltered data
         self.kz_filter_length = to_list(kz_filter_length)
         self.kz_filter_iter = to_list(kz_filter_iter)
-        self.filter_dim = filter_dim
         self.cutoff_period = None
         self.cutoff_period_days = None
         super().__init__(*args, **kwargs)
 
-    def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
-        """
-        Adjust setup of transformation because kfz filtered data will have negative values which is not compatible with
-        the log transformation. Therefore, replace all log transformation methods by a default standardization. This is
-        only applied on input side.
-        """
-        transformation = super(__class__, self).setup_transformation(transformation)
-        if transformation[0] is not None:
-            for k, v in transformation[0].items():
-                if v["method"] == "log":
-                    transformation[0][k]["method"] = "standardise"
-        return transformation
-
-    def _check_sampling(self, **kwargs):
-        assert kwargs.get("sampling") == "hourly"  # This data handler requires hourly data resolution
-
-    def make_input_target(self):
-        data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
-                                         self.station_type, self.network, self.store_data_locally, self.data_origin)
-        self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
-                                      limit=self.interpolation_limit)
-        self.set_inputs_and_targets()
-        self.apply_kz_filter()
-        # this is just a code snippet to check the results of the kz filter
-        # import matplotlib
-        # matplotlib.use("TkAgg")
-        # import matplotlib.pyplot as plt
-        # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot()
-        # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
-
     @TimeTrackingWrapper
-    def apply_kz_filter(self):
+    def apply_filter(self):
         """Apply kolmogorov zurbenko filter only on inputs."""
         kz = KZFilter(self.input_data, wl=self.kz_filter_length, itr=self.kz_filter_iter, filter_dim=self.time_dim)
         filtered_data: List[xr.DataArray] = kz.run()
         self.cutoff_period = kz.period_null()
         self.cutoff_period_days = kz.period_null_days()
         self.input_data = xr.concat(filtered_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
+        # this is just a code snippet to check the results of the kz filter
+        # import matplotlib
+        # matplotlib.use("TkAgg")
+        # import matplotlib.pyplot as plt
+        # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot()
+        # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
 
     def create_filter_index(self) -> pd.Index:
         """
@@ -233,22 +233,6 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
         index = list(map(lambda x: str(x) + "d", index)) + ["res"]
         return pd.Index(index, name=self.filter_dim)
 
-    def get_transposed_history(self) -> xr.DataArray:
-        """Return history.
-
-        :return: history with dimensions datetime, window, Stations, variables, filter.
-        """
-        return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
-                                      self.filter_dim).copy()
-
-    def _create_lazy_data(self):
-        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
-
-    def _extract_lazy(self, lazy_data):
-        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
-        f_prep = partial(self._slice_prep, start=self.start, end=self.end)
-        self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
-
 
 class DataHandlerKzFilter(DefaultDataHandler):
     """Data handler using kz filtered data."""
-- 
GitLab
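
The split above is a classic template-method refactor: DataHandlerFilterSingleStation owns the shared pipeline and
declares apply_filter and create_filter_index as abstract hooks that the FIR and KZ subclasses fill in. A minimal,
self-contained sketch of the pattern (class and method names mirror the diff; everything else is simplified):

    class FilterSingleStationBase:
        def make_input_target(self):
            # ... load, interpolate and split data (shared logic) ...
            self.apply_filter()  # hook, implemented by the concrete filter class

        def apply_filter(self):
            raise NotImplementedError

        def create_filter_index(self):
            raise NotImplementedError

    class FirFilterStation(FilterSingleStationBase):
        def apply_filter(self):
            print("apply FIR filter on inputs")

        def create_filter_index(self):
            return ["low", "band", "res"]

    FirFilterStation().make_input_target()  # -> apply FIR filter on inputs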


From ec34a136d2c4cd0dfc79001f82bbce59ddd30b10 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 12:38:29 +0200
Subject: [PATCH 095/175] new class
 DataHandlerMixedSamplingWithFilterSingleStation that bundles common methods
 of the kz and fir filter when used as mixed sampling

---
 .../data_handler_mixed_sampling.py            | 65 ++++++++++++++-----
 .../data_handler/data_handler_with_filter.py  | 18 ++++-
 .../test_data_handler_mixed_sampling.py       | 16 ++---
 3 files changed, 72 insertions(+), 27 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 4c84866b..71f9fe73 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -2,7 +2,8 @@ __author__ = 'Lukas Leufen'
 __date__ = '2020-11-05'
 
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
-from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation
+from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation, \
+    DataHandlerFirFilterSingleStation, DataHandlerFilterSingleStation
 from mlair.data_handler import DefaultDataHandler
 from mlair import helpers
 from mlair.helpers import remove_items
@@ -94,8 +95,8 @@ class DataHandlerMixedSampling(DefaultDataHandler):
 
 
 class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSingleStation,
-                                                      DataHandlerKzFilterSingleStation):
-    _requirements1 = DataHandlerKzFilterSingleStation.requirements()
+                                                      DataHandlerFilterSingleStation):
+    _requirements1 = DataHandlerFilterSingleStation.requirements()
     _requirements2 = DataHandlerMixedSamplingSingleStation.requirements()
     _requirements = list(set(_requirements1 + _requirements2))
 
@@ -107,19 +108,16 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
 
     def make_input_target(self):
         """
-        A KZ filter is applied on the input data that has hourly resolution. Lables Y are provided as aggregated values
+        A FIR filter is applied on the input data that has hourly resolution. Labels Y are provided as aggregated values
         with daily resolution.
         """
         self._data = tuple(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()
-        self.apply_kz_filter()
+        self.apply_filter()
 
     def estimate_filter_width(self):
-        """
-        f = 0.5 / (len * sqrt(itr)) -> T = 1 / f
-        :return:
-        """
-        return int(self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2)
+        """Return maximum filter width."""
+        raise NotImplementedError
 
     @staticmethod
     def _add_time_delta(date, delta):
@@ -156,22 +154,55 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
         return data
 
     def _extract_lazy(self, lazy_data):
-        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        _data, self.meta, _input_data, _target_data = lazy_data
         start_inp, end_inp = self.update_start_end(0)
         self._data = tuple(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1]))
         self.input_data = self._slice_prep(_input_data, start_inp, end_inp)
         self.target_data = self._slice_prep(_target_data, self.start, self.end)
 
 
-class DataHandlerMixedSamplingWithFilter(DefaultDataHandler):
+class DataHandlerMixedSamplingWithKzFilterSingleStation(DataHandlerMixedSamplingWithFilterSingleStation,
+                                                        DataHandlerKzFilterSingleStation):
+    _requirements1 = DataHandlerKzFilterSingleStation.requirements()
+    _requirements2 = DataHandlerMixedSamplingWithFilterSingleStation.requirements()
+    _requirements = list(set(_requirements1 + _requirements2))
+
+    def estimate_filter_width(self):
+        """
+        f = 0.5 / (len * sqrt(itr)) -> T = 1 / f
+        :return:
+        """
+        return int(self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2)
+
+    def _extract_lazy(self, lazy_data):
+        _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
+
+
+class DataHandlerMixedSamplingWithKzFilter(DefaultDataHandler):
     """Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
 
-    data_handler = DataHandlerMixedSamplingWithFilterSingleStation
-    data_handler_transformation = DataHandlerMixedSamplingWithFilterSingleStation
+    data_handler = DataHandlerMixedSamplingWithKzFilterSingleStation
+    data_handler_transformation = DataHandlerMixedSamplingWithKzFilterSingleStation
     _requirements = data_handler.requirements()
 
 
-class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFilterSingleStation):
+class DataHandlerMixedSamplingWithFirFilterSingleStation(DataHandlerMixedSamplingWithFilterSingleStation,
+                                                         DataHandlerFirFilterSingleStation):
+    _requirements1 = DataHandlerFirFilterSingleStation.requirements()
+    _requirements2 = DataHandlerMixedSamplingWithFilterSingleStation.requirements()
+    _requirements = list(set(_requirements1 + _requirements2))
+
+    def estimate_filter_width(self):
+        """ """
+        return 5  # Todo: adjust this method
+
+    def _extract_lazy(self, lazy_data):
+        _data, _meta, _input_data, _target_data, self.fir_coeff = lazy_data
+        super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
+
+
+class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithKzFilterSingleStation):
     """
     Data handler using mixed sampling for input and target. Inputs are temporal filtered and depending on the
     separation frequency of a filtered time series the time step delta for input data is adjusted (see image below).
@@ -181,8 +212,8 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
 
     """
 
-    _requirements = DataHandlerMixedSamplingWithFilterSingleStation.requirements()
-    _hash = DataHandlerMixedSamplingWithFilterSingleStation._hash + ["time_delta"]
+    _requirements = DataHandlerMixedSamplingWithKzFilterSingleStation.requirements()
+    _hash = DataHandlerMixedSamplingWithKzFilterSingleStation._hash + ["time_delta"]
 
     def __init__(self, *args, time_delta=np.sqrt, **kwargs):
         assert isinstance(time_delta, Callable)
diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 0757e528..740642fe 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -96,10 +96,10 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation):
                                       self.filter_dim).copy()
 
     def _create_lazy_data(self):
-        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+        raise NotImplementedError
 
     def _extract_lazy(self, lazy_data):
-        _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        _data, self.meta, _input_data, _target_data = lazy_data
         f_prep = partial(self._slice_prep, start=self.start, end=self.end)
         self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
 
@@ -181,6 +181,13 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
             index.append("unfiltered")
         return pd.Index(index, name=self.filter_dim)
 
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.fir_coeff]
+
+    def _extract_lazy(self, lazy_data):
+        _data, _meta, _input_data, _target_data, self.fir_coeff = lazy_data
+        super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
+
 
 class DataHandlerFirFilter(DefaultDataHandler):
     """Data handler using FIR filtered data."""
@@ -233,6 +240,13 @@ class DataHandlerKzFilterSingleStation(DataHandlerFilterSingleStation):
         index = list(map(lambda x: str(x) + "d", index)) + ["res"]
         return pd.Index(index, name=self.filter_dim)
 
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+
+    def _extract_lazy(self, lazy_data):
+        _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
+
 
 class DataHandlerKzFilter(DefaultDataHandler):
     """Data handler using kz filtered data."""
diff --git a/test/test_data_handler/test_data_handler_mixed_sampling.py b/test/test_data_handler/test_data_handler_mixed_sampling.py
index 19899a77..56751c44 100644
--- a/test/test_data_handler/test_data_handler_mixed_sampling.py
+++ b/test/test_data_handler/test_data_handler_mixed_sampling.py
@@ -2,8 +2,8 @@ __author__ = 'Lukas Leufen'
 __date__ = '2020-12-10'
 
 from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampling, \
-    DataHandlerMixedSamplingSingleStation, DataHandlerMixedSamplingWithFilter, \
-    DataHandlerMixedSamplingWithFilterSingleStation, DataHandlerSeparationOfScales, \
+    DataHandlerMixedSamplingSingleStation, DataHandlerMixedSamplingWithKzFilter, \
+    DataHandlerMixedSamplingWithKzFilterSingleStation, DataHandlerSeparationOfScales, \
     DataHandlerSeparationOfScalesSingleStation
 from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
@@ -89,15 +89,15 @@ class TestDataHandlerMixedSamplingSingleStation:
 class TestDataHandlerMixedSamplingWithFilter:
 
     def test_data_handler(self):
-        obj = object.__new__(DataHandlerMixedSamplingWithFilter)
-        assert obj.data_handler.__qualname__ == DataHandlerMixedSamplingWithFilterSingleStation.__qualname__
+        obj = object.__new__(DataHandlerMixedSamplingWithKzFilter)
+        assert obj.data_handler.__qualname__ == DataHandlerMixedSamplingWithKzFilterSingleStation.__qualname__
 
     def test_data_handler_transformation(self):
-        obj = object.__new__(DataHandlerMixedSamplingWithFilter)
-        assert obj.data_handler_transformation.__qualname__ == DataHandlerMixedSamplingWithFilterSingleStation.__qualname__
+        obj = object.__new__(DataHandlerMixedSamplingWithKzFilter)
+        assert obj.data_handler_transformation.__qualname__ == DataHandlerMixedSamplingWithKzFilterSingleStation.__qualname__
 
     def test_requirements(self):
-        obj = object.__new__(DataHandlerMixedSamplingWithFilter)
+        obj = object.__new__(DataHandlerMixedSamplingWithKzFilter)
         req1 = object.__new__(DataHandlerMixedSamplingSingleStation)
         req2 = object.__new__(DataHandlerKzFilterSingleStation)
         req = list(set(req1.requirements() + req2.requirements()))
@@ -119,7 +119,7 @@ class TestDataHandlerSeparationOfScales:
         assert obj.data_handler_transformation.__qualname__ == DataHandlerSeparationOfScalesSingleStation.__qualname__
 
     def test_requirements(self):
-        obj = object.__new__(DataHandlerMixedSamplingWithFilter)
+        obj = object.__new__(DataHandlerMixedSamplingWithKzFilter)
         req1 = object.__new__(DataHandlerMixedSamplingSingleStation)
         req2 = object.__new__(DataHandlerKzFilterSingleStation)
         req = list(set(req1.requirements() + req2.requirements()))
-- 
GitLab
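
The lazy-data handling now chains through the hierarchy: each subclass appends its own extra state (cutoff periods for
KZ, FIR coefficients for FIR) to the common tuple, strips it off again in _extract_lazy and delegates the rest to
super(). A hedged sketch of that chaining (the diff unpacks explicitly; the star-unpacking here is a compact
equivalent):

    class Base:
        def _create_lazy_data(self):
            return [self._data, self.meta, self.input_data, self.target_data]

        def _extract_lazy(self, lazy_data):
            self._data, self.meta, self.input_data, self.target_data = lazy_data

    class KzLevel(Base):
        def _create_lazy_data(self):
            # append subclass-specific state to the common tuple
            return super()._create_lazy_data() + [self.cutoff_period, self.cutoff_period_days]

        def _extract_lazy(self, lazy_data):
            # strip the subclass-specific state and forward the rest
            *common, self.cutoff_period, self.cutoff_period_days = lazy_data
            super()._extract_lazy(common)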


From 607fc80359120875c1102d80b50de720f29e635b Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 13:11:38 +0200
Subject: [PATCH 096/175] filter width for fir filter is now correct

---
 .../data_handler_mixed_sampling.py            | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 71f9fe73..718a8f3e 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -194,13 +194,32 @@ class DataHandlerMixedSamplingWithFirFilterSingleStation(DataHandlerMixedSamplin
     _requirements = list(set(_requirements1 + _requirements2))
 
     def estimate_filter_width(self):
-        """ """
-        return 5  # Todo: adjust this method
+        """Filter width is determined by the filter with the highest order."""
+        return max(self.filter_order)
 
     def _extract_lazy(self, lazy_data):
         _data, _meta, _input_data, _target_data, self.fir_coeff = lazy_data
         super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
 
+    @staticmethod
+    def _get_fs(**kwargs):
+        """Return frequency in 1/day (not Hz)"""
+        sampling = kwargs.get("sampling")[0]
+        if sampling == "daily":
+            return 1
+        elif sampling == "hourly":
+            return 24
+        else:
+            raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.")
+
+
+class DataHandlerMixedSamplingWithFirFilter(DefaultDataHandler):
+    """Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
+
+    data_handler = DataHandlerMixedSamplingWithFirFilterSingleStation
+    data_handler_transformation = DataHandlerMixedSamplingWithFirFilterSingleStation
+    _requirements = data_handler.requirements()
+
 
 class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithKzFilterSingleStation):
     """
-- 
GitLab
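
In the mixed-sampling variant, sampling is presumably passed as a pair of (input, target) resolutions, matching
load_and_interpolate's use of index 0 for the inputs, which is why _get_fs is overridden to read sampling[0]. A
condensed, hypothetical check of that behaviour (function name invented for illustration):

    def get_fs_mixed(**kwargs):
        sampling = kwargs.get("sampling")[0]  # input resolution of the (input, target) pair
        return {"daily": 1, "hourly": 24}[sampling]  # frequency in 1/day, not Hz

    print(get_fs_mixed(sampling=("hourly", "daily")))  # -> 24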


From 37a30a9f2e3a4816dc3f2779168001772046d379 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 13:22:58 +0200
Subject: [PATCH 097/175] test will now pass

---
 mlair/data_handler/data_handler_with_filter.py            | 6 ++----
 .../test_data_handler/test_data_handler_mixed_sampling.py | 8 ++++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 740642fe..b3274b69 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -39,8 +39,7 @@ str_or_list = Union[str, List[str]]
 class DataHandlerFilterSingleStation(DataHandlerSingleStation):
     """General data handler for a single station to be used by a superior data handler."""
 
-    # _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
-    _requirements = DataHandlerSingleStation.requirements()
+    _requirements = remove_items(DataHandlerSingleStation.requirements(), "station")
     _hash = DataHandlerSingleStation._hash + ["filter_dim"]
 
     DEFAULT_FILTER_DIM = "filter"
@@ -107,8 +106,7 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation):
 class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
     """Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered."""
 
-    # _requirements = remove_items(inspect.getfullargspec(DataHandlerFilterSingleStation).args, ["self", "station"])
-    _requirements = DataHandlerFilterSingleStation.requirements()
+    _requirements = remove_items(DataHandlerFilterSingleStation.requirements(), "station")
     _hash = DataHandlerFilterSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type",
                                                     "filter_add_unfiltered"]
 
diff --git a/test/test_data_handler/test_data_handler_mixed_sampling.py b/test/test_data_handler/test_data_handler_mixed_sampling.py
index 56751c44..7418a435 100644
--- a/test/test_data_handler/test_data_handler_mixed_sampling.py
+++ b/test/test_data_handler/test_data_handler_mixed_sampling.py
@@ -4,7 +4,7 @@ __date__ = '2020-12-10'
 from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampling, \
     DataHandlerMixedSamplingSingleStation, DataHandlerMixedSamplingWithKzFilter, \
     DataHandlerMixedSamplingWithKzFilterSingleStation, DataHandlerSeparationOfScales, \
-    DataHandlerSeparationOfScalesSingleStation
+    DataHandlerSeparationOfScalesSingleStation, DataHandlerMixedSamplingWithFilterSingleStation
 from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.helpers import remove_items
@@ -86,7 +86,7 @@ class TestDataHandlerMixedSamplingSingleStation:
         pass
 
 
-class TestDataHandlerMixedSamplingWithFilter:
+class TestDataHandlerMixedSamplingWithKzFilter:
 
     def test_data_handler(self):
         obj = object.__new__(DataHandlerMixedSamplingWithKzFilter)
@@ -98,7 +98,7 @@ class TestDataHandlerMixedSamplingWithFilter:
 
     def test_requirements(self):
         obj = object.__new__(DataHandlerMixedSamplingWithKzFilter)
-        req1 = object.__new__(DataHandlerMixedSamplingSingleStation)
+        req1 = object.__new__(DataHandlerMixedSamplingWithFilterSingleStation)
         req2 = object.__new__(DataHandlerKzFilterSingleStation)
         req = list(set(req1.requirements() + req2.requirements()))
         assert sorted(obj._requirements) == sorted(remove_items(req, "station"))
@@ -120,7 +120,7 @@ class TestDataHandlerSeparationOfScales:
 
     def test_requirements(self):
         obj = object.__new__(DataHandlerMixedSamplingWithKzFilter)
-        req1 = object.__new__(DataHandlerMixedSamplingSingleStation)
+        req1 = object.__new__(DataHandlerMixedSamplingWithFilterSingleStation)
         req2 = object.__new__(DataHandlerKzFilterSingleStation)
         req = list(set(req1.requirements() + req2.requirements()))
         assert sorted(obj._requirements) == sorted(remove_items(req, "station"))
-- 
GitLab
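
The fix above matters because requirements() collects constructor arguments by introspection, so each inheritance
level has to strip "station" again before the list is reused. A standalone sketch of that mechanism (requirements
shown here as a plain function rather than MLAir's classmethod):

    import inspect

    def requirements(cls):
        spec = inspect.getfullargspec(cls.__init__)
        return [a for a in spec.args + spec.kwonlyargs if a != "self"]

    class Station:
        def __init__(self, station, window_history_size, sampling):
            ...

    print(requirements(Station))  # ['station', 'window_history_size', 'sampling']
    # remove_items(requirements(Station), "station") -> ['window_history_size', 'sampling']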


From 0eb8c234b1f38b74fa0542a0e203ca8eabdcd4be Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 14:26:51 +0200
Subject: [PATCH 098/175] periodogram will not plot the "unfiltered" filter
 component

---
 mlair/plotting/data_insight_plotting.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 45f5d3d6..cc8ba279 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -803,13 +803,15 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name)
         logging.info(f"... plotting {plot_name}")
         pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
-        colors = ["blue", "red", "green", "orange", "purple", "black", "grey"]
+        colors = ["grey", "blue", "red", "green", "orange", "purple", "black"]
         label_names = ["orig"] + label_names
         max_iter = len(self.plot_data)
         var_keys = self.plot_data[0].keys()
         for var in var_keys:
             fig, ax = plt.subplots()
             for i in reversed(range(max_iter)):
+                if label_names[i] == "unfiltered":
+                    continue  # do not include the filter 'unfiltered' because this is equal to the 'orig' data
                 plot_data = self.plot_data[i]
                 c = colors[i]
                 ma = pd.DataFrame(np.vstack(plot_data[var]).T).rolling(5, center=True, axis=0)
-- 
GitLab


From f29988ee34df166f581e15da1c7952f011d4278d Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 14:30:52 +0200
Subject: [PATCH 099/175] periodogram is now normalized

---
 mlair/plotting/data_insight_plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index cc8ba279..dcaf077b 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -831,7 +831,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
 def f_proc(var, d_var):  # pragma: no cover
     var_str = str(var)
     t = (d_var.datetime - d_var.datetime[0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
-    f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1).autopower()
+    f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").autopower()
     return var_str, f, pgram
 
 
-- 
GitLab
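
With normalization="psd" the periodogram is scaled to power spectral density instead of the dimensionless default, so
the spectra of different variables become comparable in absolute terms. A standalone check of the changed call,
assuming astropy's LombScargle (the import is outside this hunk, but the signature matches the call in f_proc):

    import numpy as np
    from astropy.timeseries import LombScargle

    t = np.arange(0, 365, 1 / 24)  # one year of hourly time stamps, in days
    y = np.sin(2 * np.pi * t) + 0.1 * np.random.randn(t.size)  # daily cycle + noise
    f, pgram = LombScargle(t, y, nterms=1, normalization="psd").autopower()
    print(f[np.argmax(pgram)])  # peak close to 1 cycle per day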


From 2e52fa7114fe8feaf0be6339df2c91235e5e2687 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 28 Apr 2021 14:57:31 +0200
Subject: [PATCH 100/175] mismatch between hash and parameter name

---
 mlair/data_handler/data_handler_with_filter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index b3274b69..e7101468 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -108,7 +108,7 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
 
     _requirements = remove_items(DataHandlerFilterSingleStation.requirements(), "station")
     _hash = DataHandlerFilterSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type",
-                                                    "filter_add_unfiltered"]
+                                                    "_add_unfiltered"]
 
     DEFAULT_WINDOW_TYPE = ("kaiser", 5)
     DEFAULT_ADD_UNFILTERED = False
-- 
GitLab
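
The renamed entry reflects that the constructor stores the kwarg filter_add_unfiltered under self._add_unfiltered, and
the _hash list apparently has to name real instance attributes. A hypothetical illustration of how such a lookup-based
hash would fail (MLAir's actual hashing code is not part of this patch):

    import hashlib

    def handler_hash(obj, hash_list):
        # hypothetical: build a hash from the values of the listed attributes
        payload = "".join(str(getattr(obj, attr)) for attr in hash_list)
        return hashlib.md5(payload.encode()).hexdigest()

    class Handler:
        def __init__(self, filter_add_unfiltered=False):
            self._add_unfiltered = filter_add_unfiltered

    handler_hash(Handler(), ["_add_unfiltered"])         # works
    handler_hash(Handler(), ["filter_add_unfiltered"])   # AttributeError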


From 3bcb957f5e68b7df64bad234c8fd7d45797d4739 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 29 Apr 2021 13:31:58 +0200
Subject: [PATCH 101/175] added proposed if clause, /close #307 if bug
 disappears in test setup

---
 mlair/data_handler/data_handler_single_station.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 89aafa2c..25822762 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -406,7 +406,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
                      "propane", "so2", "toluene"]
         # used_chem_vars = list(set(chem_vars) & set(self.statistics_per_var.keys()))
         used_chem_vars = list(set(chem_vars) & set(data.variables.values))
-        data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=minimum)
+        if len(used_chem_vars) > 0:
+            data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=minimum)
         return data
 
     def setup_data_path(self, data_path: str, sampling: str):
-- 
GitLab
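
Reduced to its core, the guard only applies the clip when at least one chemical variable is actually present in the
data. A small xarray example of the guarded pattern (data and variable names invented for illustration):

    import numpy as np
    import xarray as xr

    data = xr.DataArray(np.array([[-1.0, 12.5]]), dims=("datetime", "variables"),
                        coords={"variables": ["o3", "temp"]})
    chem_vars = ["o3", "no2", "so2"]
    used_chem_vars = list(set(chem_vars) & set(data.coords["variables"].values))
    if len(used_chem_vars) > 0:  # skip entirely when no chemical variable is present
        data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=0)
    print(data.values)  # [[ 0.  12.5]] -> o3 clipped, temp untouched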


From a6ba585001319fa7ea63a0b258830137b81dccd3 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 29 Apr 2021 14:37:09 +0200
Subject: [PATCH 102/175] fir filter can be used with daily data

---
 .../data_handler/data_handler_with_filter.py  | 33 ++++++++++++++++---
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index e7101468..b9b90d44 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -117,15 +117,40 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
                  filter_add_unfiltered=DEFAULT_ADD_UNFILTERED, **kwargs):
         # self._check_sampling(**kwargs)
         # self.original_data = None  # ToDo: implement here something to store unfiltered data
-        self.filter_cutoff_period = (lambda x: [x] if isinstance(x, tuple) else to_list(x))(filter_cutoff_period)
+        self.fs = self._get_fs(**kwargs)
+        self.filter_cutoff_period, removed_index = self._prepare_filter_cutoff_period(filter_cutoff_period, self.fs)
         self.filter_cutoff_freq = self._period_to_freq(self.filter_cutoff_period)
-        assert len(self.filter_cutoff_period) == len(filter_order)
-        self.filter_order = filter_order
+        assert len(self.filter_cutoff_period) == (len(filter_order) - len(removed_index))
+        self.filter_order = self._prepare_filter_order(filter_order, removed_index, self.fs)
         self.filter_window_type = filter_window_type
         self._add_unfiltered = filter_add_unfiltered
-        self.fs = self._get_fs(**kwargs)
         super().__init__(*args, **kwargs)
 
+    @staticmethod
+    def _prepare_filter_order(filter_order, removed_index, fs):
+        order = []
+        for i, o in enumerate(filter_order):
+            if i not in removed_index:
+                fo = int(o * fs)
+                fo = fo + 1 if fo % 2 == 0 else fo
+                order.append(fo)
+        return order
+
+    @staticmethod
+    def _prepare_filter_cutoff_period(filter_cutoff_period, fs):
+        """Frequency must be smaller than the sampling frequency fs. Otherwise remove given cutoff period pair."""
+        cutoff_tmp = (lambda x: [x] if isinstance(x, tuple) else to_list(x))(filter_cutoff_period)
+        cutoff = []
+        removed = []
+        for i, (low, high) in enumerate(cutoff_tmp):
+            low = low if (low is None or low > 2. / fs) else None
+            high = high if (high is None or high > 2. / fs) else None
+            if any([low, high]):
+                cutoff.append((low, high))
+            else:
+                removed.append(i)
+        return cutoff, removed
+
     @staticmethod
     def _period_to_freq(cutoff_p):
         return list(map(lambda x: (1. / x[0] if x[0] is not None else None, 1. / x[1] if x[1] is not None else None),
-- 
GitLab
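
Concretely, with daily data (fs = 1 sample per day) any cutoff period at or below 2 days sits beyond the Nyquist
limit, so its (low, high) pair is dropped and the matching filter order is skipped; the remaining orders are scaled to
samples and forced odd. A worked example tracing the two helpers from the diff:

    fs = 1  # daily data
    cutoff_tmp = [(None, 30), (1, None)]  # 30-day low pass and 1-day high pass
    cutoff, removed = [], []
    for i, (low, high) in enumerate(cutoff_tmp):
        low = low if (low is None or low > 2. / fs) else None
        high = high if (high is None or high > 2. / fs) else None
        if any([low, high]):
            cutoff.append((low, high))
        else:
            removed.append(i)
    print(cutoff, removed)  # [(None, 30)] [1] -> the 1-day pair is unresolvable

    order = []
    for i, o in enumerate([42, 3]):
        if i not in removed:
            fo = int(o * fs)
            fo = fo + 1 if fo % 2 == 0 else fo  # FIR order is forced to be odd
            order.append(fo)
    print(order)  # [43]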


From 4c596cc874e7335198fc339b86c7e66bbb8762ef Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 29 Apr 2021 15:45:51 +0200
Subject: [PATCH 103/175] small fix

---
 mlair/helpers/statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 0ee95098..6e25a368 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -333,7 +333,7 @@ class SkillScores:
             skill_score.loc[["CASE II", "AII", "BII"], iahead] = np.stack(self._climatological_skill_score(
                 data, mu_type=2, forecast_name=forecast_name, observation_name=self.observation_name).values.flatten())
 
-            if self.external_data is not None:
+            if self.external_data is not None and self.observation_name in self.external_data.coords["type"]:
                 external_data = self.external_data.sel(ahead=iahead, type=[self.observation_name])
                 skill_score.loc[["CASE III", "AIII"], iahead] = np.stack(self._climatological_skill_score(
                     data, mu_type=3, forecast_name=forecast_name, observation_name=self.observation_name,
-- 
GitLab
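
The added condition is a membership test on the values of the "type" coordinate, so the CASE III score is only
computed when the observations are actually part of external_data; without it, the subsequent .sel would raise a
KeyError. A small sketch of the guard (array contents invented for illustration):

    import numpy as np
    import xarray as xr

    external_data = xr.DataArray(np.zeros((2, 1)), dims=("ahead", "type"),
                                 coords={"ahead": [1, 2], "type": ["CNN"]})
    if "obs" in external_data.coords["type"]:  # membership test on coordinate values
        subset = external_data.sel(ahead=1, type=["obs"])
    else:
        subset = None  # CASE III / AIII skill scores are skipped
    print(subset)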


From 6917b04adfc7936993963c7673303eb670692b2e Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 3 May 2021 11:29:15 +0200
Subject: [PATCH 104/175] MLAir can store parameters during preprocessing from
 train subset when using store_attributes

---
 mlair/data_handler/abstract_data_handler.py | 26 +++++++++++++++++++++
 mlair/data_handler/default_data_handler.py  | 11 +++++++++
 mlair/run_modules/pre_processing.py         | 14 +++++++++++
 3 files changed, 51 insertions(+)

diff --git a/mlair/data_handler/abstract_data_handler.py b/mlair/data_handler/abstract_data_handler.py
index 419db059..c020a413 100644
--- a/mlair/data_handler/abstract_data_handler.py
+++ b/mlair/data_handler/abstract_data_handler.py
@@ -11,6 +11,7 @@ from mlair.helpers import remove_items
 class AbstractDataHandler:
 
     _requirements = []
+    _store_attributes = []
 
     def __init__(self, *args, **kwargs):
         pass
@@ -32,6 +33,31 @@ class AbstractDataHandler:
         list_of_args = arg_spec.args + arg_spec.kwonlyargs
         return remove_items(list_of_args, ["self"] + list(args))
 
+    @classmethod
+    def store_attributes(cls):
+        """
+        Let MLAir know that some data should be stored in the data store. This is used for calculations on the train
+        subset that should be applied to the validation and test subsets.
+
+        To work properly, add a class variable cls._store_attributes to your data handler. If your custom data handler
+        is constructed on different data handlers (e.g. like the DefaultDataHandler), it is also required to
+        overwrite the get_store_attributes method to return the attributes from the corresponding subclasses. This is
+        not required if only attributes from the main class are to be returned.
+
+        Note that MLAir will store these attributes with the data handler's identification. This depends on the custom
+        data handler setting. When loading an attribute from the data handler, it is therefore required to extract the
+        right information by using the class identification. In the case of the DefaultDataHandler, this can be
+        achieved by converting all keys of the attribute to string and comparing these with the station parameter.
+        """
+        return list(set(cls._store_attributes))
+
+    def get_store_attributes(self):
+        """Returns all attribute names and values that are indicated by the store_attributes method."""
+        attr_dict = {}
+        for attr in self.store_attributes():
+            attr_dict[attr] = self.__getattribute__(attr)
+        return attr_dict
+
     @classmethod
     def transformation(cls, *args, **kwargs):
         return None
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 11461ad7..73b6b53d 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -33,6 +33,7 @@ class DefaultDataHandler(AbstractDataHandler):
     from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation as data_handler_transformation
 
     _requirements = remove_items(inspect.getfullargspec(data_handler).args, ["self", "station"])
+    _store_attributes = data_handler.store_attributes()
 
     DEFAULT_ITER_DIM = "Stations"
     DEFAULT_TIME_DIM = "datetime"
@@ -93,6 +94,16 @@ class DefaultDataHandler(AbstractDataHandler):
             logging.debug(f"save pickle data to {self._save_file}")
             self._reset_data()
 
+    def get_store_attributes(self):
+        attr_dict = {}
+        for attr in self.store_attributes():
+            try:
+                val = self.__getattribute__(attr)
+            except AttributeError:
+                val = self.id_class.__getattribute__(attr)
+            attr_dict[attr] = val
+        return attr_dict
+
     @staticmethod
     def _force_dask_computation(data):
         try:
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 9d44ce0b..d50f6f9a 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -268,8 +268,22 @@ class PreProcessing(RunEnvironment):
 
         logging.info(f"run for {t_outer} to check {len(set_stations)} station(s). Found {len(collection)}/"
                      f"{len(set_stations)} valid stations.")
+        if set_name == "train":
+            self.store_data_handler_attributes(data_handler, collection)
         return collection, valid_stations
 
+    def store_data_handler_attributes(self, data_handler, collection):
+        store_attributes = data_handler.store_attributes()
+        if len(store_attributes) > 0:
+            logging.info("store data requested by the data handler")
+            attrs = {}
+            for dh in collection:
+                station = str(dh)
+                for k, v in dh.get_store_attributes().items():
+                    attrs[k] = dict(attrs.get(k, {}), **{station: v})
+            for k, v in attrs.items():
+                self.data_store.set(k, v)
+
     def validate_station_old(self, data_handler: AbstractDataHandler, set_stations, set_name=None,
                              store_processed_data=True):
         """
-- 
GitLab
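
The flow introduced here: a data handler names attributes in _store_attributes, get_store_attributes resolves them per
instance, and after building the train collection PreProcessing folds everything into one dict per attribute, keyed by
the handler's station id, before putting it into the data store. A condensed, self-contained sketch of that collection
step (DummyHandler and its values are invented stand-ins):

    class DummyHandler:
        _store_attributes = ["apriori"]

        def __init__(self, station, apriori):
            self.station, self.apriori = station, apriori

        def __str__(self):
            return self.station

        def get_store_attributes(self):
            return {attr: getattr(self, attr) for attr in self._store_attributes}

    collection = [DummyHandler("DEBW107", "clim A"), DummyHandler("DEBW013", "clim B")]
    attrs = {}
    for dh in collection:  # same folding as in store_data_handler_attributes
        station = str(dh)
        for k, v in dh.get_store_attributes().items():
            attrs[k] = dict(attrs.get(k, {}), **{station: v})
    print(attrs)  # {'apriori': {'DEBW107': 'clim A', 'DEBW013': 'clim B'}}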


From 2652975d5764c08a072e3f635a18a8646b954c68 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 3 May 2021 19:33:46 +0200
Subject: [PATCH 105/175] new fir filter data handler using climatological
 heuristics

---
 mlair/data_handler/abstract_data_handler.py   |   2 +-
 .../data_handler/data_handler_with_filter.py  | 125 ++++++++++++-
 mlair/helpers/filter.py                       | 166 +++++++++++++++---
 3 files changed, 265 insertions(+), 28 deletions(-)

diff --git a/mlair/data_handler/abstract_data_handler.py b/mlair/data_handler/abstract_data_handler.py
index c020a413..36d6e9ae 100644
--- a/mlair/data_handler/abstract_data_handler.py
+++ b/mlair/data_handler/abstract_data_handler.py
@@ -34,7 +34,7 @@ class AbstractDataHandler:
         return remove_items(list_of_args, ["self"] + list(args))
 
     @classmethod
-    def store_attributes(cls):
+    def store_attributes(cls) -> list:
         """
         Let MLAir know that some data should be stored in the data store. This is used for calculations on the train
         subset that should be applied to validation and test subset.
diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index b9b90d44..0619c74a 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -14,7 +14,7 @@ from mlair.data_handler.data_handler_single_station import DataHandlerSingleStat
 from mlair.data_handler import DefaultDataHandler
 from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
 from mlair.helpers.filter import KolmogorovZurbenkoFilterMovingWindow as KZFilter
-from mlair.helpers.filter import FIRFilter
+from mlair.helpers.filter import FIRFilter, ClimateFIRFilter
 
 # define a more general date type for type hinting
 str_or_list = Union[str, List[str]]
@@ -67,7 +67,8 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation):
 
     def make_input_target(self):
         data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
-                                         self.station_type, self.network, self.store_data_locally, self.data_origin)
+                                         self.station_type, self.network, self.store_data_locally, self.data_origin,
+                                         self.start, self.end)
         self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
                                       limit=self.interpolation_limit)
         self.set_inputs_and_targets()
@@ -277,3 +278,123 @@ class DataHandlerKzFilter(DefaultDataHandler):
     data_handler = DataHandlerKzFilterSingleStation
     data_handler_transformation = DataHandlerKzFilterSingleStation
     _requirements = data_handler.requirements()
+
+
+class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation):
+    """
+    Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered. In contrast to
+    the simple DataHandlerFirFilterSingleStation, this data handler is centered around t0 to have no time delay. For
+    values in the future (t > t0), this data handler assumes a climatological value for the low pass data and values of
+    0 for all residuum components.
+
+    :param apriori: Data to use as apriori information. This should be either an xarray DataArray containing a monthly
+        mean or any other heuristic to support the clim filter, or a list of such arrays additionally containing
+        heuristics for all residua. The second form can be used together with apriori_type `residuum_stat`, which
+        estimates the error of the residuum when the clim filter is applied with exogenous parameters. If apriori_type
+        is None/`zeros`, data can be provided, but this is not required in this case.
+    :param apriori_type: Set the type of information that is provided to the clim filter. For the first low pass, a
+        calculated or given statistic is always used. For the residuum prediction, a constant value of zero is assumed
+        if apriori_type is None or `zeros`, and a climatology of the residuum is used for `residuum_stat`.
+    """
+
+    _requirements = remove_items(DataHandlerFirFilterSingleStation.requirements(), "station")
+    _hash = DataHandlerFirFilterSingleStation._hash + ["apriori_type", "apriori_sel_opts"]
+    _store_attributes = DataHandlerFirFilterSingleStation.store_attributes() + ["apriori"]
+
+    def __init__(self, *args, apriori=None, apriori_type=None, apriori_sel_opts=None, **kwargs):
+        self.apriori_type = apriori_type
+        self.climate_filter_coeff = None  # coefficients of the used FIR filter
+        self.apriori = apriori  # exogenous apriori information or None to calculate from data (endogenous)
+        self.all_apriori = None  # collection of all apriori information
+        self.apriori_sel_opts = apriori_sel_opts  # ensure to separate exogenous and endogenous information
+        super().__init__(*args, **kwargs)
+
+    @TimeTrackingWrapper
+    def apply_filter(self):
+        """Apply FIR filter only on inputs."""
+        apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
+        climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
+                                          self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
+                                          apriori_type=self.apriori_type, apriori=apriori,
+                                          sel_opts=self.apriori_sel_opts)
+        self.climate_filter_coeff = climate_filter.filter_coefficients
+
+        # store apriori information: store all if residuum_stat method was used, otherwise just store initial apriori
+        if self.apriori_type == "residuum_stat":
+            self.apriori = climate_filter.apriori_data
+        else:
+            self.apriori = climate_filter.initial_apriori_data
+        self.all_apriori = climate_filter.apriori_data
+        climate_filter_data = climate_filter.filtered_data
+
+        # add unfiltered raw data
+        if self._add_unfiltered is True:
+            climate_filter_data.append(self.input_data)
+
+        # create input data with filter index
+        self.input_data = xr.concat(climate_filter_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
+
+        # this is just a code snippet to check the results of the filter
+        # import matplotlib
+        # matplotlib.use("TkAgg")
+        # import matplotlib.pyplot as plt
+        # self.input_data.sel(filter="low", variables="temp", Stations="DEBW107").plot()
+        # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
+
+    def create_filter_index(self) -> pd.Index:
+        """
+        Round cut off periods in days and append 'res' for residuum index.
+
+        Round small numbers (<10) to single decimal, and higher numbers to int. Transform as list of str and append
+        'res' for residuum index. Add index unfiltered if the raw / unfiltered data is appended to data in addition.
+        """
+        index = np.round(self.filter_cutoff_period, 1)
+        f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1)
+        index = list(map(f, index.tolist()))
+        index = list(map(lambda x: str(x) + "d", index)) + ["res"]
+        if self._add_unfiltered:
+            index.append("unfiltered")
+        return pd.Index(index, name=self.filter_dim)
+
+    def _create_lazy_data(self):
+        return [self._data, self.meta, self.input_data, self.target_data, self.climate_filter_coeff,
+                self.apriori, self.all_apriori]
+
+    def _extract_lazy(self, lazy_data):
+        _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori = lazy_data
+        DataHandlerSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data))
+
+    @staticmethod
+    def _prepare_filter_order(filter_order, removed_index, fs):
+        order = []
+        for i, o in enumerate(filter_order):
+            if i not in removed_index:
+                fo = int(o * fs)
+                fo = fo + 1 if fo % 2 == 0 else fo
+                order.append(fo)
+        return order
+
+    @staticmethod
+    def _prepare_filter_cutoff_period(filter_cutoff_period, fs):
+        """Frequency must be smaller than the sampling frequency fs. Otherwise remove given cutoff period pair."""
+        cutoff = []
+        removed = []
+        for i, period in enumerate(to_list(filter_cutoff_period)):
+            if period > 2. / fs:
+                cutoff.append(period)
+            else:
+                removed.append(i)
+        return cutoff, removed
+
+    @staticmethod
+    def _period_to_freq(cutoff_p):
+        return [1. / x for x in cutoff_p]
+
+
+class DataHandlerClimateFirFilter(DefaultDataHandler):
+    """Data handler using climatic adjusted FIR filtered data."""
+
+    data_handler = DataHandlerClimateFirFilterSingleStation
+    data_handler_transformation = DataHandlerClimateFirFilterSingleStation
+    _requirements = data_handler.requirements()
+    _store_attributes = data_handler.store_attributes()
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index ad2fd12d..4c386885 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -3,6 +3,7 @@ import warnings
 from typing import Union
 
 import numpy as np
+import pandas as pd
 from matplotlib import pyplot as plt
 from scipy import signal
 import xarray as xr
@@ -17,8 +18,8 @@ class FIRFilter:
         filtered = []
         h = []
         for i in range(len(order)):
-            fi, hi = self.apply_fir_filter(data, fs, order[i], cutoff_low=cutoff[i][0], cutoff_high=cutoff[i][1],
-                                           window=window, dim=dim)
+            fi, hi = fir_filter(data, fs, order=order[i], cutoff_low=cutoff[i][0], cutoff_high=cutoff[i][1],
+                                window=window, dim=dim, h=None, causal=True, padlen=None)
             filtered.append(fi)
             h.append(hi)
 
@@ -47,31 +48,146 @@ class FIRFilter:
         #                             cutoff_high=cutoff[3][1], window=window)
         # filtered_high = xr.ones_like(station_data) * y_high.reshape(station_data.values.shape)
 
-    def apply_fir_filter(self, data, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", dim="variables"):
-
-        # create fir filter coeffs
-        cutoff = []
-        if cutoff_low is not None:
-            cutoff += [cutoff_low]
-        if cutoff_high is not None:
-            cutoff += [cutoff_high]
-        if len(cutoff) == 2:
-            filter_type = "bandpass"
-        elif len(cutoff) == 1 and cutoff_low is not None:
-            filter_type = "highpass"
-        elif len(cutoff) == 1 and cutoff_high is not None:
-            filter_type = "lowpass"
-        else:
-            raise ValueError("Please provide either cutoff_low or cutoff_high.")
-        h = signal.firwin(order, cutoff, pass_zero=filter_type, fs=fs, window=window)
 
-        # filter data
-        filtered = xr.ones_like(data)
-        for var in data.coords[dim]:
-            d = data.sel({dim: var}).values.flatten()
+class ClimateFIRFilter:
+
+    def __init__(self, data, fs, order, cutoff, window, time_dim, var_dim, apriori=None, apriori_type=None,
+                 sel_opts=None):
+        """
+        :param data: data to filter
+        :param fs: sampling frequency in 1/days (e.g. daily data: fs=1, hourly data: fs=24)
+        :param order: a tuple with the order of the filter in same ordering like cutoff
+        :param cutoff: a tuple with the cutoff frequencies (all are applied as low pass)
+        :param window: window type of the filter (e.g. hamming)
+        :param time_dim: name of time dimension to apply filter along
+        :param var_dim: name of variables dimension
+        :param apriori: apriori information to use for the first low pass. If None, climatology is calculated on the
+            provided data.
+        :param apriori_type: type of apriori information to use. Climatology is always used for the first low pass. For
+            the residuum either the value zero is used (apriori_type is None or "zeros") or a climatology on the
+            residua is used ("residuum_stats").
+        """
+        filtered = []
+        h = []
+        sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
+        sampling = {1: "1d", 24: "1H"}.get(int(fs))
+        if apriori is None:
+            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
+        apriori_list = to_list(apriori)
+        input_data = data.__deepcopy__()
+        for i in range(len(order)):
+            fi, hi, apriori = self.clim_filter(input_data, fs, cutoff[i], order[i], apriori=apriori_list[i],
+                                               sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
+                                               var_dim=var_dim)
+            filtered.append(fi)
+            h.append(hi)
+            input_data = input_data - fi  # calculate residuum
+            if len(apriori_list) <= i + 1:
+                if apriori_type is None or apriori_type == "zeros":
+                    apriori_list.append(xr.zeros_like(apriori_list[i]))  # zero version
+                elif apriori_type == "residuum_stats":
+                    apriori_list.append(-self.create_monthly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
+                                                                  time_dim=time_dim))
+                else:
+                    raise ValueError(f"Cannot handle unkown apriori type: {apriori_type}. Please choose from None, "
+                                     f"`zeros` or `residuum_stats`.")
+        # add residuum to filtered
+        filtered.append(input_data)
+        self._filtered = filtered
+        self._h = h
+        self._apriori = apriori_list
+
+    @staticmethod
+    def create_monthly_mean(data, sel_opts=None, sampling="1d", time_dim="datetime"):
+        monthly = xr.ones_like(data)
+        if sel_opts is not None:
+            data = data.sel(**sel_opts)
+        monthly_mean = data.groupby(f"{time_dim}.month").mean()
+        for month in monthly_mean.month.values:
+            loc = (monthly[f"{time_dim}.month"] == month)
+            monthly.loc[{time_dim: loc}] = monthly_mean.sel(month=month)
+        return monthly.resample({time_dim: "1m"}).mean().resample({time_dim: sampling}).interpolate()
+
+    def clim_filter(self, data, fs, cutoff_high, order, apriori=None, padlen=None, sel_opts=None, sampling="1d",
+                    time_dim="datetime", var_dim="variables", window="hamming"):
+        if apriori is None:
+            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
+        h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
+        length = len(h)
+        dt = data.coords[time_dim].values
+        res = xr.zeros_like(data)
+        print("start iteration")
+        for i in range(0, len(dt)):
+            t0 = dt[i]
+            pd_date = pd.to_datetime(t0)
+            if pd_date.day == 1 and pd_date.month == 1:
+                print(t0)
+            try:
+                i_m = max(0, i - length)
+                i_p = min(i + length, len(dt) - 2)
+                t_hist = slice(dt[i_m], dt[i])
+                t_fut = slice(dt[i + 1], dt[i_p + 1])
+                tmp_hist = data.sel({time_dim: t_hist})
+                tmp_fut = apriori.sel({time_dim: t_fut})
+                tmp_comb = xr.concat([tmp_hist, tmp_fut], dim=time_dim)
+                _padlen = padlen if padlen is not None else int(0.5 * len(tmp_comb.coords[time_dim]))
+                tmp_filter, _ = fir_filter(tmp_comb, fs, cutoff_high=cutoff_high, order=order, causal=False,
+                                           padlen=_padlen, dim=var_dim, window=window, h=h)
+                res.loc[{time_dim: t0}] = tmp_filter.loc[{time_dim: t0}]
+            except IndexError:
+                pass
+            # if i == 720:
+            #     for var in data.coords[var_dim]:
+            #         data.sel({var_dim: var, time_dim: slice(dt[i_m], dt[i_p+1])}).plot()
+            #         tmp_comb.sel({var_dim: var}).plot()
+            #         plt.title(var)
+            #         plt.show()
+        return res, h, apriori
+
+    @property
+    def filter_coefficients(self):
+        return self._h
+
+    @property
+    def filtered_data(self):
+        return self._filtered
+
+    @property
+    def apriori_data(self):
+        return self._apriori
+
+    @property
+    def initial_apriori_data(self):
+        return self.apriori_data[0]
+
+
+def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", dim="variables", h=None,
+               causal=True, padlen=None):
+    cutoff = []
+    if cutoff_low is not None:
+        cutoff += [cutoff_low]
+    if cutoff_high is not None:
+        cutoff += [cutoff_high]
+    if len(cutoff) == 2:
+        filter_type = "bandpass"
+    elif len(cutoff) == 1 and cutoff_low is not None:
+        filter_type = "highpass"
+    elif len(cutoff) == 1 and cutoff_high is not None:
+        filter_type = "lowpass"
+    else:
+        raise ValueError("Please provide either cutoff_low or cutoff_high.")
+    if h is None:
+        h = signal.firwin(order, cutoff, pass_zero=filter_type, fs=fs, window=window)
+    filtered = xr.ones_like(data)
+    for var in data.coords[dim]:
+        d = data.sel({dim: var}).values.flatten()
+        if causal:
             y = signal.lfilter(h, 1., d)
-            filtered.loc[{dim: var}] = y
-        return filtered, h
+        else:
+            padlen = padlen if padlen is not None else 3 * len(h)
+            y = signal.filtfilt(h, 1., d, padlen=padlen)
+        filtered.loc[{dim: var}] = y
+    return filtered, h
 
 
 class KolmogorovZurbenkoBaseClass:
-- 
GitLab
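
A minimal sketch (not part of the patch series) of the two building blocks added above:
_prepare_filter_cutoff_period drops any cutoff period at or below the Nyquist period
2/fs, and the module-level fir_filter designs the taps with signal.firwin and applies
them per variable. All data and parameter values below are purely illustrative:

    import numpy as np
    import xarray as xr
    from scipy import signal

    fs = 1  # daily sampling -> shortest resolvable period is 2 days (Nyquist)
    periods, kept, removed = [21, 1.5], [], []
    for i, p in enumerate(periods):
        if p > 2. / fs:
            kept.append(p)     # the 21-day period survives
        else:
            removed.append(i)  # 1.5 days is dropped, and so is its filter order

    # causal low-pass over a synthetic two-year daily series
    t = np.arange("2010-01-01", "2012-01-01", dtype="datetime64[D]").astype("datetime64[ns]")
    values = np.sin(2 * np.pi * np.arange(t.size) / 365.) + 0.3 * np.random.randn(t.size)
    data = xr.DataArray(values[:, None], dims=("datetime", "variables"),
                        coords={"datetime": t, "variables": ["o3"]})

    order = 43  # an odd tap count, as enforced by _prepare_filter_order
    h = signal.firwin(order, 1. / kept[0], pass_zero="lowpass", fs=fs, window="hamming")
    filtered = xr.ones_like(data)
    for var in data.coords["variables"]:
        d = data.sel(variables=var).values.flatten()
        filtered.loc[{"variables": var}] = signal.lfilter(h, 1., d)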


From 3bf973c85674bc99b0851df791744feae1ec1b9b Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 4 May 2021 16:41:20 +0200
Subject: [PATCH 106/175] filter code has many more comments for better
 understanding; time range of apriori is extended if required

---
 .../data_handler/data_handler_with_filter.py  |  10 +-
 mlair/helpers/filter.py                       | 112 ++++++++++++++++--
 2 files changed, 107 insertions(+), 15 deletions(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 0619c74a..67902cd0 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -289,12 +289,12 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
 
     :param apriori: Data to use as apriori information. This should be either an xarray DataArray containing monthly or
         any other heuristic to support the clim filter, or a list of such arrays containing heuristics for all residua
-        in addition. The 2nd can be used together with apriori_type `residuum_stat` which estimates the error of the
+        in addition. The 2nd can be used together with apriori_type `residuum_stats` which estimates the error of the
         residuum when the clim filter should be applied with exogenous parameters. If apriori_type is None/`zeros` data
         can be provided, but this is not required in this case.
     :param apriori_type: set type of information that is provided to the clim filter. For the first low pass always a
         calculated or given statistic is used. For residuum prediction a constant value of zero is assumed if
-        apriori_type is None or `zeros`, and a climatology of the residuum is used for `residuum_stat`.
+        apriori_type is None or `zeros`, and a climatology of the residuum is used for `residuum_stats`.
     """
 
     _requirements = remove_items(DataHandlerFirFilterSingleStation.requirements(), "station")
@@ -312,15 +312,15 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     @TimeTrackingWrapper
     def apply_filter(self):
         """Apply FIR filter only on inputs."""
-        apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
+        self.apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
         climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
-                                          apriori_type=self.apriori_type, apriori=apriori,
+                                          apriori_type=self.apriori_type, apriori=self.apriori,
                                           sel_opts=self.apriori_sel_opts)
         self.climate_filter_coeff = climate_filter.filter_coefficients
 
         # store apriori information: store all if residuum_stats method was used, otherwise just store initial apriori
-        if self.apriori_type == "residuum_stat":
+        if self.apriori_type == "residuum_stats":
             self.apriori = climate_filter.apriori_data
         else:
             self.apriori = climate_filter.initial_apriori_data
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 4c386885..4fce4f50 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -1,7 +1,9 @@
 import gc
 import warnings
 from typing import Union
+import logging
 
+import datetime
 import numpy as np
 import pandas as pd
 from matplotlib import pyplot as plt
@@ -76,47 +78,137 @@ class ClimateFIRFilter:
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
         for i in range(len(order)):
+            # calculate climatological filter
             fi, hi, apriori = self.clim_filter(input_data, fs, cutoff[i], order[i], apriori=apriori_list[i],
                                                sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
                                                var_dim=var_dim)
             filtered.append(fi)
             h.append(hi)
-            input_data = input_data - fi  # calculate residuum
+
+            # calculate residuum
+            input_data = input_data - fi
+
+            # create new apriori information for next iteration if no further apriori is provided
             if len(apriori_list) <= i + 1:
-                if apriori_type is None or apriori_type == "zeros":
-                    apriori_list.append(xr.zeros_like(apriori_list[i]))  # zero version
-                elif apriori_type == "residuum_stats":
+                if apriori_type is None or apriori_type == "zeros":  # zero version
+                    apriori_list.append(xr.zeros_like(apriori_list[i]))
+                elif apriori_type == "residuum_stats":  # calculate monthly statistic on residuum
                     apriori_list.append(-self.create_monthly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
                                                                   time_dim=time_dim))
                 else:
                     raise ValueError(f"Cannot handle unkown apriori type: {apriori_type}. Please choose from None, "
                                      f"`zeros` or `residuum_stats`.")
-        # add residuum to filtered
+        # add last residuum to filtered
         filtered.append(input_data)
         self._filtered = filtered
         self._h = h
         self._apriori = apriori_list
 
     @staticmethod
-    def create_monthly_mean(data, sel_opts=None, sampling="1d", time_dim="datetime"):
-        monthly = xr.ones_like(data)
+    def create_unity_array(data, time_dim, extend_range=365):
+        """Create a xr data array filled with ones. time_dim is extended by extend_range days in future and past."""
+        coords = data.coords
+
+        # extend time_dim by given extend_range days
+        start = coords[time_dim][0].values.astype("datetime64[D]") - np.timedelta64(extend_range, "D")
+        end = coords[time_dim][-1].values.astype("datetime64[D]") + np.timedelta64(extend_range, "D")
+        new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+
+        # construct data array with updated coords
+        new_coords = {k: data.coords[k].values if k != time_dim else new_time_axis for k in coords}
+        new_array = xr.DataArray(1, coords=new_coords, dims=new_coords.keys()).transpose(*data.dims)
+
+        # loffset is required because resampling uses the last day of the month as the resampling timestamp
+        return new_array.resample({time_dim: "1m"}, loffset=datetime.timedelta(days=-15)).max()
+
+    def create_monthly_mean(self, data, sel_opts=None, sampling="1d", time_dim="datetime"):
+        """Calculate monthly statistics."""
+
+        # create unity xarray in monthly resolution with sampling point in mid of each month
+        monthly = self.create_unity_array(data, time_dim)
+
+        # apply selection if given (only use subset for monthly means)
         if sel_opts is not None:
             data = data.sel(**sel_opts)
+
+        # create monthly mean and replace entries in unity array
         monthly_mean = data.groupby(f"{time_dim}.month").mean()
         for month in monthly_mean.month.values:
             loc = (monthly[f"{time_dim}.month"] == month)
             monthly.loc[{time_dim: loc}] = monthly_mean.sel(month=month)
-        return monthly.resample({time_dim: "1m"}).mean().resample({time_dim: sampling}).interpolate()
+
+        # aggregate monthly information (shift by half month, because resample base is last day)
+        return monthly.resample({time_dim: "1m"}).max().resample({time_dim: sampling}).interpolate()
+
+    @staticmethod
+    def extend_apriori(data, apriori, time_dim):
+        """
+        Extend time range of apriori information.
+
+        This method will fail if apriori is available for a shorter period than the gap to fill.
+        """
+        dates = data.coords[time_dim].values
+
+        # apriori starts after data
+        if dates[0] < apriori.coords[time_dim].values[0]:
+            # add difference in full years
+            date_diff = abs(dates[0] - apriori.coords[time_dim].values[0]).astype("timedelta64[D]")
+            extend_range = np.ceil(date_diff / (np.timedelta64(1, "D") * 365)).astype(int) * 365
+            coords = apriori.coords
+
+            # create new time axis
+            start = coords[time_dim][0].values.astype("datetime64[D]") - np.timedelta64(extend_range, "D")
+            end = coords[time_dim][0].values.astype("datetime64[D]")
+            new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+
+            # extract old values to use with new axis
+            start = coords[time_dim][0].values.astype("datetime64[D]")
+            end = coords[time_dim][0].values.astype("datetime64[D]") + np.timedelta64(extend_range - 1, "D")
+            new_values = apriori.sel({time_dim: slice(start, end)})
+            new_values.coords[time_dim] = new_time_axis
+
+            # add new values to apriori
+            apriori = apriori.combine_first(new_values)
+
+        # apriori ends before data
+        if dates[-1] + np.timedelta64(365, "D") > apriori.coords[time_dim].values[-1]:
+            # add difference in full years + 1 year (because apriori is used as future estimate)
+            date_diff = abs(dates[-1] - apriori.coords[time_dim].values[-1]).astype("timedelta64[D]")
+            extend_range = np.ceil(date_diff / (np.timedelta64(1, "D") * 365)).astype(int) * 365 + 365
+            coords = apriori.coords
+
+            # create new time axis
+            start = coords[time_dim][-1].values.astype("datetime64[D]")
+            end = coords[time_dim][-1].values.astype("datetime64[D]") + np.timedelta64(extend_range, "D")
+            new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+
+            # extract old values to use with new axis
+            start = coords[time_dim][-1].values.astype("datetime64[D]") - np.timedelta64(extend_range - 1, "D")
+            end = coords[time_dim][-1].values.astype("datetime64[D]")
+            new_values = apriori.sel({time_dim: slice(start, end)})
+            new_values.coords[time_dim] = new_time_axis
+
+            # add new values to apriori
+            apriori = apriori.combine_first(new_values)
+
+        return apriori
 
     def clim_filter(self, data, fs, cutoff_high, order, apriori=None, padlen=None, sel_opts=None, sampling="1d",
                     time_dim="datetime", var_dim="variables", window="hamming"):
+
+        # calculate apriori information from data if not given and extend its range if it is not sufficiently long
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
+        apriori = self.extend_apriori(data, apriori, time_dim)
+
+        # calculate FIR filter coefficients
         h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
         length = len(h)
+
+        # start loop on all timestamps
         dt = data.coords[time_dim].values
         res = xr.zeros_like(data)
-        print("start iteration")
+        logging.info("start iteration")
         for i in range(0, len(dt)):
             t0 = dt[i]
             pd_date = pd.to_datetime(t0)
@@ -135,7 +227,7 @@ class ClimateFIRFilter:
                                            padlen=_padlen, dim=var_dim, window=window, h=h)
                 res.loc[{time_dim: t0}] = tmp_filter.loc[{time_dim: t0}]
             except IndexError:
-                pass
+                res.loc[{time_dim: t0}] = np.nan
             # if i == 720:
             #     for var in data.coords[var_dim]:
             #         data.sel({var_dim: var, time_dim: slice(dt[i_m], dt[i_p+1])}).plot()
-- 
GitLab
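
The year-wise extension performed by extend_apriori is the headline change of this
patch. A toy sketch (not part of the patch) of the "apriori starts after data" branch;
the "apriori ends before data" branch mirrors it at the other end and adds one extra
year because apriori also serves as the future estimate:

    import numpy as np
    import xarray as xr

    # apriori covers 2011 only; assume the data to filter already starts in 2010
    time = np.arange("2011-01-01", "2012-01-01", dtype="datetime64[D]").astype("datetime64[ns]")
    apriori = xr.DataArray(np.sin(2 * np.pi * np.arange(time.size) / 365.),
                           dims="datetime", coords={"datetime": time})

    extend_range = 365  # gap rounded up to full years, as in the patch
    first = apriori.coords["datetime"][0].values.astype("datetime64[D]")

    # new axis covering the missing year directly before the apriori period
    new_time_axis = np.arange(first - np.timedelta64(extend_range, "D"),
                              first).astype("datetime64[ns]")

    # reuse the first year of apriori values under the shifted axis
    new_values = apriori.sel(datetime=slice(first, first + np.timedelta64(extend_range - 1, "D"))).copy()
    new_values.coords["datetime"] = new_time_axis

    apriori = apriori.combine_first(new_values)  # now spans 2010-01-01 .. 2011-12-31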


From be60dba27c71619c5319314cd28cbdc78583be95 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 4 May 2021 17:17:00 +0200
Subject: [PATCH 107/175] climFIR data handler can create a sample plot if a
 path is provided.

---
 .../data_handler/data_handler_with_filter.py  |  5 +--
 mlair/helpers/filter.py                       | 33 ++++++++++++++-----
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 67902cd0..7be76082 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -301,12 +301,13 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     _hash = DataHandlerFirFilterSingleStation._hash + ["apriori_type", "apriori_sel_opts"]
     _store_attributes = DataHandlerFirFilterSingleStation.store_attributes() + ["apriori"]
 
-    def __init__(self, *args, apriori=None, apriori_type=None, apriori_sel_opts=None, **kwargs):
+    def __init__(self, *args, apriori=None, apriori_type=None, apriori_sel_opts=None, plot_path=None, **kwargs):
         self.apriori_type = apriori_type
         self.climate_filter_coeff = None  # coefficents of the used FIR filter
         self.apriori = apriori  # exogenous apriori information or None to calculate from data (endogenous)
         self.all_apriori = None  # collection of all apriori information
         self.apriori_sel_opts = apriori_sel_opts  # ensure to separate exogenous and endogenous information
+        self.plot_path = plot_path  # use this path to create insight plots
         super().__init__(*args, **kwargs)
 
     @TimeTrackingWrapper
@@ -316,7 +317,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
-                                          sel_opts=self.apriori_sel_opts)
+                                          sel_opts=self.apriori_sel_opts, plot_path=self.plot_path, plot_name=str(self))
         self.climate_filter_coeff = climate_filter.filter_coefficients
 
         # store apriori information: store all if residuum_stats method was used, otherwise just store initial apriori
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 4fce4f50..5b521f7d 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -2,6 +2,7 @@ import gc
 import warnings
 from typing import Union
 import logging
+import os
 
 import datetime
 import numpy as np
@@ -54,7 +55,7 @@ class FIRFilter:
 class ClimateFIRFilter:
 
     def __init__(self, data, fs, order, cutoff, window, time_dim, var_dim, apriori=None, apriori_type=None,
-                 sel_opts=None):
+                 sel_opts=None, plot_path=None, plot_name=None):
         """
         :param data: data to filter
         :param fs: sampling frequency in 1/days (e.g. daily data: fs=1, hourly data: fs=24)
@@ -69,6 +70,8 @@ class ClimateFIRFilter:
             the residuum either the value zero is used (apriori_type is None or "zeros") or a climatology on the
             residua is used ("residuum_stats").
         """
+        self.plot_path = plot_path
+        self.plot_name = plot_name
         filtered = []
         h = []
         sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
@@ -81,7 +84,7 @@ class ClimateFIRFilter:
             # calculate climatological filter
             fi, hi, apriori = self.clim_filter(input_data, fs, cutoff[i], order[i], apriori=apriori_list[i],
                                                sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
-                                               var_dim=var_dim)
+                                               var_dim=var_dim, plot_index=i)
             filtered.append(fi)
             h.append(hi)
 
@@ -194,7 +197,7 @@ class ClimateFIRFilter:
         return apriori
 
     def clim_filter(self, data, fs, cutoff_high, order, apriori=None, padlen=None, sel_opts=None, sampling="1d",
-                    time_dim="datetime", var_dim="variables", window="hamming"):
+                    time_dim="datetime", var_dim="variables", window="hamming", plot_index=None):
 
         # calculate apriori information from data if not given and extend its range if it is not sufficiently long
         if apriori is None:
@@ -226,16 +229,28 @@ class ClimateFIRFilter:
                 tmp_filter, _ = fir_filter(tmp_comb, fs, cutoff_high=cutoff_high, order=order, causal=False,
                                            padlen=_padlen, dim=var_dim, window=window, h=h)
                 res.loc[{time_dim: t0}] = tmp_filter.loc[{time_dim: t0}]
+                if i == 720 and self.plot_path is not None:
+                    self.plot(data, tmp_comb, var_dim, time_dim, slice(dt[i_m], dt[i_p + 1]), t0, plot_index)
             except IndexError:
                 res.loc[{time_dim: t0}] = np.nan
-            # if i == 720:
-            #     for var in data.coords[var_dim]:
-            #         data.sel({var_dim: var, time_dim: slice(dt[i_m], dt[i_p+1])}).plot()
-            #         tmp_comb.sel({var_dim: var}).plot()
-            #         plt.title(var)
-            #         plt.show()
         return res, h, apriori
 
+    def plot(self, data, tmp_comb, var_dim, time_dim, time_dim_slice, t0, plot_index):
+        try:
+            plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR")
+            if not os.path.exists(plot_folder):
+                os.makedirs(plot_folder)
+            for var in data.coords[var_dim]:
+                data.sel({var_dim: var, time_dim: time_dim_slice}).plot()
+                tmp_comb.sel({var_dim: var}).plot()
+                plt.axvline(t0, color="lightgrey")
+                plt.title(str(var.values))
+                plot_name = os.path.join(plot_folder, f"climFIR_{self.plot_name}_{str(var.values)}_{plot_index}.pdf")
+                plt.savefig(plot_name, dpi=300)
+                plt.close('all')
+        except Exception:  # plotting is a diagnostic only; never fail the filter run
+            pass
+
     @property
     def filter_coefficients(self):
         return self._h
-- 
GitLab
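
create_monthly_mean, as reworked in the previous patch, stamps one value per month in
the middle of each month and interpolates back to the target sampling. A condensed,
self-contained sketch of that idea for a single series (it omits the +/-365-day axis
extension done by create_unity_array and assumes an xarray version that still accepts
loffset, as the patch itself does):

    import datetime
    import numpy as np
    import xarray as xr

    time = np.arange("2010-01-01", "2012-01-01", dtype="datetime64[D]").astype("datetime64[ns]")
    data = xr.DataArray(np.random.randn(time.size), dims="datetime", coords={"datetime": time})

    # one stamp per month, moved to mid-month via loffset (resample labels month ends)
    monthly = xr.ones_like(data).resample(datetime="1m", loffset=datetime.timedelta(days=-15)).max()

    # fill each stamp with the corresponding monthly mean of the data
    monthly_mean = data.groupby("datetime.month").mean()
    for month in monthly_mean.month.values:
        monthly.loc[{"datetime": monthly["datetime.month"] == month}] = monthly_mean.sel(month=month)

    # interpolate between the mid-month anchors back to the target sampling
    climatology = monthly.resample(datetime="1d").interpolate()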


From 15dc6218e134d80b10114d385f158421e4b3c7ca Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 10:48:08 +0200
Subject: [PATCH 108/175] implemented vectorized version of fir filter for
 faster computation

---
 mlair/helpers/filter.py | 165 ++++++++++++++++++++++++++++++++++------
 1 file changed, 141 insertions(+), 24 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 5b521f7d..df5522da 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -1,6 +1,6 @@
 import gc
 import warnings
-from typing import Union
+from typing import Union, Callable
 import logging
 import os
 
@@ -55,7 +55,7 @@ class FIRFilter:
 class ClimateFIRFilter:
 
     def __init__(self, data, fs, order, cutoff, window, time_dim, var_dim, apriori=None, apriori_type=None,
-                 sel_opts=None, plot_path=None, plot_name=None):
+                 sel_opts=None, plot_path=None, plot_name=None, vectorized=True, padlen_factor=0.8):
         """
         :param data: data to filter
         :param fs: sampling frequency in 1/days (e.g. daily data: fs=1, hourly data: fs=24)
@@ -82,9 +82,11 @@ class ClimateFIRFilter:
         input_data = data.__deepcopy__()
         for i in range(len(order)):
             # calculate climatological filter
-            fi, hi, apriori = self.clim_filter(input_data, fs, cutoff[i], order[i], apriori=apriori_list[i],
-                                               sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
-                                               var_dim=var_dim, plot_index=i)
+            clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
+            fi, hi, apriori = clim_filter(input_data.sel({time_dim: slice("2006")}), fs, cutoff[i], order[i],
+                                          apriori=apriori_list[i],
+                                          sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
+                                          var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor)
             filtered.append(fi)
             h.append(hi)
 
@@ -196,7 +198,8 @@ class ClimateFIRFilter:
 
         return apriori
 
-    def clim_filter(self, data, fs, cutoff_high, order, apriori=None, padlen=None, sel_opts=None, sampling="1d",
+    @TimeTrackingWrapper
+    def clim_filter(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5, sel_opts=None, sampling="1d",
                     time_dim="datetime", var_dim="variables", window="hamming", plot_index=None):
 
         # calculate apriori information from data if not given and extend its range if not sufficient long enough
@@ -225,7 +228,7 @@ class ClimateFIRFilter:
                 tmp_hist = data.sel({time_dim: t_hist})
                 tmp_fut = apriori.sel({time_dim: t_fut})
                 tmp_comb = xr.concat([tmp_hist, tmp_fut], dim=time_dim)
-                _padlen = padlen if padlen is not None else int(0.5 * len(tmp_comb.coords[time_dim]))
+                _padlen = int(min(padlen_factor, 1) * len(tmp_comb.coords[time_dim]))
                 tmp_filter, _ = fir_filter(tmp_comb, fs, cutoff_high=cutoff_high, order=order, causal=False,
                                            padlen=_padlen, dim=var_dim, window=window, h=h)
                 res.loc[{time_dim: t0}] = tmp_filter.loc[{time_dim: t0}]
@@ -235,16 +238,97 @@ class ClimateFIRFilter:
                 res.loc[{time_dim: t0}] = np.nan
         return res, h, apriori
 
+    @TimeTrackingWrapper
+    def clim_filter_vectorized(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5, sel_opts=None,
+                               sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
+                               plot_index=None):
+
+        # calculate apriori information from data if not given and extend its range if it is not sufficiently long
+        if apriori is None:
+            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
+        apriori = self.extend_apriori(data, apriori, time_dim)
+
+        # calculate FIR filter coefficients
+        h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
+        length = len(h)
+
+        # create tmp dimension to apply filter, search for unused name
+        new_dim = self._create_tmp_dimension(data)
+
+        # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
+        history = self._shift_data(data, range(-length, 1), time_dim, var_dim, new_dim)
+        future = self._shift_data(apriori, range(1, length + 1), time_dim, var_dim, new_dim)
+        filter_input_data = history.combine_first(future)
+
+        # apply vectorized fir filter along the tmp dimension
+        filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data,
+                              input_core_dims=[[new_dim]], output_core_dims=[[new_dim]], vectorize=True,
+                              kwargs={"fs": fs, "cutoff_high": cutoff_high, "order": order,
+                                      "causal": False, "padlen": int(min(padlen_factor, 1) * length)})
+
+        # plot
+        if self.plot_path is not None:
+            pos = 720
+            filter_example = filter_input_data.isel({time_dim: pos})
+            t0 = filter_example.coords[time_dim].values
+            t_slice = filter_input_data.isel({time_dim: slice(pos - length, pos + length + 1)}).coords[time_dim].values
+            self.plot(data, filter_example, var_dim, time_dim, t_slice, t0, plot_index)
+
+        # select only values at tmp dimension 0 at each point in time
+        return filt.sel({new_dim: 0}, drop=True), h, apriori
+
+    @staticmethod
+    def _create_tmp_dimension(data):
+        new_dim = "window"
+        count = 0
+        while new_dim in data.dims:
+            new_dim += new_dim
+            count += 1
+            if count > 10:
+                raise ValueError("Could not create new dimension.")
+        return new_dim
+
+    def _shift_data(self, data, index_value, time_dim, squeeze_dim, new_dim):
+        coll = []
+        for i in index_value:
+            coll.append(data.shift({time_dim: -i}))
+        new_ind = self.create_index_array(new_dim, index_value, squeeze_dim)
+        return xr.concat(coll, dim=new_ind)
+
+    @staticmethod
+    def create_index_array(index_name: str, index_value, squeeze_dim: str):
+        ind = pd.DataFrame({'val': index_value}, index=index_value)
+        res = xr.Dataset.from_dataframe(ind).to_array(squeeze_dim).rename({'index': index_name}).squeeze(
+            dim=squeeze_dim,
+            drop=True)
+        res.name = index_name
+        return res
+
     def plot(self, data, tmp_comb, var_dim, time_dim, time_dim_slice, t0, plot_index):
         try:
             plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR")
             if not os.path.exists(plot_folder):
                 os.makedirs(plot_folder)
             for var in data.coords[var_dim]:
-                data.sel({var_dim: var, time_dim: time_dim_slice}).plot()
-                tmp_comb.sel({var_dim: var}).plot()
-                plt.axvline(t0, color="lightgrey")
-                plt.title(str(var.values))
+                time_axis = data.sel({var_dim: var, time_dim: time_dim_slice}).coords[time_dim].values
+                rc_params = {'axes.labelsize': 'large',
+                             'xtick.labelsize': 'large',
+                             'ytick.labelsize': 'large',
+                             'legend.fontsize': 'large',
+                             'axes.titlesize': 'large',
+                             }
+                plt.rcParams.update(rc_params)
+                fig, ax = plt.subplots()
+                ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)")
+                ax.plot(time_axis, data.sel({var_dim: var, time_dim: time_dim_slice}).values.flatten(),
+                        color="darkgrey", linestyle="--", label="original")
+                ax.plot(time_axis, tmp_comb.sel({var_dim: var}).values.flatten(), color="black", label="filter input")
+                # data.sel({var_dim: var, time_dim: time_dim_slice}).plot()
+                # tmp_comb.sel({var_dim: var}).plot()
+                plt.title(f"Input of ClimFilter ({str(var.values)})")
+                plt.legend()
+                fig.autofmt_xdate()
+                plt.tight_layout()
                 plot_name = os.path.join(plot_folder, f"climFIR_{self.plot_name}_{str(var.values)}_{plot_index}.pdf")
                 plt.savefig(plot_name, dpi=300)
                 plt.close('all')
@@ -270,20 +354,21 @@ class ClimateFIRFilter:
 
 def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", dim="variables", h=None,
                causal=True, padlen=None):
-    cutoff = []
-    if cutoff_low is not None:
-        cutoff += [cutoff_low]
-    if cutoff_high is not None:
-        cutoff += [cutoff_high]
-    if len(cutoff) == 2:
-        filter_type = "bandpass"
-    elif len(cutoff) == 1 and cutoff_low is not None:
-        filter_type = "highpass"
-    elif len(cutoff) == 1 and cutoff_high is not None:
-        filter_type = "lowpass"
-    else:
-        raise ValueError("Please provide either cutoff_low or cutoff_high.")
+    """Expects xarray."""
     if h is None:
+        cutoff = []
+        if cutoff_low is not None:
+            cutoff += [cutoff_low]
+        if cutoff_high is not None:
+            cutoff += [cutoff_high]
+        if len(cutoff) == 2:
+            filter_type = "bandpass"
+        elif len(cutoff) == 1 and cutoff_low is not None:
+            filter_type = "highpass"
+        elif len(cutoff) == 1 and cutoff_high is not None:
+            filter_type = "lowpass"
+        else:
+            raise ValueError("Please provide either cutoff_low or cutoff_high.")
         h = signal.firwin(order, cutoff, pass_zero=filter_type, fs=fs, window=window)
     filtered = xr.ones_like(data)
     for var in data.coords[dim]:
@@ -297,6 +382,38 @@ def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="ham
     return filtered, h
 
 
+def fir_filter_vectorized(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", h=None, causal=True,
+                          padlen=None):
+    """Expects numpy array."""
+    sel = ~np.isnan(data)
+    res = np.empty_like(data)
+    if h is None:
+        cutoff = []
+        if cutoff_low is not None:
+            cutoff += [cutoff_low]
+        if cutoff_high is not None:
+            cutoff += [cutoff_high]
+        if len(cutoff) == 2:
+            filter_type = "bandpass"
+        elif len(cutoff) == 1 and cutoff_low is not None:
+            filter_type = "highpass"
+        elif len(cutoff) == 1 and cutoff_high is not None:
+            filter_type = "lowpass"
+        else:
+            raise ValueError("Please provide either cutoff_low or cutoff_high.")
+        h = signal.firwin(order, cutoff, pass_zero=filter_type, fs=fs, window=window)
+    if causal:
+        y = signal.lfilter(h, 1., data[sel])
+    else:
+        padlen = padlen if padlen is not None else 3 * len(h)
+        if sum(sel) <= padlen:
+            y = np.empty_like(data[sel])
+        else:
+            y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
+    res[sel] = y
+    return res
+
+
 class KolmogorovZurbenkoBaseClass:
 
     def __init__(self, df, wl, itr, is_child=False, filter_dim="window"):
-- 
GitLab
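
The vectorised variant replaces the per-timestamp Python loop with a helper "window"
dimension that holds shifted copies of the series and then filters every window vector
independently. A self-contained sketch of the shift-and-stack idea (single variable,
invented sizes; the real code splices apriori values into the future half of each
window instead of zero-filling):

    import numpy as np
    import pandas as pd
    import xarray as xr
    from scipy import signal

    time = pd.date_range("2010-01-01", periods=500, freq="D")
    data = xr.DataArray(np.random.randn(500), dims="datetime", coords={"datetime": time})

    # build a "window" dimension holding lagged and leading copies of the series
    length = 5
    shifts = list(range(-length, length + 1))
    window_index = xr.DataArray(shifts, dims="window", name="window")
    windowed = xr.concat([data.shift(datetime=-i) for i in shifts], dim=window_index)

    # filter each window vector independently; vectorize loops over the remaining dims
    h = signal.firwin(2 * length + 1, 0.1, pass_zero="lowpass", window="hamming")
    filt = xr.apply_ufunc(lambda x: signal.filtfilt(h, 1., x, padlen=length),
                          windowed.fillna(0.), input_core_dims=[["window"]],
                          output_core_dims=[["window"]], vectorize=True)

    # keep only the centre point (shift 0) of every window, as the patch does
    result = filt.sel(window=0, drop=True)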


From 202d9baa0ec517385994ccae792208608789682b Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 11:20:37 +0200
Subject: [PATCH 109/175] add log statement to track progress

---
 mlair/helpers/filter.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index df5522da..1e864ee8 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -260,9 +260,10 @@ class ClimateFIRFilter:
         future = self._shift_data(apriori, range(1, length + 1), time_dim, var_dim, new_dim)
         filter_input_data = history.combine_first(future)
 
+        time_axis = filter_input_data.coords["datetime"]
         # apply vectorized fir filter along the tmp dimension
-        filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data,
-                              input_core_dims=[[new_dim]], output_core_dims=[[new_dim]], vectorize=True,
+        filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+                              input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
                               kwargs={"fs": fs, "cutoff_high": cutoff_high, "order": order,
                                       "causal": False, "padlen": int(min(padlen_factor, 1) * length)})
 
@@ -382,9 +383,13 @@ def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="ham
     return filtered, h
 
 
-def fir_filter_vectorized(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", h=None, causal=True,
+def fir_filter_vectorized(data, time_stamp, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", h=None,
+                          causal=True,
                           padlen=None):
     """Expects numpy array."""
+    pd_date = pd.to_datetime(time_stamp)
+    if pd_date.day == 1 and pd_date.month in [1, 7]:
+        logging.info(time_stamp)
     sel = ~np.isnan(data)
     res = np.empty_like(data)
     if h is None:
-- 
GitLab


From 955426d739358007b9253c63396a021735dc0fb2 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 13:43:37 +0200
Subject: [PATCH 110/175] added a numpy filter version (should again be faster)

---
 mlair/helpers/filter.py | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 1e864ee8..3f5ee5f3 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -11,7 +11,7 @@ from matplotlib import pyplot as plt
 from scipy import signal
 import xarray as xr
 
-from mlair.helpers import to_list, TimeTrackingWrapper
+from mlair.helpers import to_list, TimeTrackingWrapper, TimeTracking
 
 
 class FIRFilter:
@@ -258,14 +258,19 @@ class ClimateFIRFilter:
         # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
         history = self._shift_data(data, range(-length, 1), time_dim, var_dim, new_dim)
         future = self._shift_data(apriori, range(1, length + 1), time_dim, var_dim, new_dim)
-        filter_input_data = history.combine_first(future)
+        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+        # filter_input_data = history.combine_first(future)
 
         time_axis = filter_input_data.coords["datetime"]
         # apply vectorized fir filter along the tmp dimension
-        filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-                              input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-                              kwargs={"fs": fs, "cutoff_high": cutoff_high, "order": order,
-                                      "causal": False, "padlen": int(min(padlen_factor, 1) * length)})
+        kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
+                  "causal": False, "padlen": int(min(padlen_factor, 1) * length)}
+        with TimeTracking():
+            filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, kwargs)
+        # with TimeTracking():
+        #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+        #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
+        #                           kwargs=kwargs)
 
         # plot
         if self.plot_path is not None:
@@ -383,13 +388,23 @@ def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="ham
     return filtered, h
 
 
-def fir_filter_vectorized(data, time_stamp, fs, order=5, cutoff_low=None, cutoff_high=None, window="hamming", h=None,
+def fir_filter_numpy_vectorized(filter_input_data, var_dim, kwargs):
+    filt_np = xr.DataArray(np.nan, coords=filter_input_data.coords)
+    for var in filter_input_data.coords[var_dim]:
+        a = np.apply_along_axis(fir_filter_vectorized, 2, filter_input_data.sel({var_dim: var}).values, **kwargs)
+        filt_np.loc[{var_dim: var}] = a
+    return filt_np
+
+
+def fir_filter_vectorized(data, time_stamp=None, fs=1, order=5, cutoff_low=None, cutoff_high=None, window="hamming",
+                          h=None,
                           causal=True,
                           padlen=None):
     """Expects numpy array."""
-    pd_date = pd.to_datetime(time_stamp)
-    if pd_date.day == 1 and pd_date.month in [1, 7]:
-        logging.info(time_stamp)
+    if time_stamp is not None:
+        pd_date = pd.to_datetime(time_stamp)
+        if pd_date.day == 1 and pd_date.month in [1, 7]:
+            logging.info(time_stamp)
     sel = ~np.isnan(data)
     res = np.empty_like(data)
     if h is None:
-- 
GitLab
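
fir_filter_numpy_vectorized trades xarray overhead for a plain numpy loop over the
leading axes. A small sketch with invented shapes; the explicit padlen matters because
the filtfilt default of 3 * len(h) would exceed the short window vectors:

    import numpy as np
    from scipy import signal

    arr = np.random.randn(1, 300, 21)  # (stations, time, window); filter the last axis
    h = signal.firwin(21, 0.1, pass_zero="lowpass", window="hamming")

    def _filt(vec, h=h, padlen=10):
        return signal.filtfilt(h, 1., vec, padlen=padlen)  # needs padlen < len(vec)

    filtered = np.apply_along_axis(_filt, 2, arr)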


From 1892f3cbab446347f32aa1bbe54e10f617889936 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 14:02:14 +0200
Subject: [PATCH 111/175] added debug logging

---
 mlair/helpers/filter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 3f5ee5f3..30e81320 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -391,6 +391,8 @@ def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="ham
 def fir_filter_numpy_vectorized(filter_input_data, var_dim, kwargs):
     filt_np = xr.DataArray(np.nan, coords=filter_input_data.coords)
     for var in filter_input_data.coords[var_dim]:
+        logging.info(
+            f"{filter_input_data.coords['Stations'].values[0]}: {str(var.values)}")  # ToDo must be removed, just for debug
         a = np.apply_along_axis(fir_filter_vectorized, 2, filter_input_data.sel({var_dim: var}).values, **kwargs)
         filt_np.loc[{var_dim: var}] = a
     return filt_np
-- 
GitLab


From 1215576ee0d14d1ff2679ef88829fbfcdf90ff12 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 15:09:42 +0200
Subject: [PATCH 112/175] add exception handling if filter input plot wasn't successful

---
 mlair/helpers/filter.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 30e81320..5d2c440d 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -274,11 +274,15 @@ class ClimateFIRFilter:
 
         # plot
         if self.plot_path is not None:
-            pos = 720
-            filter_example = filter_input_data.isel({time_dim: pos})
-            t0 = filter_example.coords[time_dim].values
-            t_slice = filter_input_data.isel({time_dim: slice(pos - length, pos + length + 1)}).coords[time_dim].values
-            self.plot(data, filter_example, var_dim, time_dim, t_slice, t0, plot_index)
+            try:
+                pos = 720
+                filter_example = filter_input_data.isel({time_dim: pos})
+                t0 = filter_example.coords[time_dim].values
+                t_slice = filter_input_data.isel({time_dim: slice(pos - length, pos + length + 1)}).coords[
+                    time_dim].values
+                self.plot(data, filter_example, var_dim, time_dim, t_slice, t0, plot_index)
+            except IndexError:
+                pass
 
         # select only values at tmp dimension 0 at each point in time
         return filt.sel({new_dim: 0}, drop=True), h, apriori
-- 
GitLab


From f13604b013c53ce2bfc38d649da3b68ae1f3bc7d Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 15:36:39 +0200
Subject: [PATCH 113/175] removed debug slice

---
 mlair/helpers/filter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 5d2c440d..a66eef77 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -83,7 +83,7 @@ class ClimateFIRFilter:
         for i in range(len(order)):
             # calculate climatological filter
             clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
-            fi, hi, apriori = clim_filter(input_data.sel({time_dim: slice("2006")}), fs, cutoff[i], order[i],
+            fi, hi, apriori = clim_filter(input_data, fs, cutoff[i], order[i],
                                           apriori=apriori_list[i],
                                           sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
                                           var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor)
-- 
GitLab


From e32f012e5ce16b9ca587a5224f0d9c205910f26e Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 15:54:33 +0200
Subject: [PATCH 114/175] try out dask approach

---
 mlair/helpers/filter.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index a66eef77..0957d41b 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -10,6 +10,7 @@ import pandas as pd
 from matplotlib import pyplot as plt
 from scipy import signal
 import xarray as xr
+import dask.array as da
 
 from mlair.helpers import to_list, TimeTrackingWrapper, TimeTracking
 
@@ -267,10 +268,10 @@ class ClimateFIRFilter:
                   "causal": False, "padlen": int(min(padlen_factor, 1) * length)}
         with TimeTracking():
             filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, kwargs)
-        # with TimeTracking():
-        #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-        #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-        #                           kwargs=kwargs)
+        with TimeTracking():
+            filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+                                  input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
+                                  kwargs=kwargs)
 
         # plot
         if self.plot_path is not None:
@@ -397,7 +398,7 @@ def fir_filter_numpy_vectorized(filter_input_data, var_dim, kwargs):
     for var in filter_input_data.coords[var_dim]:
         logging.info(
             f"{filter_input_data.coords['Stations'].values[0]}: {str(var.values)}")  # ToDo must be removed, just for debug
-        a = np.apply_along_axis(fir_filter_vectorized, 2, filter_input_data.sel({var_dim: var}).values, **kwargs)
+        a = da.apply_along_axis(fir_filter_vectorized, 2, filter_input_data.sel({var_dim: var}).values, **kwargs)
         filt_np.loc[{var_dim: var}] = a
     return filt_np
 
-- 
GitLab
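
dask.array.apply_along_axis mirrors the numpy call but evaluates lazily and in
parallel; note, though, that the patch feeds it filter_input_data.sel(...).values, an
eager numpy array, so the speed-up only materialises once the input is a real dask
array. A hedged sketch with invented shapes; the dtype/shape hints are needed because
dask otherwise infers them from a dummy length-1 call, which filtfilt rejects:

    import numpy as np
    import dask.array as da
    from scipy import signal

    arr = da.from_array(np.random.randn(4, 300, 21), chunks=(1, 100, 21))
    h = signal.firwin(21, 0.1, pass_zero="lowpass", window="hamming")

    def _filt(vec, h=h, padlen=10):
        return signal.filtfilt(h, 1., vec, padlen=padlen)

    # lazy graph; .compute() triggers the chunk-parallel filtering
    filtered = da.apply_along_axis(_filt, 2, arr, dtype=arr.dtype, shape=(21,)).compute()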


From 4ab948e3007a1c02b9f887b888d6a07616d1aee5 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 16:24:49 +0200
Subject: [PATCH 115/175] maybe found axis bug

---
 mlair/helpers/filter.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 0957d41b..82f0020f 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -267,11 +267,11 @@ class ClimateFIRFilter:
         kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
                   "causal": False, "padlen": int(min(padlen_factor, 1) * length)}
         with TimeTracking():
-            filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, kwargs)
-        with TimeTracking():
-            filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-                                  input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-                                  kwargs=kwargs)
+            filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
+        # with TimeTracking():
+        #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+        #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
+        #                           kwargs=kwargs)
 
         # plot
         if self.plot_path is not None:
@@ -393,12 +393,13 @@ def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="ham
     return filtered, h
 
 
-def fir_filter_numpy_vectorized(filter_input_data, var_dim, kwargs):
+def fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs):
     filt_np = xr.DataArray(np.nan, coords=filter_input_data.coords)
     for var in filter_input_data.coords[var_dim]:
         logging.info(
             f"{filter_input_data.coords['Stations'].values[0]}: {str(var.values)}")  # ToDo must be removed, just for debug
-        a = da.apply_along_axis(fir_filter_vectorized, 2, filter_input_data.sel({var_dim: var}).values, **kwargs)
+        a = np.apply_along_axis(fir_filter_vectorized, filter_input_data.dims.index(new_dim),
+                                filter_input_data.sel({var_dim: var}).values, **kwargs)
         filt_np.loc[{var_dim: var}] = a
     return filt_np
 
-- 
GitLab
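
The fix looks the window axis up by name instead of hard-coding position 2, which
would silently break whenever the dimension order changes. In short:

    import numpy as np
    import xarray as xr

    arr = xr.DataArray(np.zeros((2, 3, 4)), dims=("Stations", "datetime", "window"))
    axis = arr.dims.index("window")  # 2 here, but stays correct for any dim order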


From 0682b90348165ed5f19f5d04b5708a50abc2a9e3 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 17:13:22 +0200
Subject: [PATCH 116/175] speed comparison

---
 mlair/helpers/filter.py | 46 +++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 82f0020f..1434b6a8 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -265,13 +265,13 @@ class ClimateFIRFilter:
         time_axis = filter_input_data.coords["datetime"]
         # apply vectorized fir filter along the tmp dimension
         kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
-                  "causal": False, "padlen": int(min(padlen_factor, 1) * length)}
-        with TimeTracking():
+                  "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
+        with TimeTracking(name="numpy_vec"):
             filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
-        # with TimeTracking():
-        #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-        #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-        #                           kwargs=kwargs)
+        with TimeTracking(name="xr_apply_ufunc"):
+            filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+                                  input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
+                                  kwargs=kwargs)
 
         # plot
         if self.plot_path is not None:
@@ -413,8 +413,8 @@ def fir_filter_vectorized(data, time_stamp=None, fs=1, order=5, cutoff_low=None,
         pd_date = pd.to_datetime(time_stamp)
         if pd_date.day == 1 and pd_date.month in [1, 7]:
             logging.info(time_stamp)
-    sel = ~np.isnan(data)
-    res = np.empty_like(data)
+    # sel = ~np.isnan(data)
+    # res = np.empty_like(data)
     if h is None:
         cutoff = []
         if cutoff_low is not None:
@@ -431,13 +431,33 @@ def fir_filter_vectorized(data, time_stamp=None, fs=1, order=5, cutoff_low=None,
             raise ValueError("Please provide either cutoff_low or cutoff_high.")
         h = signal.firwin(order, cutoff, pass_zero=filter_type, fs=fs, window=window)
     if causal:
-        y = signal.lfilter(h, 1., data[sel])
+        # y = signal.lfilter(h, 1., data[sel])
+        y = signal.lfilter(h, 1., data)
     else:
         padlen = padlen if padlen is not None else 3 * len(h)
-        if sum(sel) <= padlen:
-            y = np.empty_like(data[sel])
-        else:
-            y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
+        # if sum(sel) <= padlen:
+        #     y = np.empty_like(data[sel])
+        # else:
+        #     with TimeTracking():
+        #         y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
+    y = signal.filtfilt(h, 1., data, padlen=padlen)
+    # res[sel] = y
+    # return res
+    return y
+
+
+def fir_filter_vectorized_short(data, time_stamp=None, fs=1, order=5, cutoff_low=None, cutoff_high=None,
+                                window="hamming",
+                                h=None,
+                                causal=True,
+                                padlen=None):
+    """Expects numpy array."""
+    sel = ~np.isnan(data)
+    res = np.empty_like(data)
+    if sum(sel) <= padlen:
+        y = np.empty_like(data[sel])
+    else:
+        y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
     res[sel] = y
     return res
 
-- 
GitLab


From d28ad0a0b36fc1084f67304b10443f8499fc3ec8 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 6 May 2021 22:48:27 +0200
Subject: [PATCH 117/175] new try with convolve

---
 mlair/helpers/filter.py | 46 +++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 1434b6a8..84b97295 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -3,6 +3,7 @@ import warnings
 from typing import Union, Callable
 import logging
 import os
+import time
 
 import datetime
 import numpy as np
@@ -266,12 +267,19 @@ class ClimateFIRFilter:
         # apply vectorized fir filter along the tmp dimension
         kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
                   "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
-        with TimeTracking(name="numpy_vec"):
-            filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
-        with TimeTracking(name="xr_apply_ufunc"):
-            filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-                                  input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-                                  kwargs=kwargs)
+        # with TimeTracking(name="numpy_vec"):
+        #     filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
+        # with TimeTracking(name="xr_apply_ufunc"):
+        #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+        #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
+        #                           kwargs=kwargs)
+        with TimeTracking(name="convolve"):
+            slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
+            filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel(window=slicer),
+                                  input_core_dims=[["window"]],
+                                  output_core_dims=[["window"]],
+                                  vectorize=True,
+                                  kwargs={"h": h})
 
         # plot
         if self.plot_path is not None:
@@ -404,6 +412,10 @@ def fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs):
     return filt_np
 
 
+def fir_filter_convolve_vectorized(data, h):
+    return signal.convolve(data, h, mode='same', method="direct") / sum(h)
+
+
 def fir_filter_vectorized(data, time_stamp=None, fs=1, order=5, cutoff_low=None, cutoff_high=None, window="hamming",
                           h=None,
                           causal=True,
@@ -435,31 +447,21 @@ def fir_filter_vectorized(data, time_stamp=None, fs=1, order=5, cutoff_low=None,
         y = signal.lfilter(h, 1., data)
     else:
         padlen = padlen if padlen is not None else 3 * len(h)
+        # print(sum(sel))
         # if sum(sel) <= padlen:
         #     y = np.empty_like(data[sel])
         # else:
-        #     with TimeTracking():
-        #         y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
-    y = signal.filtfilt(h, 1., data, padlen=padlen)
+        #     y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
+        y = signal.filtfilt(h, 1., data, padlen=padlen)
     # res[sel] = y
     # return res
     return y
 
 
-def fir_filter_vectorized_short(data, time_stamp=None, fs=1, order=5, cutoff_low=None, cutoff_high=None,
-                                window="hamming",
-                                h=None,
-                                causal=True,
-                                padlen=None):
+def fir_filter_vectorized_short(data, h=None, padlen=None):
     """Expects numpy array."""
-    sel = ~np.isnan(data)
-    res = np.empty_like(data)
-    if sum(sel) <= padlen:
-        y = np.empty_like(data[sel])
-    else:
-        y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
-    res[sel] = y
-    return res
+    y = signal.filtfilt(h, 1., data, padlen=padlen)
+    return y
 
 
 class KolmogorovZurbenkoBaseClass:
-- 
GitLab
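
The switch above replaces the forward-backward filtfilt call with a single
direct convolution. Because the firwin kernel is symmetric, one centered pass is
already zero-phase; note that filtfilt applies the kernel twice (squaring the
magnitude response), so the two variants are close but not identical. A minimal
sketch of the relation, assuming a hamming-window lowpass kernel h:

    import numpy as np
    from scipy import signal

    h = signal.firwin(91, 1 / 30, pass_zero="lowpass", fs=1, window="hamming")
    x = np.random.randn(1000)

    # single zero-phase pass: symmetric kernel, output centered like mode="same"
    y_conv = signal.convolve(x, h, mode="same", method="direct") / sum(h)

    # firwin normalizes the kernel to unity gain at DC, so sum(h) is ~1 and the
    # division is only a safeguard
    assert np.isclose(sum(h), 1.0, atol=1e-6)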


From 82c4ba1e2844b1e9b23bcc23654d8df8ca2fae29 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Mon, 17 May 2021 18:25:29 +0200
Subject: [PATCH 118/175] new data handler for mixed sampling and climate FIR;
 fine tuning of the new parameter apriori_diurnal is still required

---
 .../data_handler_mixed_sampling.py            | 39 +++++++-
 .../data_handler/data_handler_with_filter.py  | 11 ++-
 mlair/helpers/filter.py                       | 99 ++++++++++++++-----
 3 files changed, 120 insertions(+), 29 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 718a8f3e..565a50df 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -3,7 +3,7 @@ __date__ = '2020-11-05'
 
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation, \
-    DataHandlerFirFilterSingleStation, DataHandlerFilterSingleStation
+    DataHandlerFirFilterSingleStation, DataHandlerFilterSingleStation, DataHandlerClimateFirFilterSingleStation
 from mlair.data_handler import DefaultDataHandler
 from mlair import helpers
 from mlair.helpers import remove_items
@@ -221,6 +221,43 @@ class DataHandlerMixedSamplingWithFirFilter(DefaultDataHandler):
     _requirements = data_handler.requirements()
 
 
+class DataHandlerMixedSamplingWithClimateFirFilterSingleStation(DataHandlerMixedSamplingWithFilterSingleStation,
+                                                                DataHandlerClimateFirFilterSingleStation):
+    _requirements1 = DataHandlerClimateFirFilterSingleStation.requirements()
+    _requirements2 = DataHandlerMixedSamplingWithFilterSingleStation.requirements()
+    _requirements = list(set(_requirements1 + _requirements2))
+
+    def estimate_filter_width(self):
+        """Filter width is determined by the filter with the highest order."""
+        return max(self.filter_order)
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def _extract_lazy(self, lazy_data):
+        _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori = lazy_data
+        DataHandlerSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data))
+
+    @staticmethod
+    def _get_fs(**kwargs):
+        """Return frequency in 1/day (not Hz)"""
+        sampling = kwargs.get("sampling")[0]
+        if sampling == "daily":
+            return 1
+        elif sampling == "hourly":
+            return 24
+        else:
+            raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolutions are supported.")
+
+
+class DataHandlerMixedSamplingWithClimateFirFilter(DefaultDataHandler):
+    """Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
+
+    data_handler = DataHandlerMixedSamplingWithClimateFirFilterSingleStation
+    data_handler_transformation = DataHandlerMixedSamplingWithClimateFirFilterSingleStation
+    _requirements = data_handler.requirements()
+
+
 class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithKzFilterSingleStation):
     """
     Data handler using mixed sampling for input and target. Inputs are temporally filtered and depending on the
diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 7be76082..097c0da7 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -295,16 +295,20 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     :param apriori_type: set type of information that is provided to the clim filter. For the first low pass, a
         calculated or given statistic is always used. For residuum prediction, a constant value of zero is assumed if
         apriori_type is None or `zeros`, and a climatology of the residuum is used for `residuum_stats`.
+    :param apriori_diurnal: use the diurnal anomaly of each hour in addition to the apriori information chosen by
+        parameter apriori_type. This is only applicable to hourly resolution data.
     """
 
     _requirements = remove_items(DataHandlerFirFilterSingleStation.requirements(), "station")
-    _hash = DataHandlerFirFilterSingleStation._hash + ["apriori_type", "apriori_sel_opts"]
+    _hash = DataHandlerFirFilterSingleStation._hash + ["apriori_type", "apriori_sel_opts", "apriori_diurnal"]
     _store_attributes = DataHandlerFirFilterSingleStation.store_attributes() + ["apriori"]
 
-    def __init__(self, *args, apriori=None, apriori_type=None, apriori_sel_opts=None, plot_path=None, **kwargs):
+    def __init__(self, *args, apriori=None, apriori_type=None, apriori_diurnal=False, apriori_sel_opts=None,
+                 plot_path=None, **kwargs):
         self.apriori_type = apriori_type
         self.climate_filter_coeff = None  # coefficients of the used FIR filter
         self.apriori = apriori  # exogenous apriori information or None to calculate from data (endogenous)
+        self.apriori_diurnal = apriori_diurnal
         self.all_apriori = None  # collection of all apriori information
         self.apriori_sel_opts = apriori_sel_opts  # used to separate exogenous and endogenous information
         self.plot_path = plot_path  # use this path to create insight plots
@@ -317,7 +321,8 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
-                                          sel_opts=self.apriori_sel_opts, plot_path=self.plot_path, plot_name=str(self))
+                                          apriori_diurnal=self.apriori_diurnal, sel_opts=self.apriori_sel_opts,
+                                          plot_path=self.plot_path, plot_name=str(self))
         self.climate_filter_coeff = climate_filter.filter_coefficients
 
         # store apriori information: store all if residuum_stat method was used, otherwise just store initial apriori
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 84b97295..b26b616f 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -57,7 +57,8 @@ class FIRFilter:
 class ClimateFIRFilter:
 
     def __init__(self, data, fs, order, cutoff, window, time_dim, var_dim, apriori=None, apriori_type=None,
-                 sel_opts=None, plot_path=None, plot_name=None, vectorized=True, padlen_factor=0.8):
+                 apriori_diurnal=False, sel_opts=None, plot_path=None, plot_name=None, vectorized=True,
+                 padlen_factor=0.8):
         """
         :param data: data to filter
         :param fs: sampling frequency in 1/days -> 1d: fs=1 -> 1H: fs=24
@@ -71,6 +72,8 @@ class ClimateFIRFilter:
         :param apriori_type: type of apriori information to use. Climatology will always be used for the first low pass. For
             the residuum either the value zero is used (apriori_type is None or "zeros") or a climatology on the
             residua is used ("residuum_stats").
+        :param apriori_diurnal: Use the diurnal cycle as additional apriori information (only applicable for hourly
+            resolution data). The mean anomaly of each hour is added to the apriori_type information.
         """
         self.plot_path = plot_path
         self.plot_name = plot_name
@@ -78,8 +81,14 @@ class ClimateFIRFilter:
         h = []
         sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
         sampling = {1: "1d", 24: "1H"}.get(int(fs))
+        if apriori_diurnal is True and sampling == "1H":
+            diurnal_anomalies = self.create_hourly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
+                                                        as_anomaly=True)
+        else:
+            diurnal_anomalies = 0
         if apriori is None:
-            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
+            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling,
+                                               time_dim=time_dim) + diurnal_anomalies
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
         for i in range(len(order)):
@@ -97,11 +106,16 @@ class ClimateFIRFilter:
 
             # create new apriori information for next iteration if no further apriori is provided
             if len(apriori_list) <= i + 1:
+                if apriori_diurnal is True and sampling == "1H":
+                    diurnal_anomalies = self.create_hourly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
+                                                                time_dim=time_dim, as_anomaly=True)
+                else:
+                    diurnal_anomalies = 0
                 if apriori_type is None or apriori_type == "zeros":  # zero version
-                    apriori_list.append(xr.zeros_like(apriori_list[i]))
+                    apriori_list.append(xr.zeros_like(apriori_list[i]) + diurnal_anomalies)
                 elif apriori_type == "residuum_stats":  # calculate monthly statistic on residuum
                     apriori_list.append(-self.create_monthly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
-                                                                  time_dim=time_dim))
+                                                                  time_dim=time_dim) + diurnal_anomalies)
                 else:
                     raise ValueError(f"Cannot handle unkown apriori type: {apriori_type}. Please choose from None, "
                                      f"`zeros` or `residuum_stats`.")
@@ -141,20 +155,49 @@ class ClimateFIRFilter:
         # create monthly mean and replace entries in unity array
         monthly_mean = data.groupby(f"{time_dim}.month").mean()
         for month in monthly_mean.month.values:
-            loc = (monthly[f"{time_dim}.month"] == month)
-            monthly.loc[{time_dim: loc}] = monthly_mean.sel(month=month)
-
+            monthly = xr.where((monthly[f"{time_dim}.month"] == month),
+                               monthly_mean.sel(month=month, drop=True),
+                               monthly)
+        # transform monthly information into original sampling rate
+        return monthly.resample({time_dim: sampling}).interpolate()
+
+        # for month in monthly_mean.month.values:
+        #     loc = (monthly[f"{time_dim}.month"] == month)
+        #     monthly.loc[{time_dim: loc}] = monthly_mean.sel(month=month, drop=True)
         # aggregate monthly information (shift by half month, because resample base is last day)
-        return monthly.resample({time_dim: "1m"}).max().resample({time_dim: sampling}).interpolate()
+        # return monthly.resample({time_dim: "1m"}).max().resample({time_dim: sampling}).interpolate()
+
+    @staticmethod
+    def create_hourly_mean(data, sel_opts=None, sampling="1H", time_dim="datetime", as_anomaly=True):
+        """Calculate hourly statistics. Either the absolute value or the anomaly (as_anomaly=True)."""
+        # can only be used for hourly sampling rate
+        assert sampling == "1H"
+
+        # create unity xarray in hourly resolution
+        hourly = xr.ones_like(data)
+
+        # apply selection if given (only use subset for hourly means)
+        if sel_opts is not None:
+            data = data.sel(**sel_opts)
+
+        # create mean for each hour and replace entries in unity array, calculate anomaly if enabled
+        hourly_mean = data.groupby(f"{time_dim}.hour").mean()
+        if as_anomaly is True:
+            hourly_mean = hourly_mean - hourly_mean.mean("hour")
+        for hour in hourly_mean.hour.values:
+            loc = (hourly[f"{time_dim}.hour"] == hour)
+            hourly.loc[{f"{time_dim}": loc}] = hourly_mean.sel(hour=hour)
+        return hourly
 
     @staticmethod
-    def extend_apriori(data, apriori, time_dim):
+    def extend_apriori(data, apriori, time_dim, sampling="1d"):
         """
         Extend time range of apriori information.
 
         This method will fail if apriori is available for a shorter period than the gap to fill.
         """
         dates = data.coords[time_dim].values
+        td_type = {"1d": "D", "1H": "h"}.get(sampling)
 
         # apriori starts after data
         if dates[0] < apriori.coords[time_dim].values[0]:
@@ -164,8 +207,8 @@ class ClimateFIRFilter:
             coords = apriori.coords
 
             # create new time axis
-            start = coords[time_dim][0].values.astype("datetime64[D]") - np.timedelta64(extend_range, "D")
-            end = coords[time_dim][0].values.astype("datetime64[D]")
+            start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) - np.timedelta64(extend_range, "D")
+            end = coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
             new_time_axis = np.arange(start, end).astype("datetime64[ns]")
 
             # extract old values to use with new axis
@@ -185,13 +228,16 @@ class ClimateFIRFilter:
             coords = apriori.coords
 
             # create new time axis
-            start = coords[time_dim][-1].values.astype("datetime64[D]")
-            end = coords[time_dim][-1].values.astype("datetime64[D]") + np.timedelta64(extend_range, "D")
+            factor = 1 if td_type == "D" else 24
+            start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
+            end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) + np.timedelta64(extend_range * factor,
+                                                                                                  td_type)
             new_time_axis = np.arange(start, end).astype("datetime64[ns]")
 
             # extract old values to use with new axis
-            start = coords[time_dim][-1].values.astype("datetime64[D]") - np.timedelta64(extend_range - 1, "D")
-            end = coords[time_dim][-1].values.astype("datetime64[D]")
+            start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
+                extend_range * factor - 1, td_type)
+            end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
             new_values = apriori.sel({time_dim: slice(start, end)})
             new_values.coords[time_dim] = new_time_axis
 
@@ -207,7 +253,7 @@ class ClimateFIRFilter:
         # calculate apriori information from data if not given and extend its range if not sufficiently long
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
-        apriori = self.extend_apriori(data, apriori, time_dim)
+        apriori = self.extend_apriori(data, apriori, time_dim, sampling)
 
         # calculate FIR filter coefficients
         h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
@@ -248,7 +294,7 @@ class ClimateFIRFilter:
         # calculate apriori information from data if not given and extend its range if not sufficiently long
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
-        apriori = self.extend_apriori(data, apriori, time_dim)
+        apriori = self.extend_apriori(data, apriori, time_dim, sampling)
 
         # calculate FIR filter coefficients
         h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
@@ -258,12 +304,14 @@ class ClimateFIRFilter:
         new_dim = self._create_tmp_dimension(data)
 
         # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
-        history = self._shift_data(data, range(-length, 1), time_dim, var_dim, new_dim)
-        future = self._shift_data(apriori, range(1, length + 1), time_dim, var_dim, new_dim)
+        history = self._shift_data(data, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
+        future = self._shift_data(apriori, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
         filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
         # filter_input_data = history.combine_first(future)
+        # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
+        # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
 
-        time_axis = filter_input_data.coords["datetime"]
+        time_axis = filter_input_data.coords[time_dim]
         # apply vectorized fir filter along the tmp dimension
         kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
                   "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
@@ -275,19 +323,20 @@ class ClimateFIRFilter:
         #                           kwargs=kwargs)
         with TimeTracking(name="convolve"):
             slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
-            filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel(window=slicer),
-                                  input_core_dims=[["window"]],
-                                  output_core_dims=[["window"]],
+            filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
+                                  input_core_dims=[[new_dim]],
+                                  output_core_dims=[[new_dim]],
                                   vectorize=True,
                                   kwargs={"h": h})
 
         # plot
         if self.plot_path is not None:
             try:
-                pos = 720
+                pos = 720 * fs
                 filter_example = filter_input_data.isel({time_dim: pos})
                 t0 = filter_example.coords[time_dim].values
-                t_slice = filter_input_data.isel({time_dim: slice(pos - length, pos + length + 1)}).coords[
+                t_slice = filter_input_data.isel(
+                    {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
                     time_dim].values
                 self.plot(data, filter_example, var_dim, time_dim, t_slice, t0, plot_index)
             except IndexError:
-- 
GitLab
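
The apriori_diurnal option introduced above adds a zero-mean diurnal cycle on
top of the monthly apriori statistics. A minimal sketch of the anomaly
computation with xarray's groupby, using synthetic hourly data (names and
values here are illustrative, not the handler's API):

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2010-01-01", periods=24 * 60, freq="1H")
    data = xr.DataArray(np.sin(2 * np.pi * time.hour / 24) + 0.1 * np.random.randn(time.size),
                        coords={"datetime": time}, dims="datetime")

    hourly_mean = data.groupby("datetime.hour").mean()        # one value per hour of day
    diurnal_anomaly = hourly_mean - hourly_mean.mean("hour")  # as_anomaly=True: zero mean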


From c7b75f3e6a0b018b6cb711f09a4bfc8bb1a3c774 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 14:28:26 +0200
Subject: [PATCH 119/175] filter plots cover different seasons; filter now
 returns data including missing values (not the squeezed version)

---
 mlair/helpers/filter.py | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index b26b616f..b77a5910 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -230,13 +230,16 @@ class ClimateFIRFilter:
             # create new time axis
             factor = 1 if td_type == "D" else 24
             start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
-            end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) + np.timedelta64(extend_range * factor,
-                                                                                                  td_type)
+            end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) + np.timedelta64(
+                extend_range * factor + 1,
+                td_type)
             new_time_axis = np.arange(start, end).astype("datetime64[ns]")
 
             # extract old values to use with new axis
             start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
-                extend_range * factor - 1, td_type)
+                extend_range * factor, td_type)
+            # start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
+            #     extend_range * factor, td_type)
             end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
             new_values = apriori.sel({time_dim: slice(start, end)})
             new_values.coords[time_dim] = new_time_axis
@@ -331,19 +334,24 @@ class ClimateFIRFilter:
 
         # plot
         if self.plot_path is not None:
-            try:
-                pos = 720 * fs
-                filter_example = filter_input_data.isel({time_dim: pos})
-                t0 = filter_example.coords[time_dim].values
-                t_slice = filter_input_data.isel(
-                    {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
-                    time_dim].values
-                self.plot(data, filter_example, var_dim, time_dim, t_slice, t0, plot_index)
-            except IndexError:
-                pass
+            for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
+                try:
+                    pos = int(time_pos * 365 * fs)
+                    filter_example = filter_input_data.isel({time_dim: pos})
+                    t0 = filter_example.coords[time_dim].values
+                    t_slice = filter_input_data.isel(
+                        {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
+                        time_dim].values
+                    self.plot(data, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
+                except IndexError:
+                    pass
 
         # select only values at tmp dimension 0 at each point in time
-        return filt.sel({new_dim: 0}, drop=True), h, apriori
+        res = filt.sel({new_dim: 0}, drop=True)
+        # create result array with same shape like input data, gabs are filled by nans
+        res_full = xr.ones_like(data) * np.nan
+        res_full.loc[res.coords] = res
+        return res_full, h, apriori
 
     @staticmethod
     def _create_tmp_dimension(data):
-- 
GitLab
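
The changed return value above writes the filtered result back into an array
shaped like the original input, so missing time steps reappear as NaN instead
of being squeezed away. The idiom in isolation, with hypothetical toy data:

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2010-01-01", periods=10, freq="1D")
    data = xr.DataArray(np.arange(10.0), coords={"datetime": time}, dims="datetime")
    res = data.isel(datetime=slice(2, 8))   # stand-in for the gap-free filter output

    res_full = xr.ones_like(data) * np.nan  # same shape as input, all NaN
    res_full.loc[res.coords] = res          # fill only where results exist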


From 09d8ceda2a860b1ccfbad4ce8a07100ed9c56e55 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 15:07:20 +0200
Subject: [PATCH 120/175] use a less memory-intensive version of
 clim_filter_vectorized to avoid memory issues

---
 mlair/helpers/filter.py | 76 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index b77a5910..ced279cc 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -93,7 +93,8 @@ class ClimateFIRFilter:
         input_data = data.__deepcopy__()
         for i in range(len(order)):
             # calculate climatological filter
-            clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
+            # clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
+            clim_filter: Callable = {True: self.clim_filter_vectorized_less_memory, False: self.clim_filter}[vectorized]
             fi, hi, apriori = clim_filter(input_data, fs, cutoff[i], order[i],
                                           apriori=apriori_list[i],
                                           sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
@@ -353,6 +354,79 @@ class ClimateFIRFilter:
         res_full.loc[res.coords] = res
         return res_full, h, apriori
 
+    @TimeTrackingWrapper
+    def clim_filter_vectorized_less_memory(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5,
+                                           sel_opts=None,
+                                           sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
+                                           plot_index=None):
+
+        # calculate apriori information from data if not given and extend its range if not sufficiently long
+        if apriori is None:
+            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
+        apriori = self.extend_apriori(data, apriori, time_dim, sampling)
+
+        # calculate FIR filter coefficients
+        h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
+        length = len(h)
+
+        # create tmp dimension to apply filter, search for unused name
+        new_dim = self._create_tmp_dimension(data)
+
+        coll = []
+
+        for var in data.coords[var_dim].values:
+            d = data.sel({var_dim: [var]})
+            a = apriori.sel({var_dim: [var]})
+
+            # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
+            history = self._shift_data(d, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
+            future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
+            filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+            # filter_input_data = history.combine_first(future)
+            # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
+            # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
+
+            time_axis = filter_input_data.coords[time_dim]
+            # apply vectorized fir filter along the tmp dimension
+            kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
+                      "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
+            # with TimeTracking(name="numpy_vec"):
+            #     filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
+            # with TimeTracking(name="xr_apply_ufunc"):
+            #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+            #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
+            #                           kwargs=kwargs)
+            with TimeTracking(name="convolve"):
+                slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
+                filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
+                                      input_core_dims=[[new_dim]],
+                                      output_core_dims=[[new_dim]],
+                                      vectorize=True,
+                                      kwargs={"h": h})
+
+            # plot
+            if self.plot_path is not None:
+                for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
+                    try:
+                        pos = int(time_pos * 365 * fs)
+                        filter_example = filter_input_data.isel({time_dim: pos})
+                        t0 = filter_example.coords[time_dim].values
+                        t_slice = filter_input_data.isel(
+                            {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
+                            time_dim].values
+                        self.plot(d, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
+                    except IndexError:
+                        pass
+
+            # select only values at tmp dimension 0 at each point in time
+            coll.append(filt.sel({new_dim: 0}, drop=True))
+
+        res = xr.concat(coll, var_dim)
+        # create result array with the same shape as the input data; gaps are filled with NaNs
+        res_full = xr.ones_like(data) * np.nan
+        res_full.loc[res.coords] = res
+        return res_full, h, apriori
+
     @staticmethod
     def _create_tmp_dimension(data):
         new_dim = "window"
-- 
GitLab
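
The less-memory variant above trades the fully vectorized call for a loop over
variables: each iteration only materializes the shifted history/future arrays
for a single variable, and the per-variable results are concatenated at the
end. A sketch of the general pattern (function and variable names are
illustrative):

    import numpy as np
    import xarray as xr

    def apply_per_variable(data, func, var_dim="variables"):
        """Apply func to one variable at a time to bound peak memory usage."""
        coll = []
        for var in data.coords[var_dim].values:
            d = data.sel({var_dim: [var]})   # list-selection keeps the dimension
            coll.append(func(d))
        return xr.concat(coll, var_dim)

    data = xr.DataArray(np.random.randn(4, 100),
                        coords={"variables": ["o3", "no", "no2", "temp"]},
                        dims=("variables", "datetime"))
    smoothed = apply_per_variable(data, lambda d: d.rolling(datetime=5, center=True).mean())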


From 9e4c6db88281e1d489ebe0317ea12241da378703 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 15:27:16 +0200
Subject: [PATCH 121/175] add gc.collect statement

---
 mlair/helpers/filter.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index ced279cc..ff3bb6a5 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -101,6 +101,7 @@ class ClimateFIRFilter:
                                           var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor)
             filtered.append(fi)
             h.append(hi)
+            gc.collect()
 
             # calculate residuum
             input_data = input_data - fi
@@ -396,13 +397,13 @@ class ClimateFIRFilter:
             #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
             #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
             #                           kwargs=kwargs)
-            with TimeTracking(name="convolve"):
-                slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
-                filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
-                                      input_core_dims=[[new_dim]],
-                                      output_core_dims=[[new_dim]],
-                                      vectorize=True,
-                                      kwargs={"h": h})
+            # with TimeTracking(name="convolve"):
+            slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
+            filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
+                                  input_core_dims=[[new_dim]],
+                                  output_core_dims=[[new_dim]],
+                                  vectorize=True,
+                                  kwargs={"h": h})
 
             # plot
             if self.plot_path is not None:
-- 
GitLab
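
The gc.collect() call above is placed after each filter iteration so that the
previous pass's large intermediates are released before the next one is built.
CPython's reference counting frees most numpy buffers as soon as the last
reference is dropped; the explicit collect mainly sweeps reference cycles. A
toy version of the pattern:

    import gc
    import numpy as np

    results = []
    for i in range(3):
        big = np.random.randn(2000, 2000)   # hypothetical large intermediate
        results.append(float(big.mean()))   # keep only the small reduction
        del big                             # drop the reference...
        gc.collect()                        # ...and sweep any lingering cycles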


From d31c0176cfdbe05aa13bdcdace9a2214715d0c17 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 16:25:41 +0200
Subject: [PATCH 122/175] solved lazy error; some station removals still have
 to be fixed

---
 mlair/data_handler/data_handler_mixed_sampling.py |  2 +-
 mlair/helpers/filter.py                           | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 565a50df..03f10eb8 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -236,7 +236,7 @@ class DataHandlerMixedSamplingWithClimateFirFilterSingleStation(DataHandlerMixed
 
     def _extract_lazy(self, lazy_data):
         _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori = lazy_data
-        DataHandlerSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data))
+        DataHandlerMixedSamplingWithFilterSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data))
 
     @staticmethod
     def _get_fs(**kwargs):
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index ff3bb6a5..9f7b5a6e 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -397,13 +397,13 @@ class ClimateFIRFilter:
             #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
             #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
             #                           kwargs=kwargs)
-            # with TimeTracking(name="convolve"):
-            slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
-            filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
-                                  input_core_dims=[[new_dim]],
-                                  output_core_dims=[[new_dim]],
-                                  vectorize=True,
-                                  kwargs={"h": h})
+            with TimeTracking(name="convolve"):
+                slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
+                filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
+                                      input_core_dims=[[new_dim]],
+                                      output_core_dims=[[new_dim]],
+                                      vectorize=True,
+                                      kwargs={"h": h})
 
             # plot
             if self.plot_path is not None:
-- 
GitLab
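
The one-line fix above changes which parent's _extract_lazy is invoked. With
multiple inheritance, calling Base._extract_lazy(self, ...) explicitly selects
one implementation and bypasses the MRO, so naming the wrong base silently
skips the mixed-sampling handling. A minimal sketch of the pitfall (class
names are illustrative):

    class Base:
        def extract(self, data):
            print("Base handles", data)

    class WithFilter(Base):
        def extract(self, data):
            print("WithFilter pre-processing")
            super().extract(data)

    class Combined(WithFilter, Base):
        def extract(self, data):
            # explicit call runs WithFilter.extract including its pre-processing;
            # Base.extract(self, data) would bypass WithFilter entirely
            WithFilter.extract(self, data)

    Combined().extract("x")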


From 754e61f0ad927f37bf54c81d8398aff67e528092 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 16:40:19 +0200
Subject: [PATCH 123/175] added extensive logging to track down the HPC error

---
 mlair/data_handler/data_handler_with_filter.py |  3 ++-
 mlair/helpers/filter.py                        | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 097c0da7..c33da2a9 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -9,7 +9,7 @@ import pandas as pd
 import xarray as xr
 from typing import List, Union, Tuple, Optional
 from functools import partial
-
+import logging
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.data_handler import DefaultDataHandler
 from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
@@ -318,6 +318,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     def apply_filter(self):
         """Apply FIR filter only on inputs."""
         self.apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
+        logging.info(f"{self.station}: call ClimateFIRFilter")
         climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 9f7b5a6e..21dfcbee 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -75,23 +75,27 @@ class ClimateFIRFilter:
         :param apriori_diurnal: Use diurnal cycle as additional apriori information (only applicable for hourly
             resoluted data). The mean anomaly of each hour is added to the apriori_type information.
         """
+        logging.info(f"{plot_name}: start init ClimateFIRFilter")
         self.plot_path = plot_path
         self.plot_name = plot_name
         filtered = []
         h = []
         sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
         sampling = {1: "1d", 24: "1H"}.get(int(fs))
+        logging.info(f"{plot_name}: create diurnal_anomalies")
         if apriori_diurnal is True and sampling == "1H":
             diurnal_anomalies = self.create_hourly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
                                                         as_anomaly=True)
         else:
             diurnal_anomalies = 0
+        logging.info(f"{plot_name}: create monthly apriori")
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling,
                                                time_dim=time_dim) + diurnal_anomalies
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
         for i in range(len(order)):
+            logging.info(f"{plot_name}: start filter for order {order[i]}")
             # calculate climatological filter
             # clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
             clim_filter: Callable = {True: self.clim_filter_vectorized_less_memory, False: self.clim_filter}[vectorized]
@@ -99,20 +103,25 @@ class ClimateFIRFilter:
                                           apriori=apriori_list[i],
                                           sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
                                           var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor)
+
+            logging.info(f"{plot_name}: finished clim_filter calculation")
             filtered.append(fi)
             h.append(hi)
             gc.collect()
 
             # calculate residuum
+            logging.info(f"{plot_name}: calculate residuum")
             input_data = input_data - fi
 
             # create new apriori information for next iteration if no further apriori is provided
             if len(apriori_list) <= i + 1:
+                logging.info(f"{plot_name}: create diurnal_anomalies")
                 if apriori_diurnal is True and sampling == "1H":
                     diurnal_anomalies = self.create_hourly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
                                                                 time_dim=time_dim, as_anomaly=True)
                 else:
                     diurnal_anomalies = 0
+                logging.info(f"{plot_name}: create monthly apriori")
                 if apriori_type is None or apriori_type == "zeros":  # zero version
                     apriori_list.append(xr.zeros_like(apriori_list[i]) + diurnal_anomalies)
                 elif apriori_type == "residuum_stats":  # calculate monthly statistic on residuum
@@ -361,6 +370,8 @@ class ClimateFIRFilter:
                                            sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
                                            plot_index=None):
 
+        logging.info(f"{data.coords['Stations'].values[0]}: extend apriori")
+
         # calculate apriori information from data if not given and extend its range if not sufficiently long
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
@@ -376,12 +387,16 @@ class ClimateFIRFilter:
         coll = []
 
         for var in data.coords[var_dim].values:
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
             d = data.sel({var_dim: [var]})
             a = apriori.sel({var_dim: [var]})
 
             # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
             history = self._shift_data(d, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
             future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
             filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
             # filter_input_data = history.combine_first(future)
             # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
@@ -397,6 +412,7 @@ class ClimateFIRFilter:
             #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
             #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
             #                           kwargs=kwargs)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
             with TimeTracking(name="convolve"):
                 slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
                 filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
@@ -422,8 +438,10 @@ class ClimateFIRFilter:
             # select only values at tmp dimension 0 at each point in time
             coll.append(filt.sel({new_dim: 0}, drop=True))
 
+        logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
         res = xr.concat(coll, var_dim)
         # create result array with the same shape as the input data; gaps are filled with NaNs
+        logging.info(f"{data.coords['Stations'].values[0]}: create res_full")
         res_full = xr.ones_like(data) * np.nan
         res_full.loc[res.coords] = res
         return res_full, h, apriori
-- 
GitLab


From 939f19805a2cd6dd610ec8e44d614295a94cd226 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 16:51:42 +0200
Subject: [PATCH 124/175] more logging

---
 mlair/helpers/filter.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 21dfcbee..e47364ac 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -212,20 +212,36 @@ class ClimateFIRFilter:
 
         # apriori starts after data
         if dates[0] < apriori.coords[time_dim].values[0]:
+            logging.info(f"{data.coords['Stations'].values[0]}: apriori starts after data")
             # add difference in full years
             date_diff = abs(dates[0] - apriori.coords[time_dim].values[0]).astype("timedelta64[D]")
             extend_range = np.ceil(date_diff / (np.timedelta64(1, "D") * 365)).astype(int) * 365
             coords = apriori.coords
 
             # create new time axis
-            start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) - np.timedelta64(extend_range, "D")
+            # start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) - np.timedelta64(extend_range, "D")
+            # end = coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
+            # new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+
+            factor = 1 if td_type == "D" else 24
+            start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
+                extend_range * factor + 1,
+                td_type)
             end = coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
             new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_time_axis = {new_time_axis.shape}")
 
             # extract old values to use with new axis
-            start = coords[time_dim][0].values.astype("datetime64[D]")
-            end = coords[time_dim][0].values.astype("datetime64[D]") + np.timedelta64(extend_range - 1, "D")
+            # start = coords[time_dim][0].values.astype("datetime64[D]")
+            # end = coords[time_dim][0].values.astype("datetime64[D]") + np.timedelta64(extend_range - 1, "D")
+            # new_values = apriori.sel({time_dim: slice(start, end)})
+            # new_values.coords[time_dim] = new_time_axis
+
+            start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
+            end = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) + np.timedelta64(
+                extend_range * factor - 1, td_type)
             new_values = apriori.sel({time_dim: slice(start, end)})
+            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_values = {new_values.shape}")
             new_values.coords[time_dim] = new_time_axis
 
             # add new values to apriori
@@ -233,6 +249,7 @@ class ClimateFIRFilter:
 
         # apriori ends before data
         if dates[-1] + np.timedelta64(365, "D") > apriori.coords[time_dim].values[-1]:
+            logging.info(f"{data.coords['Stations'].values[0]}: apriori ends before data")
             # add difference in full years + 1 year (because apriori is used as future estimate)
             date_diff = abs(dates[-1] - apriori.coords[time_dim].values[-1]).astype("timedelta64[D]")
             extend_range = np.ceil(date_diff / (np.timedelta64(1, "D") * 365)).astype(int) * 365 + 365
@@ -245,6 +262,7 @@ class ClimateFIRFilter:
                 extend_range * factor + 1,
                 td_type)
             new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_time_axis = {new_time_axis.shape}")
 
             # extract old values to use with new axis
             start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
@@ -253,6 +271,7 @@ class ClimateFIRFilter:
             #     extend_range * factor, td_type)
             end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
             new_values = apriori.sel({time_dim: slice(start, end)})
+            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_values = {new_values.shape}")
             new_values.coords[time_dim] = new_time_axis
 
             # add new values to apriori
-- 
GitLab
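
The reworked axis construction above keeps the datetime64 unit consistent with
the sampling: for hourly data td_type is "h" and each day contributes
factor = 24 steps, so ranges built with np.timedelta64 line up with the data.
Sketched with plain numpy (dates are hypothetical):

    import numpy as np

    td_type = "h"                  # "D" for daily, "h" for hourly sampling
    factor = 1 if td_type == "D" else 24
    extend_range = 365             # extension measured in days

    first = np.datetime64("2012-01-01T00", td_type)
    start = first - np.timedelta64(extend_range * factor + 1, td_type)
    new_time_axis = np.arange(start, first).astype("datetime64[ns]")  # excludes end
    assert new_time_axis.size == extend_range * factor + 1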


From 9ff5501727b98dea80425c735ea918dee9b173ca Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 17:05:29 +0200
Subject: [PATCH 125/175] more logging

---
 mlair/helpers/filter.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index e47364ac..4a6a5044 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -263,6 +263,9 @@ class ClimateFIRFilter:
                 td_type)
             new_time_axis = np.arange(start, end).astype("datetime64[ns]")
             logging.info(f"{data.coords['Stations'].values[0]}: shape of new_time_axis = {new_time_axis.shape}")
+            logging.info(f"{data.coords['Stations'].values[0]}: start of new_time_axis = {start}")
+            logging.info(f"{data.coords['Stations'].values[0]}: end of new_time_axis = {end}")
+            logging.info(f"{data.coords['Stations'].values[0]}: delta of new_time_axis = {end - start}")
 
             # extract old values to use with new axis
             start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
@@ -272,6 +275,9 @@ class ClimateFIRFilter:
             end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
             new_values = apriori.sel({time_dim: slice(start, end)})
             logging.info(f"{data.coords['Stations'].values[0]}: shape of new_values = {new_values.shape}")
+            logging.info(f"{data.coords['Stations'].values[0]}: start of new_values = {start}")
+            logging.info(f"{data.coords['Stations'].values[0]}: end of new_values = {end}")
+            logging.info(f"{data.coords['Stations'].values[0]}: delta of new_values = {end - start}")
             new_values.coords[time_dim] = new_time_axis
 
             # add new values to apriori
-- 
GitLab


From dcfde574847d85e85257a48144fcb093b24429bc Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 18:06:00 +0200
Subject: [PATCH 126/175] interpolate now ensures that missing values are not
 dropped but indicated with NaNs

---
 .../data_handler_mixed_sampling.py            |  2 +-
 .../data_handler_single_station.py            | 21 +++++++++++++++----
 mlair/helpers/filter.py                       | 10 ++++++---
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 03f10eb8..00408684 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -150,7 +150,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
                                          self.station_type, self.network, self.store_data_locally, self.data_origin,
                                          start, end)
         data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method[ind],
-                                limit=self.interpolation_limit[ind])
+                                limit=self.interpolation_limit[ind], sampling=self.sampling[ind])
         return data
 
     def _extract_lazy(self, lazy_data):
diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 25822762..4330efd9 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -280,7 +280,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
                                          self.station_type, self.network, self.store_data_locally, self.data_origin,
                                          self.start, self.end)
         self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
-                                      limit=self.interpolation_limit)
+                                      limit=self.interpolation_limit, sampling=self.sampling)
         self.set_inputs_and_targets()
 
     def set_inputs_and_targets(self):
@@ -469,9 +469,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
         all_vars = sorted(statistics_per_var.keys())
         return os.path.join(path, f"{''.join(station)}_{'_'.join(all_vars)}_meta.csv")
 
-    @staticmethod
-    def interpolate(data, dim: str, method: str = 'linear', limit: int = None, use_coordinate: Union[bool, str] = True,
-                    **kwargs):
+    def interpolate(self, data, dim: str, method: str = 'linear', limit: int = None,
+                    use_coordinate: Union[bool, str] = True, sampling="daily", **kwargs):
         """
         Interpolate values according to different methods.
 
@@ -508,8 +507,22 @@ class DataHandlerSingleStation(AbstractDataHandler):
 
         :return: xarray.DataArray
         """
+        data = self.create_full_time_dim(data, dim, sampling)
         return data.interpolate_na(dim=dim, method=method, limit=limit, use_coordinate=use_coordinate, **kwargs)
 
+    @staticmethod
+    def create_full_time_dim(data, dim, sampling):
+        """Ensure time dimension to be equidistant. Sometimes dates if missing values have been dropped."""
+        start = data.coords[dim].values[0]
+        end = data.coords[dim].values[-1]
+        freq = {"daily": "1D", "hourly": "1H"}.get(sampling)
+        datetime_index = pd.DataFrame(index=pd.date_range(start, end, freq=freq))
+        t = data.sel({dim: start}, drop=True)
+        res = xr.DataArray(coords=[datetime_index.index, *[t.coords[c] for c in t.coords]], dims=[dim, *t.coords])
+        res = res.transpose(*data.dims)
+        res.loc[data.coords] = data
+        return res
+
     def make_history_window(self, dim_name_of_inputs: str, window: int, dim_name_of_shift: str) -> None:
         """
         Create a xr.DataArray containing history data.
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 4a6a5044..b6c27cbc 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -92,6 +92,7 @@ class ClimateFIRFilter:
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling,
                                                time_dim=time_dim) + diurnal_anomalies
+            logging.info(f"{plot_name}: apriori shape = {apriori.shape}")
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
         for i in range(len(order)):
@@ -137,7 +138,7 @@ class ClimateFIRFilter:
         self._apriori = apriori_list
 
     @staticmethod
-    def create_unity_array(data, time_dim, extend_range=365):
+    def create_unity_array(data, time_dim, extend_range=366):
         """Create a xr data array filled with ones. time_dim is extended by extend_range days in future and past."""
         coords = data.coords
 
@@ -261,7 +262,7 @@ class ClimateFIRFilter:
             end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) + np.timedelta64(
                 extend_range * factor + 1,
                 td_type)
-            new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+            new_time_axis = np.arange(start, end).astype("datetime64[ns]")  # hint: arange does not include end date
             logging.info(f"{data.coords['Stations'].values[0]}: shape of new_time_axis = {new_time_axis.shape}")
             logging.info(f"{data.coords['Stations'].values[0]}: start of new_time_axis = {start}")
             logging.info(f"{data.coords['Stations'].values[0]}: end of new_time_axis = {end}")
@@ -273,14 +274,17 @@ class ClimateFIRFilter:
             # start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
             #     extend_range * factor, td_type)
             end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
-            new_values = apriori.sel({time_dim: slice(start, end)})
+            new_values = apriori.sel({time_dim: slice(start, end)})  # hint: slice includes end date
             logging.info(f"{data.coords['Stations'].values[0]}: shape of new_values = {new_values.shape}")
             logging.info(f"{data.coords['Stations'].values[0]}: start of new_values = {start}")
             logging.info(f"{data.coords['Stations'].values[0]}: end of new_values = {end}")
             logging.info(f"{data.coords['Stations'].values[0]}: delta of new_values = {end - start}")
+
+            logging.info(f"{data.coords['Stations'].values[0]}: set new_time_axis")
             new_values.coords[time_dim] = new_time_axis
 
             # add new values to apriori
+            logging.info(f"{data.coords['Stations'].values[0]}: add to apriori")
             apriori = apriori.combine_first(new_values)
 
         return apriori
-- 
GitLab
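
create_full_time_dim above rebuilds the array on a gap-free date index so that
interpolate_na sees NaNs at the missing dates instead of silently shortened
data. For a single DataArray the same effect can be sketched with reindex
(minimal hypothetical data, not the handler code):

    import pandas as pd
    import xarray as xr

    time = pd.to_datetime(["2010-01-01", "2010-01-02", "2010-01-05"])  # Jan 3/4 missing
    data = xr.DataArray([1.0, 2.0, 5.0], coords={"datetime": time}, dims="datetime")

    full_index = pd.date_range(time[0], time[-1], freq="1D")
    full = data.reindex({"datetime": full_index})   # missing dates become NaN
    filled = full.interpolate_na(dim="datetime", method="linear", limit=1)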


From 57999dac60a283a295d30d0db487cb1de463b281 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 19 May 2021 09:43:04 +0200
Subject: [PATCH 127/175] add another gc.collect call

---
 mlair/helpers/filter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index b6c27cbc..2fe95ebb 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -466,6 +466,7 @@ class ClimateFIRFilter:
 
             # select only values at tmp dimension 0 at each point in time
             coll.append(filt.sel({new_dim: 0}, drop=True))
+            gc.collect()
 
         logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
         res = xr.concat(coll, var_dim)
-- 
GitLab


From 1f1e658d7b0457a2268eaac2a820d89002ea78f8 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 19 May 2021 10:20:16 +0200
Subject: [PATCH 128/175] add another gc collect

---
 mlair/helpers/filter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 2fe95ebb..3cb07140 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -423,8 +423,10 @@ class ClimateFIRFilter:
             # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
             history = self._shift_data(d, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
+            gc.collect()
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
             future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
+            gc.collect()
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
             filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
             # filter_input_data = history.combine_first(future)
-- 
GitLab
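
[Editor's note] Patches 127 and 128 insert explicit gc.collect() calls because the shifted
history/future arrays are large temporaries that would otherwise linger until the next automatic
collection. A minimal sketch of the pattern, assuming synthetic data of illustrative size:

    import gc
    import numpy as np
    import xarray as xr

    data = xr.DataArray(np.random.rand(5000, 3), dims=("datetime", "variables"))
    results = []
    for var in range(3):
        # build a large temporary similar to the shifted history/future arrays
        shifted = xr.concat([data.isel(variables=var).shift(datetime=-i) for i in range(-10, 1)],
                            dim="window")
        results.append(shifted.isel(window=0).mean())
        del shifted   # drop the reference to the temporary ...
        gc.collect()  # ... and reclaim its memory before the next iteration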


From e96d635bbede4295e9abd79232ae35da9f71ac74 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 19 May 2021 10:44:13 +0200
Subject: [PATCH 129/175] add IndexError to f_proc

---
 mlair/run_modules/pre_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index d50f6f9a..d21f8920 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -351,7 +351,7 @@ def f_proc(data_handler, station, name_affix, store, **kwargs):
     """
     try:
         res = data_handler.build(station, name_affix=name_affix, store_processed_data=store, **kwargs)
-    except (AttributeError, EmptyQueryResult, KeyError, requests.ConnectionError, ValueError) as e:
+    except (AttributeError, EmptyQueryResult, KeyError, requests.ConnectionError, ValueError, IndexError) as e:
         formatted_lines = traceback.format_exc().splitlines()
         logging.info(
             f"remove station {station} because it raised an error: {e} -> {' | '.join(f_inspect_error(formatted_lines))}")
-- 
GitLab
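
[Editor's note] The change above only widens the caught exception tuple so that a station whose
data handler fails with an IndexError is skipped instead of crashing the whole preprocessing run.
The pattern as a standalone sketch (build_one is a hypothetical stand-in for data_handler.build):

    import logging
    import traceback

    def f_proc_sketch(build_one, station, **kwargs):
        try:
            return build_one(station, **kwargs)
        except (AttributeError, KeyError, ValueError, IndexError) as e:
            lines = traceback.format_exc().splitlines()
            logging.info(f"remove station {station} because it raised an error: {e} "
                         f"-> {' | '.join(lines[-2:])}")
            return None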


From df510cdbebce3acff3ebe66419b8494bc8225136 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 20 May 2021 20:34:39 +0200
Subject: [PATCH 130/175] new version now applies the filter along the window
 axis instead of just using t0

---
 .../data_handler/data_handler_with_filter.py  |  41 ++++-
 mlair/helpers/filter.py                       | 173 +++++++++++++++---
 2 files changed, 187 insertions(+), 27 deletions(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index c33da2a9..576fe9d7 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -323,7 +323,8 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
                                           apriori_diurnal=self.apriori_diurnal, sel_opts=self.apriori_sel_opts,
-                                          plot_path=self.plot_path, plot_name=str(self))
+                                          plot_path=self.plot_path, plot_name=str(self),
+                                          minimum_length=self.window_history_size, new_dim=self.window_dim)
         self.climate_filter_coeff = climate_filter.filter_coefficients
 
         # store apriori information: store all if residuum_stat method was used, otherwise just store initial apriori
@@ -332,15 +333,24 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         else:
             self.apriori = climate_filter.initial_apriori_data
         self.all_apriori = climate_filter.apriori_data
-        climate_filter_data = climate_filter.filtered_data
+
+        climate_filter_data = [c.sel({self.window_dim: slice(-self.window_history_size, 0)}) for c in
+                               climate_filter.filtered_data]
+        # climate_filter_data = climate_filter.filtered_data
+
+        # create input data with filter index
+        input_data = xr.concat(climate_filter_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
+        # self.input_data = xr.concat([c.sel(window=slice(-self.window_history_size, 0)) for c in climate_filter_data], pd.Index(self.create_filter_index(), name=self.filter_dim))
 
         # add unfiltered raw data
         if self._add_unfiltered is True:
-            climate_filter_data.append(self.input_data)
+            data_raw = self.shift(self.input_data, self.time_dim, -self.window_history_size)
+            data_raw = data_raw.expand_dims({self.filter_dim: ["unfiltered"]}, -1)
+            input_data = xr.concat([input_data, data_raw], self.filter_dim)
 
-        # create input data with filter index
-        self.input_data = xr.concat(climate_filter_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
+        self.input_data = input_data
 
+        # self.history = self.shift(data, dim_name_of_shift, window, offset=self.window_history_offset)
         # this is just a code snippet to check the results of the filter
         # import matplotlib
         # matplotlib.use("TkAgg")
@@ -397,6 +407,27 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     def _period_to_freq(cutoff_p):
         return [1. / x for x in cutoff_p]
 
+    def make_history_window(self, dim_name_of_inputs: str, window: int, dim_name_of_shift: str) -> None:
+        """
+        Create a xr.DataArray containing history data.
+
+        Shift the data window+1 times and return an xarray object which has a new dimension 'window' containing
+        the shifted data. This is used to represent history in the data. Results are stored in the history attribute.
+
+        :param dim_name_of_inputs: Name of dimension which contains the input variables
+        :param window: number of time steps to look back in history
+                Note: window will be treated as a negative value, in agreement with looking back on
+                a timeline. Nonetheless, positive values are allowed but are converted to their
+                negative counterpart
+        :param dim_name_of_shift: Dimension along which the shift is applied
+        """
+        data = self.input_data
+        sampling = {"daily": "D", "hourly": "h"}.get(to_list(self.sampling)[0])
+        data.coords[dim_name_of_shift] = data.coords[dim_name_of_shift] - np.timedelta64(self.window_history_offset,
+                                                                                         sampling)
+        data.coords[self.window_dim] = data.coords[self.window_dim] + self.window_history_offset
+        self.history = data
+
 
 class DataHandlerClimateFirFilter(DefaultDataHandler):
     """Data handler using climatic adjusted FIR filtered data."""
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 3cb07140..d9226b7c 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -58,7 +58,7 @@ class ClimateFIRFilter:
 
     def __init__(self, data, fs, order, cutoff, window, time_dim, var_dim, apriori=None, apriori_type=None,
                  apriori_diurnal=False, sel_opts=None, plot_path=None, plot_name=None, vectorized=True,
-                 padlen_factor=0.8):
+                 padlen_factor=0.8, minimum_length=None, new_dim=None):
         """
         :param data: data to filter
         :param fs: sampling frequency in 1/days -> 1d: fs=1 -> 1H: fs=24
@@ -95,30 +95,45 @@ class ClimateFIRFilter:
             logging.info(f"{plot_name}: apriori shape = {apriori.shape}")
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
+
+        # create tmp dimension to apply filter, search for unused name
+        new_dim = self._create_tmp_dimension(input_data) if new_dim is None else new_dim
+
         for i in range(len(order)):
             logging.info(f"{plot_name}: start filter for order {order[i]}")
             # calculate climatological filter
             # clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
+            # ToDo: remove all methods except the vectorized version
             clim_filter: Callable = {True: self.clim_filter_vectorized_less_memory, False: self.clim_filter}[vectorized]
+            _minimum_length = self._minimum_length(order, minimum_length, i)
             fi, hi, apriori = clim_filter(input_data, fs, cutoff[i], order[i],
                                           apriori=apriori_list[i],
                                           sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
-                                          var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor)
+                                          var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor,
+                                          minimum_length=_minimum_length, new_dim=new_dim)
 
             logging.info(f"{plot_name}: finished clim_filter calculation")
-            filtered.append(fi)
+            if minimum_length is None:
+                filtered.append(fi)
+            else:
+                filtered.append(fi.sel({new_dim: slice(-minimum_length, 0)}))
             h.append(hi)
             gc.collect()
 
             # calculate residuum
             logging.info(f"{plot_name}: calculate residuum")
-            input_data = input_data - fi
+            coord_range = range(fi.coords[new_dim].values.min(), fi.coords[new_dim].values.max() + 1)
+            if new_dim in input_data.coords:
+                input_data = input_data.sel({new_dim: coord_range}) - fi
+            else:
+                input_data = self._shift_data(input_data, coord_range, time_dim, var_dim, new_dim) - fi
 
             # create new apriori information for next iteration if no further apriori is provided
             if len(apriori_list) <= i + 1:
                 logging.info(f"{plot_name}: create diurnal_anomalies")
                 if apriori_diurnal is True and sampling == "1H":
-                    diurnal_anomalies = self.create_hourly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
+                    diurnal_anomalies = self.create_hourly_mean(input_data.sel({new_dim: 0}, drop=True),
+                                                                sel_opts=sel_opts, sampling=sampling,
                                                                 time_dim=time_dim, as_anomaly=True)
                 else:
                     diurnal_anomalies = 0
@@ -126,17 +141,32 @@ class ClimateFIRFilter:
                 if apriori_type is None or apriori_type == "zeros":  # zero version
                     apriori_list.append(xr.zeros_like(apriori_list[i]) + diurnal_anomalies)
                 elif apriori_type == "residuum_stats":  # calculate monthly statistic on residuum
-                    apriori_list.append(-self.create_monthly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
-                                                                  time_dim=time_dim) + diurnal_anomalies)
+                    apriori_list.append(
+                        -self.create_monthly_mean(input_data.sel({new_dim: 0}, drop=True), sel_opts=sel_opts,
+                                                  sampling=sampling,
+                                                  time_dim=time_dim) + diurnal_anomalies)
                 else:
                     raise ValueError(f"Cannot handle unkown apriori type: {apriori_type}. Please choose from None, "
                                      f"`zeros` or `residuum_stats`.")
         # add last residuum to filtered
-        filtered.append(input_data)
+        if minimum_length is None:
+            filtered.append(input_data)
+        else:
+            filtered.append(input_data.sel({new_dim: slice(-minimum_length, 0)}))
+        # filtered.append(input_data)
         self._filtered = filtered
         self._h = h
         self._apriori = apriori_list
 
+    @staticmethod
+    def _minimum_length(order, minimum_length, pos):
+        next_order = 0
+        if pos + 1 < len(order):
+            next_order = order[pos + 1]
+        if minimum_length is not None:
+            next_order = max(next_order, minimum_length)
+        return next_order if next_order > 0 else None
+
     @staticmethod
     def create_unity_array(data, time_dim, extend_range=366):
         """Create a xr data array filled with ones. time_dim is extended by extend_range days in future and past."""
@@ -393,11 +423,94 @@ class ClimateFIRFilter:
         res_full.loc[res.coords] = res
         return res_full, h, apriori
 
+    def _tmp_analysis(self, data, apriori, var, var_dim, length, time_dim, new_dim, h):
+        logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
+        d = data.sel({var_dim: [var]}).sel(datetime=slice("2007", "2010"))
+        a = apriori.sel({var_dim: [var]}).sel(datetime=slice("2007", "2010"))
+
+        # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
+        history = self._shift_data(d, range(-length, 1), time_dim, var_dim, new_dim)
+
+        future = self._shift_data(d, range(1, length), time_dim, var_dim, new_dim)
+        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
+        with TimeTracking(name="convolve"):
+            filt_nc = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,
+                                     input_core_dims=[[new_dim]],
+                                     output_core_dims=[[new_dim]],
+                                     vectorize=True,
+                                     kwargs={"h": h})
+
+        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim)
+        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
+        with TimeTracking(name="convolve"):
+            filt_t0 = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,
+                                     input_core_dims=[[new_dim]],
+                                     output_core_dims=[[new_dim]],
+                                     vectorize=True,
+                                     kwargs={"h": h})
+
+        diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
+        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim) - diff
+        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
+        with TimeTracking(name="convolve"):
+            filt_diff1d = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,
+                                         input_core_dims=[[new_dim]],
+                                         output_core_dims=[[new_dim]],
+                                         vectorize=True,
+                                         kwargs={"h": h})
+
+        diff = (a - history.sel(window=slice(-24 * 7, 1)).mean(new_dim))
+        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim) - diff
+        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
+        with TimeTracking(name="convolve"):
+            filt_diff1w = xr.apply_ufunc(fir_filter_convolve_vectorized,
+                                         filter_input_data,
+                                         input_core_dims=[[new_dim]],
+                                         output_core_dims=[[new_dim]],
+                                         vectorize=True,
+                                         kwargs={"h": h})
+
+        diff = (a - history.sel(window=slice(-24 * 7, 1)).mean(new_dim))
+        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim)
+        diff = xr.zeros_like(future) + diff
+        lam = np.log(2) / (7 * 24)
+        diff = diff * np.exp(- lam * diff.coords["window"])
+        future = future - diff
+        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
+        with TimeTracking(name="convolve"):
+            filt_diff1w_decay = xr.apply_ufunc(fir_filter_convolve_vectorized,
+                                               filter_input_data,
+                                               input_core_dims=[[new_dim]],
+                                               output_core_dims=[[new_dim]],
+                                               vectorize=True,
+                                               kwargs={"h": h})
+
+        t0 = datetime.datetime.strptime("2009-07-15 00:00", "%Y-%m-%d %H:%M")
+        delta = datetime.timedelta(hours=1)
+        for i in range(int((length - 1) / 2)):
+            plt.plot(-i, filt_nc.sel(datetime=t0 - i * delta, window=0), marker="+", color="black")
+        filt_nc.sel(datetime=t0).plot(label="noncausal")
+        filt_t0.sel(datetime=t0).plot(label="nodiff")
+        filt_diff1d.sel(datetime=t0).plot(label="diff1d")
+        filt_diff1w.sel(datetime=t0).plot(label="diff1w")
+        filt_diff1w_decay.sel(datetime=t0).plot(label="diff1wdecay")
+        plt.legend()
+
+        for i in range(int((length - 1) / 2)):
+            plt.plot(-i, filt_t0.sel(datetime=t0 - i * delta, window=0), marker="+", color="black")
+
+        z = 1  # no-op, presumably left as a breakpoint anchor for interactive inspection
+
     @TimeTrackingWrapper
     def clim_filter_vectorized_less_memory(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5,
                                            sel_opts=None,
                                            sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
-                                           plot_index=None):
+                                           plot_index=None, minimum_length=None, new_dim="window"):
 
         logging.info(f"{data.coords['Stations'].values[0]}: extend apriori")
 
@@ -411,28 +524,39 @@ class ClimateFIRFilter:
         length = len(h)
 
         # create tmp dimension to apply filter, search for unused name
-        new_dim = self._create_tmp_dimension(data)
+        # new_dim = self._create_tmp_dimension(data)
 
         coll = []
 
-        for var in data.coords[var_dim].values:
+        for var in reversed(data.coords[var_dim].values):
+            # self._tmp_analysis(data, apriori, var, var_dim, length, time_dim, new_dim, h)
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
             d = data.sel({var_dim: [var]})
             a = apriori.sel({var_dim: [var]})
 
             # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
-            history = self._shift_data(d, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
-            gc.collect()
+            extend_length = length if minimum_length is None else max(length, minimum_length + int((length + 1) / 2))
+            if new_dim not in d.coords:
+                history = self._shift_data(d, range(int(-extend_length), 1), time_dim, var_dim, new_dim)
+                gc.collect()
+            else:
+                history = d.sel({new_dim: slice(int(-extend_length), 0)})
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
-            future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
-            gc.collect()
+            diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
+            if new_dim not in a.coords:
+                future = self._shift_data(a, range(1, int(extend_length + 1)), time_dim, var_dim, new_dim)
+                # future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim) - diff
+                gc.collect()
+            else:
+                future = a.sel({new_dim: slice(1, int(extend_length + 1))})
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
+
             filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
             # filter_input_data = history.combine_first(future)
             # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
             # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
-
+            # ToDo: remove all other filt methods, only keep the convolve one
             time_axis = filter_input_data.coords[time_dim]
             # apply vectorized fir filter along the tmp dimension
             kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
@@ -445,14 +569,15 @@ class ClimateFIRFilter:
             #                           kwargs=kwargs)
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
             with TimeTracking(name="convolve"):
-                slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
-                filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
+                # slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
+                filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,  # .sel({new_dim: slicer}),
                                       input_core_dims=[[new_dim]],
                                       output_core_dims=[[new_dim]],
                                       vectorize=True,
                                       kwargs={"h": h})
 
             # plot
+            # ToDo: enable plotting again
             if self.plot_path is not None:
                 for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
                     try:
@@ -462,20 +587,24 @@ class ClimateFIRFilter:
                         t_slice = filter_input_data.isel(
                             {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
                             time_dim].values
-                        self.plot(d, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
+                        # self.plot(d, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
                     except IndexError:
                         pass
 
             # select only values at tmp dimension 0 at each point in time
-            coll.append(filt.sel({new_dim: 0}, drop=True))
+            # coll.append(filt.sel({new_dim: 0}, drop=True))
+            coll.append(filt.sel({new_dim: slice(-extend_length, 0)}, drop=True))
             gc.collect()
 
         logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
         res = xr.concat(coll, var_dim)
         # create result array with same shape as input data, gaps are filled with nans
         logging.info(f"{data.coords['Stations'].values[0]}: create res_full")
-        res_full = xr.ones_like(data) * np.nan
-        res_full.loc[res.coords] = res
+
+        new_coords = {**{k: data.coords[k].values for k in data.coords if k != new_dim}, new_dim: res.coords[new_dim]}
+        dims = [*data.dims, new_dim] if new_dim not in data.dims else data.dims
+        res_full = xr.DataArray(dims=dims, coords=new_coords)
+        res_full.loc[res.coords] = res.transpose(*dims)
         return res_full, h, apriori
 
     @staticmethod
-- 
GitLab
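
[Editor's note] The core idea of patch 130 is to keep the full filter response along a temporary
integer dimension (new_dim, typically "window") instead of evaluating it only at t0, so the data
handler can later take slice(-window_history_size, 0) from it. _shift_data itself is not shown in
this excerpt; a plausible minimal re-implementation of the shifting idea (an assumption, not the
project's code):

    import numpy as np
    import xarray as xr

    def shift_data(da, index_range, time_dim="datetime", new_dim="window"):
        # stack shifted copies of da along a new integer-labelled dimension;
        # label i holds the value observed at t0 + i
        shifted = xr.concat([da.shift({time_dim: -i}) for i in index_range], dim=new_dim)
        shifted.coords[new_dim] = list(index_range)
        return shifted

    t = np.arange("2021-01-01", "2021-01-11", dtype="datetime64[D]")
    da = xr.DataArray(np.arange(10.), dims="datetime", coords={"datetime": t})
    hist = shift_data(da, range(-2, 1))  # windows -2, -1, 0 -> history including t0
    assert float(hist.sel(window=0, datetime=t[5])) == 5.0
    assert float(hist.sel(window=-2, datetime=t[5])) == 3.0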


From 23c8ade0f0fce685ed5987c0669e92dde25ed0a4 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 20 May 2021 23:25:40 +0200
Subject: [PATCH 131/175] extra loop for each year to prevent memory issues

---
 mlair/helpers/filter.py | 142 ++++++++++++++++++++++++----------------
 1 file changed, 85 insertions(+), 57 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index d9226b7c..c289df3b 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -531,69 +531,89 @@ class ClimateFIRFilter:
         for var in reversed(data.coords[var_dim].values):
             # self._tmp_analysis(data, apriori, var, var_dim, length, time_dim, new_dim, h)
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
-            d = data.sel({var_dim: [var]})
-            a = apriori.sel({var_dim: [var]})
-
-            # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
-            logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
-            extend_length = length if minimum_length is None else max(length, minimum_length + int((length + 1) / 2))
-            if new_dim not in d.coords:
-                history = self._shift_data(d, range(int(-extend_length), 1), time_dim, var_dim, new_dim)
-                gc.collect()
-            else:
-                history = d.sel({new_dim: slice(int(-extend_length), 0)})
-            logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
-            diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
-            if new_dim not in a.coords:
-                future = self._shift_data(a, range(1, int(extend_length + 1)), time_dim, var_dim, new_dim)
-                # future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim) - diff
-                gc.collect()
-            else:
-                future = a.sel({new_dim: slice(1, int(extend_length + 1))})
-            logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
-
-            filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
-            # filter_input_data = history.combine_first(future)
-            # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
-            # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
-            # ToDo: remove all other filt methods, only keep the convolve one
-            time_axis = filter_input_data.coords[time_dim]
-            # apply vectorized fir filter along the tmp dimension
-            kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
-                      "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
-            # with TimeTracking(name="numpy_vec"):
-            #     filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
-            # with TimeTracking(name="xr_apply_ufunc"):
-            #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-            #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-            #                           kwargs=kwargs)
-            logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-            with TimeTracking(name="convolve"):
-                # slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
-                filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,  # .sel({new_dim: slicer}),
-                                      input_core_dims=[[new_dim]],
-                                      output_core_dims=[[new_dim]],
-                                      vectorize=True,
-                                      kwargs={"h": h})
+
+            _start = pd.to_datetime(data.coords[time_dim].min().values).year
+            _end = pd.to_datetime(data.coords[time_dim].max().values).year
+            filt_coll = []
+            for _year in range(_start, _end + 1):
+                logging.info(f"{data.coords['Stations'].values[0]} ({var}): year={_year}")
+                extend_length = length if minimum_length is None else max(length,
+                                                                          minimum_length + int((length + 1) / 2))
+
+                time_slice = self._create_time_range_extend(_year, sampling, extend_length)
+                d = data.sel({var_dim: [var], time_dim: time_slice})
+                a = apriori.sel({var_dim: [var], time_dim: time_slice})
+                if len(d.coords[time_dim]) == 0:  # no data at all for this year
+                    continue
+
+                # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
+                logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
+                if new_dim not in d.coords:
+                    history = self._shift_data(d, range(int(-extend_length), 1), time_dim, var_dim, new_dim)
+                    gc.collect()
+                else:
+                    history = d.sel({new_dim: slice(int(-extend_length), 0)})
+                logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
+                diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
+                if new_dim not in a.coords:
+                    future = self._shift_data(a, range(1, int(extend_length + 1)), time_dim, var_dim, new_dim)
+                    # future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim) - diff
+                    gc.collect()
+                else:
+                    future = a.sel({new_dim: slice(1, int(extend_length + 1))})
+                logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
+
+                filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
+                try:
+                    filter_input_data = filter_input_data.sel({time_dim: str(_year)})
+                except KeyError:  # no valid data for this year
+                    continue
+                if len(filter_input_data.coords[time_dim]) == 0:  # no valid data for this year
+                    continue
+                # filter_input_data = history.combine_first(future)
+                # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
+                # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
+                # ToDo: remove all other filt methods, only keep the convolve one
+                time_axis = filter_input_data.coords[time_dim]
+                # apply vectorized fir filter along the tmp dimension
+                kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
+                          "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
+                # with TimeTracking(name="numpy_vec"):
+                #     filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
+                # with TimeTracking(name="xr_apply_ufunc"):
+                #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
+                #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
+                #                           kwargs=kwargs)
+                logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
+                with TimeTracking(name="convolve"):
+                    # slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
+                    filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,  # .sel({new_dim: slicer}),
+                                          input_core_dims=[[new_dim]],
+                                          output_core_dims=[[new_dim]],
+                                          vectorize=True,
+                                          kwargs={"h": h})
+
+                filt_coll.append(filt.sel({new_dim: slice(-extend_length, 0)}, drop=True))
 
             # plot
             # ToDo: enable plotting again
-            if self.plot_path is not None:
-                for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
-                    try:
-                        pos = int(time_pos * 365 * fs)
-                        filter_example = filter_input_data.isel({time_dim: pos})
-                        t0 = filter_example.coords[time_dim].values
-                        t_slice = filter_input_data.isel(
-                            {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
-                            time_dim].values
-                        # self.plot(d, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
-                    except IndexError:
-                        pass
+            # if self.plot_path is not None:
+            #     for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
+            #         try:
+            #             pos = int(time_pos * 365 * fs)
+            #             filter_example = filter_input_data.isel({time_dim: pos})
+            #             t0 = filter_example.coords[time_dim].values
+            #             t_slice = filter_input_data.isel(
+            #                 {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
+            #                 time_dim].values
+            #             # self.plot(d, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
+            #         except IndexError:
+            #             pass
 
             # select only values at tmp dimension 0 at each point in time
             # coll.append(filt.sel({new_dim: 0}, drop=True))
-            coll.append(filt.sel({new_dim: slice(-extend_length, 0)}, drop=True))
+            # coll.append(filt.sel({new_dim: slice(-extend_length, 0)}, drop=True))
+            coll.append(xr.concat(filt_coll, time_dim))
             gc.collect()
 
         logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
@@ -607,6 +627,14 @@ class ClimateFIRFilter:
         res_full.loc[res.coords] = res.transpose(*dims)
         return res_full, h, apriori
 
+    @staticmethod
+    def _create_time_range_extend(year, sampling, extend_length):
+        td_type = {"1d": "D", "1H": "h"}.get(sampling)
+        delta = np.timedelta64(extend_length + 1, td_type)
+        start = np.datetime64(f"{year}-01-01") - delta
+        end = np.datetime64(f"{year}-12-31") + delta
+        return slice(start, end)
+
     @staticmethod
     def _create_tmp_dimension(data):
         new_dim = "window"
-- 
GitLab
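
[Editor's note] The per-year loop above bounds peak memory: only one year of data (plus padding) is
shifted and filtered at a time, and the yearly pieces are re-joined with xr.concat along the time
dimension. The slice is deliberately padded by extend_length + 1 steps on both sides so each year
still has enough context for the filter. The padding helper, restated as a standalone sketch:

    import numpy as np

    def create_time_range_extend(year, sampling="1d", extend_length=30):
        # mirrors the _create_time_range_extend staticmethod added above
        td_type = {"1d": "D", "1H": "h"}.get(sampling)
        delta = np.timedelta64(extend_length + 1, td_type)
        start = np.datetime64(f"{year}-01-01") - delta
        end = np.datetime64(f"{year}-12-31") + delta
        return slice(start, end)

    print(create_time_range_extend(2020))  # -> slice from 2019-12-01 to 2021-01-31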


From 5bc7ec6da12d34e50d942e58c54361040ae235d1 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 21 May 2021 10:35:52 +0200
Subject: [PATCH 132/175] use single precision for filter, always reduce data
 size as much as possible

---
 .../data_handler/data_handler_with_filter.py  |  7 +--
 mlair/helpers/filter.py                       | 51 ++++++++++---------
 mlair/helpers/helpers.py                      |  8 +++
 3 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 576fe9d7..8824acc2 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -171,8 +171,8 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
     @TimeTrackingWrapper
     def apply_filter(self):
         """Apply FIR filter only on inputs."""
-        fir = FIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq, self.filter_window_type,
-                        self.target_dim)
+        fir = FIRFilter(self.input_data.astype("float32"), self.fs, self.filter_order, self.filter_cutoff_freq,
+                        self.filter_window_type, self.target_dim)
         self.fir_coeff = fir.filter_coefficients()
         fir_data = fir.filtered_data()
         if self._add_unfiltered is True:
@@ -319,7 +319,8 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         """Apply FIR filter only on inputs."""
         self.apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
         logging.info(f"{self.station}: call ClimateFIRFilter")
-        climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
+        climate_filter = ClimateFIRFilter(self.input_data.astype("float32"), self.fs, self.filter_order,
+                                          self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
                                           apriori_diurnal=self.apriori_diurnal, sel_opts=self.apriori_sel_opts,
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index c289df3b..7b9a1752 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -164,7 +164,7 @@ class ClimateFIRFilter:
         if pos + 1 < len(order):
             next_order = order[pos + 1]
         if minimum_length is not None:
-            next_order = max(next_order, minimum_length)
+            next_order = next_order + minimum_length
         return next_order if next_order > 0 else None
 
     @staticmethod
@@ -517,6 +517,7 @@ class ClimateFIRFilter:
         # calculate apriori information from data if not given and extend its range if it is not sufficiently long
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
+        apriori = apriori.astype(data.dtype)
         apriori = self.extend_apriori(data, apriori, time_dim, sampling)
 
         # calculate FIR filter coefficients
@@ -526,6 +527,10 @@ class ClimateFIRFilter:
         # create tmp dimension to apply filter, search for unused name
         # new_dim = self._create_tmp_dimension(data)
 
+        # use filter length if no minimum is given, otherwise use minimum + half filter length for extension
+        extend_length_history = length if minimum_length is None else minimum_length + int((length + 1) / 2)
+        extend_length_future = int((length + 1) / 2) + 1
+
         coll = []
 
         for var in reversed(data.coords[var_dim].values):
@@ -537,10 +542,8 @@ class ClimateFIRFilter:
             filt_coll = []
             for _year in range(_start, _end + 1):
                 logging.info(f"{data.coords['Stations'].values[0]} ({var}): year={_year}")
-                extend_length = length if minimum_length is None else max(length,
-                                                                          minimum_length + int((length + 1) / 2))
 
-                time_slice = self._create_time_range_extend(_year, sampling, extend_length)
+                time_slice = self._create_time_range_extend(_year, sampling, extend_length_history)
                 d = data.sel({var_dim: [var], time_dim: time_slice})
                 a = apriori.sel({var_dim: [var], time_dim: time_slice})
                 if len(d.coords[time_dim]) == 0:  # no data at all for this year
@@ -549,18 +552,16 @@ class ClimateFIRFilter:
                 # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
                 logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
                 if new_dim not in d.coords:
-                    history = self._shift_data(d, range(int(-extend_length), 1), time_dim, var_dim, new_dim)
-                    gc.collect()
+                    history = self._shift_data(d, range(int(-extend_length_history), 1), time_dim, var_dim, new_dim)
                 else:
-                    history = d.sel({new_dim: slice(int(-extend_length), 0)})
+                    history = d.sel({new_dim: slice(int(-extend_length_history), 0)})
                 logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
                 diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
                 if new_dim not in a.coords:
-                    future = self._shift_data(a, range(1, int(extend_length + 1)), time_dim, var_dim, new_dim)
+                    future = self._shift_data(a, range(1, extend_length_future), time_dim, var_dim, new_dim)
                     # future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim) - diff
-                    gc.collect()
                 else:
-                    future = a.sel({new_dim: slice(1, int(extend_length + 1))})
+                    future = a.sel({new_dim: slice(1, extend_length_future)})
                 logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
 
                 filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
@@ -574,16 +575,11 @@ class ClimateFIRFilter:
                 # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
                 # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
                 # ToDo: remove all other filt methods, only keep the convolve one
-                time_axis = filter_input_data.coords[time_dim]
-                # apply vectorized fir filter along the tmp dimension
-                kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
-                          "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
-                # with TimeTracking(name="numpy_vec"):
-                #     filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
-                # with TimeTracking(name="xr_apply_ufunc"):
-                #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-                #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-                #                           kwargs=kwargs)
+                # time_axis = filter_input_data.coords[time_dim]
+                # # apply vectorized fir filter along the tmp dimension
+                # kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
+                #           "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
+
                 logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
                 with TimeTracking(name="convolve"):
                     # slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
@@ -591,9 +587,13 @@ class ClimateFIRFilter:
                                           input_core_dims=[[new_dim]],
                                           output_core_dims=[[new_dim]],
                                           vectorize=True,
-                                          kwargs={"h": h})
+                                          kwargs={"h": h},
+                                          output_dtypes=[d.dtype])
 
-                filt_coll.append(filt.sel({new_dim: slice(-extend_length, 0)}, drop=True))
+                if minimum_length is None:
+                    filt_coll.append(filt.sel({new_dim: slice(-extend_length_history, 0)}, drop=True))
+                else:
+                    filt_coll.append(filt.sel({new_dim: slice(-minimum_length, 0)}, drop=True))
 
             # plot
             # ToDo: enable plotting again
@@ -623,8 +623,11 @@ class ClimateFIRFilter:
 
         new_coords = {**{k: data.coords[k].values for k in data.coords if k != new_dim}, new_dim: res.coords[new_dim]}
         dims = [*data.dims, new_dim] if new_dim not in data.dims else data.dims
-        res_full = xr.DataArray(dims=dims, coords=new_coords)
-        res_full.loc[res.coords] = res.transpose(*dims)
+        res = res.transpose(*dims)
+        # res_full = xr.DataArray(dims=dims, coords=new_coords)
+        # res_full.loc[res.coords] = res
+        # res_full.compute()
+        res_full = res.broadcast_like(xr.DataArray(dims=dims, coords=new_coords))
         return res_full, h, apriori
 
     @staticmethod
diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py
index b57b733b..7eab9111 100644
--- a/mlair/helpers/helpers.py
+++ b/mlair/helpers/helpers.py
@@ -179,3 +179,11 @@ def convert2xrda(arr: Union[xr.DataArray, xr.Dataset, np.ndarray, int, float],
         return xr.DataArray(arr, **kwargs)
 
 
+def convert_size(size_bytes):
+    if size_bytes == 0:
+        return "0B"
+    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+    i = int(math.floor(math.log(size_bytes, 1024)))
+    p = math.pow(1024, i)
+    s = round(size_bytes / p, 2)
+    return "%s %s" % (s, size_name[i])
-- 
GitLab
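
[Editor's note] Patch 132 pins the working precision to float32 (halving memory) and keeps it
through xr.apply_ufunc via output_dtypes; note that the new convert_size helper relies on an
`import math` at the top of helpers.py, which this hunk does not show. A self-contained sketch of
both points (data sizes are illustrative):

    import math
    import numpy as np
    import xarray as xr

    def convert_size(size_bytes):
        # standalone copy of the helper above, with the math import it needs
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        i = int(math.floor(math.log(size_bytes, 1024)))
        return f"{round(size_bytes / math.pow(1024, i), 2)} {size_name[i]}"

    da = xr.DataArray(np.random.rand(365, 10))  # float64 by default
    da32 = da.astype("float32")                 # half the memory footprint
    print(convert_size(da.values.nbytes), "->", convert_size(da32.values.nbytes))

    # declaring the output dtype keeps apply_ufunc from silently upcasting
    res = xr.apply_ufunc(lambda x: x * 2, da32, vectorize=True, output_dtypes=[da32.dtype])
    assert res.dtype == np.float32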


From 1cb0624bb76c9476869291b27da52af4451740ab Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 21 May 2021 15:53:50 +0200
Subject: [PATCH 133/175] store intermediate plot implementation, must be
 refactored

---
 mlair/helpers/filter.py | 134 ++++++++++++++++++++++++++++++++++------
 1 file changed, 115 insertions(+), 19 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 7b9a1752..75c94b36 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -531,12 +531,20 @@ class ClimateFIRFilter:
         extend_length_history = length if minimum_length is None else minimum_length + int((length + 1) / 2)
         extend_length_future = int((length + 1) / 2) + 1
 
+        # collect some data for visualization
+        plot_pos = np.array([0.25, 1.5, 2.75, 4]) * 365 * fs
+        plot_dates = [data.isel({time_dim: int(pos)}).coords[time_dim].values for pos in plot_pos if
+                      pos < len(data.coords[time_dim])]
+
         coll = []
 
         for var in reversed(data.coords[var_dim].values):
             # self._tmp_analysis(data, apriori, var, var_dim, length, time_dim, new_dim, h)
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
 
+            # empty plot data collection
+            plot_data = []
+
             _start = pd.to_datetime(data.coords[time_dim].min().values).year
             _end = pd.to_datetime(data.coords[time_dim].max().values).year
             filt_coll = []
@@ -550,19 +558,19 @@ class ClimateFIRFilter:
                     continue
 
                 # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
-                logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
+                # logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
                 if new_dim not in d.coords:
                     history = self._shift_data(d, range(int(-extend_length_history), 1), time_dim, var_dim, new_dim)
                 else:
                     history = d.sel({new_dim: slice(int(-extend_length_history), 0)})
-                logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
-                diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
+                # logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
+                # diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
                 if new_dim not in a.coords:
                     future = self._shift_data(a, range(1, extend_length_future), time_dim, var_dim, new_dim)
                     # future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim) - diff
                 else:
                     future = a.sel({new_dim: slice(1, extend_length_future)})
-                logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
+                # logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
 
                 filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
                 try:
@@ -595,20 +603,31 @@ class ClimateFIRFilter:
                 else:
                     filt_coll.append(filt.sel({new_dim: slice(-minimum_length, 0)}, drop=True))
 
-            # plot
-            # ToDo: enable plotting again
-            # if self.plot_path is not None:
-            #     for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
-            #         try:
-            #             pos = int(time_pos * 365 * fs)
-            #             filter_example = filter_input_data.isel({time_dim: pos})
-            #             t0 = filter_example.coords[time_dim].values
-            #             t_slice = filter_input_data.isel(
-            #                 {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
-            #                 time_dim].values
-            #             # self.plot(d, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
-            #         except IndexError:
-            #             pass
+                # visualization
+                # ToDo: move this code part into a separate plot method that is called on the fly, not afterwards
+                # just leave a call self.plot(*args) here!
+                for viz_date in set(plot_dates).intersection(filt.coords[time_dim].values):
+                    td_type = {"1d": "D", "1H": "h"}.get(sampling)
+                    t_minus = viz_date + np.timedelta64(int(-extend_length_history), td_type)
+                    t_plus = viz_date + np.timedelta64(int(extend_length_future), td_type)
+
+                    tmp_filter_data = self._shift_data(d.sel({time_dim: slice(t_minus, t_plus)}),
+                                                       range(int(-extend_length_history), int(extend_length_future)),
+                                                       time_dim, var_dim, new_dim)
+                    tmp_filt_nc = xr.apply_ufunc(fir_filter_convolve_vectorized,
+                                                 tmp_filter_data.sel({time_dim: viz_date}),
+                                                 input_core_dims=[[new_dim]],
+                                                 output_core_dims=[[new_dim]],
+                                                 vectorize=True,
+                                                 kwargs={"h": h},
+                                                 output_dtypes=[d.dtype])
+
+                    valid_range = range(int((length + 1) / 2) if minimum_length is None else minimum_length, 1)
+                    plot_data.append({"t0": viz_date,
+                                      "filt": filt.sel({time_dim: viz_date}),
+                                      "filter_input": filter_input_data.sel({time_dim: viz_date}),
+                                      "filt_nc": tmp_filt_nc,
+                                      "valid_range": valid_range})
 
             # select only values at tmp dimension 0 at each point in time
             # coll.append(filt.sel({new_dim: 0}, drop=True))
@@ -616,6 +635,27 @@ class ClimateFIRFilter:
             coll.append(xr.concat(filt_coll, time_dim))
             gc.collect()
 
+            # plot
+            # ToDo: enable plotting again
+            if self.plot_path is not None:
+                for i, viz_data in enumerate(plot_data):
+                    self.plot_new(viz_data, data.sel({var_dim: [var]}), var_dim, time_dim, new_dim, f"{plot_index}_{i}",
+                                  sampling)
+
+                # for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
+                #     try:
+                #
+                #         plot_data = coll[-1]
+                #         pos = int(time_pos * 365 * fs)
+                #         filter_example = plot_data.isel({time_dim: pos})
+                #         t0 = filter_example.coords[time_dim].values
+                #
+                #         slice_tmp = slice(pos - abs(plot_data.coords[new_dim].values.min()), pos + abs(plot_data.coords[new_dim].values.min()))
+                #         t_slice = plot_data.isel({time_dim: slice_tmp}).coords[time_dim].values
+                #         self.plot(data.sel({var_dim: [var]}), filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
+                #     except IndexError:
+                #         pass
+
         logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
         res = xr.concat(coll, var_dim)
         # create result array with same shape as input data, gaps are filled with nans
@@ -665,6 +705,61 @@ class ClimateFIRFilter:
         res.name = index_name
         return res
 
+    def plot_new(self, viz_data, orig_data, var_dim, time_dim, new_dim, plot_index, sampling):
+        try:
+            td_type = {"1d": "D", "1H": "h"}.get(sampling)
+            filter_example = viz_data["filt"]
+            filter_input = viz_data["filter_input"]
+            filter_nc = viz_data["filt_nc"]
+            valid_range = viz_data["valid_range"]
+            t0 = viz_data["t0"]
+            t_minus = t0 + np.timedelta64(filter_input.coords[new_dim].values.min(), td_type)
+            t_plus = t0 + np.timedelta64(filter_input.coords[new_dim].values.max(), td_type)
+            t_slice = slice(t_minus, t_plus)
+            data = orig_data.sel({time_dim: t_slice})
+            plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR")
+            if not os.path.exists(plot_folder):
+                os.makedirs(plot_folder)
+
+            for var in data.coords[var_dim]:
+                time_axis = data.sel({var_dim: var, time_dim: t_slice}).coords[time_dim].values
+                rc_params = {'axes.labelsize': 'large',
+                             'xtick.labelsize': 'large',
+                             'ytick.labelsize': 'large',
+                             'legend.fontsize': 'large',
+                             'axes.titlesize': 'large',
+                             }
+                plt.rcParams.update(rc_params)
+                fig, ax = plt.subplots()
+
+                ax.axvspan(t0 + np.timedelta64(-valid_range.start, td_type),
+                           t0 + np.timedelta64(valid_range.stop - 1, td_type), color="whitesmoke", label="valid area")
+
+                ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)")
+                ax.plot(time_axis, data.sel({var_dim: var, time_dim: t_slice}).values.flatten(),
+                        color="darkgrey", linestyle="dashed", label="original")
+                d_tmp = filter_input.sel(
+                    {var_dim: var, new_dim: slice(0, filter_input.coords[new_dim].values.max())}).values.flatten()
+                # ax.plot(time_axis[len(time_axis) - len(d_tmp):], d_tmp, color="darkgrey", linestyle=(0 ,(1, 1)), label="filter input")
+                ax.plot(time_axis[len(time_axis) - len(d_tmp):], d_tmp, color="darkgrey", linestyle="solid",
+                        label="estimated future")
+                # data.sel({var_dim: var, time_dim: time_dim_slice}).plot()
+                # tmp_comb.sel({var_dim: var}).plot()
+                # d_filt = filter_example.sel({var_dim: var}).values.flatten()
+                ax.plot(time_axis, filter_example.sel({var_dim: var}).values.flatten(),
+                        color="black", linestyle="solid", label="filter response", linewidth=2)
+                ax.plot(time_axis, filter_nc.sel({var_dim: var}).values.flatten(),
+                        color="black", linestyle="dashed", label="ideal filter response", linewidth=2)
+                plt.title(f"Input of ClimFilter ({str(var.values)})")
+                plt.legend()
+                fig.autofmt_xdate()
+                plt.tight_layout()
+                plot_name = os.path.join(plot_folder, f"climFIR_{self.plot_name}_{str(var.values)}_{plot_index}.pdf")
+                plt.savefig(plot_name, dpi=300)
+                plt.close('all')
+        except Exception:  # plotting is best-effort and must not break the filter run
+            pass
+
     def plot(self, data, tmp_comb, var_dim, time_dim, time_dim_slice, t0, plot_index):
         try:
             plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR")
@@ -683,7 +778,8 @@ class ClimateFIRFilter:
                 ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)")
                 ax.plot(time_axis, data.sel({var_dim: var, time_dim: time_dim_slice}).values.flatten(),
                         color="darkgrey", linestyle="--", label="original")
-                ax.plot(time_axis, tmp_comb.sel({var_dim: var}).values.flatten(), color="black", label="filter input")
+                d_filt = tmp_comb.sel({var_dim: var}).values.flatten()
+                ax.plot(time_axis[:len(d_filt)], d_filt, color="black", label="filter input")
                 # data.sel({var_dim: var, time_dim: time_dim_slice}).plot()
                 # tmp_comb.sel({var_dim: var}).plot()
                 plt.title(f"Input of ClimFilter ({str(var.values)})")
-- 
GitLab

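The two plot helpers above share one idiom: the sampling string is mapped to a numpy
timedelta unit, which is then used to span a window around the time of interest t0.
A minimal, self-contained sketch of that pattern (function name and values are
hypothetical, not part of MLAir):

    import numpy as np

    def window_around(t0, sampling, n_past, n_future):
        # same unit mapping as in ClimateFIRFilter.plot_new above
        td_type = {"1d": "D", "1H": "h"}.get(sampling)
        t_minus = t0 + np.timedelta64(-n_past, td_type)
        t_plus = t0 + np.timedelta64(n_future, td_type)
        return slice(t_minus, t_plus)

    # e.g. a 10-day history / 5-day future window for daily sampling:
    # window_around(np.datetime64("2011-01-15"), "1d", 10, 5)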

From d591487beaadd8835fec12d472a32a141a8af8a0 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Sat, 22 May 2021 18:24:15 +0200
Subject: [PATCH 134/175] disable plotting for now to have a running module

---
 .../data_handler/data_handler_with_filter.py  | 10 +++
 mlair/helpers/filter.py                       | 75 ++++++++++---------
 mlair/run_modules/pre_processing.py           |  1 +
 3 files changed, 50 insertions(+), 36 deletions(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 8824acc2..fa94b88c 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -429,6 +429,16 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         data.coords[self.window_dim] = data.coords[self.window_dim] + self.window_history_offset
         self.history = data
 
+    def call_transform(self, inverse=False):
+        opts_input = self._transformation[0]
+        self.input_data, opts_input = self.transform(self.input_data, dim=[self.time_dim, self.window_dim],
+                                                     inverse=inverse, opts=opts_input,
+                                                     transformation_dim=self.target_dim)
+        opts_target = self._transformation[1]
+        self.target_data, opts_target = self.transform(self.target_data, dim=self.time_dim, inverse=inverse,
+                                                       opts=opts_target, transformation_dim=self.target_dim)
+        self._transformation = (opts_input, opts_target)
+
 
 class DataHandlerClimateFirFilter(DefaultDataHandler):
     """Data handler using climatic adjusted FIR filtered data."""
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 75c94b36..a4288098 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -606,28 +606,31 @@ class ClimateFIRFilter:
                 # visualization
                 # ToDo: move this code part into a separate plot method that is called on the fly, not afterwards
                 # just leave a call self.plot(*args) here!
-                for viz_date in set(plot_dates).intersection(filt.coords[time_dim].values):
-                    td_type = {"1d": "D", "1H": "h"}.get(sampling)
-                    t_minus = viz_date + np.timedelta64(int(-extend_length_history), td_type)
-                    t_plus = viz_date + np.timedelta64(int(extend_length_future), td_type)
-
-                    tmp_filter_data = self._shift_data(d.sel({time_dim: slice(t_minus, t_plus)}),
-                                                       range(int(-extend_length_history), int(extend_length_future)),
-                                                       time_dim, var_dim, new_dim)
-                    tmp_filt_nc = xr.apply_ufunc(fir_filter_convolve_vectorized,
-                                                 tmp_filter_data.sel({time_dim: viz_date}),
-                                                 input_core_dims=[[new_dim]],
-                                                 output_core_dims=[[new_dim]],
-                                                 vectorize=True,
-                                                 kwargs={"h": h},
-                                                 output_dtypes=[d.dtype])
-
-                    valid_range = range(int((length + 1) / 2) if minimum_length is None else minimum_length, 1)
-                    plot_data.append({"t0": viz_date,
-                                      "filt": filt.sel({time_dim: viz_date}),
-                                      "filter_input": filter_input_data.sel({time_dim: viz_date}),
-                                      "filt_nc": tmp_filt_nc,
-                                      "valid_range": valid_range})
+                # for viz_date in set(plot_dates).intersection(filt.coords[time_dim].values):
+                #     td_type = {"1d": "D", "1H": "h"}.get(sampling)
+                #     t_minus = viz_date + np.timedelta64(int(-extend_length_history), td_type)
+                #     t_plus = viz_date + np.timedelta64(int(extend_length_future), td_type)
+                #     if new_dim not in d.coords:
+                #         tmp_filter_data = self._shift_data(d.sel({time_dim: slice(t_minus, t_plus)}),
+                #                                            range(int(-extend_length_history),
+                #                                                  int(extend_length_future)),
+                #                                            time_dim, var_dim, new_dim)
+                #     else:
+                #         tmp_filter_data = d.sel({new_dim: slice(int(-extend_length_history), int(extend_length_future))})
+                #     tmp_filt_nc = xr.apply_ufunc(fir_filter_convolve_vectorized,
+                #                                  tmp_filter_data.sel({time_dim: viz_date}),
+                #                                  input_core_dims=[[new_dim]],
+                #                                  output_core_dims=[[new_dim]],
+                #                                  vectorize=True,
+                #                                  kwargs={"h": h},
+                #                                  output_dtypes=[d.dtype])
+                #
+                #     valid_range = range(int((length + 1) / 2) if minimum_length is None else minimum_length, 1)
+                #     plot_data.append({"t0": viz_date,
+                #                       "filt": filt.sel({time_dim: viz_date}),
+                #                       "filter_input": filter_input_data.sel({time_dim: viz_date}),
+                #                       "filt_nc": tmp_filt_nc,
+                #                       "valid_range": valid_range})
 
             # select only values at tmp dimension 0 at each point in time
             # coll.append(filt.sel({new_dim: 0}, drop=True))
@@ -637,20 +640,20 @@ class ClimateFIRFilter:
 
             # plot
             # ToDo: enable plotting again
-            if self.plot_path is not None:
-                for i, viz_data in enumerate(plot_data):
-                    self.plot_new(viz_data, data.sel({var_dim: [var]}), var_dim, time_dim, new_dim, f"{plot_index}_{i}",
-                                  sampling)
-
-                # for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
-                #     try:
-                #
-                #         plot_data = coll[-1]
-                #         pos = int(time_pos * 365 * fs)
-                #         filter_example = plot_data.isel({time_dim: pos})
-                #         t0 = filter_example.coords[time_dim].values
-                #
-                #         slice_tmp = slice(pos - abs(plot_data.coords[new_dim].values.min()), pos + abs(plot_data.coords[new_dim].values.min()))
+            # if self.plot_path is not None:
+            #     for i, viz_data in enumerate(plot_data):
+            #         self.plot_new(viz_data, data.sel({var_dim: [var]}), var_dim, time_dim, new_dim, f"{plot_index}_{i}",
+            #                       sampling)
+
+            # for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
+            #     try:
+            #
+            #         plot_data = coll[-1]
+            #         pos = int(time_pos * 365 * fs)
+            #         filter_example = plot_data.isel({time_dim: pos})
+            #         t0 = filter_example.coords[time_dim].values
+            #
+            #         slice_tmp = slice(pos - abs(plot_data.coords[new_dim].values.min()), pos + abs(plot_data.coords[new_dim].values.min()))
                 #         t_slice = plot_data.isel({time_dim: slice_tmp}).coords[time_dim].values
                 #         self.plot(data.sel({var_dim: [var]}), filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
                 #     except IndexError:
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index d21f8920..db9d1d5e 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -355,6 +355,7 @@ def f_proc(data_handler, station, name_affix, store, **kwargs):
         formatted_lines = traceback.format_exc().splitlines()
         logging.info(
             f"remove station {station} because it raised an error: {e} -> {' | '.join(f_inspect_error(formatted_lines))}")
+        logging.debug(f"detailed information for removal of station {station}: {traceback.format_exc()}")
         res = None
     return res, station
 
-- 
GitLab

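The call_transform override above keeps two independent option sets: inputs are
transformed over both the time and window dimensions, targets over time only, and the
fitted options are written back to self._transformation so a later call with
inverse=True can undo the exact same scaling. A hedged sketch of that round-trip idea
(transform() here is a stand-in, not the real MLAir signature):

    import xarray as xr

    def transform(data: xr.DataArray, inverse=False, opts=None):
        if not inverse:
            opts = opts or {"mean": data.mean(), "std": data.std()}
            return (data - opts["mean"]) / opts["std"], opts
        # inverse call reuses the previously fitted statistics
        return data * opts["std"] + opts["mean"], opts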

From 0a74a59ca9d603efcb16779356a28daabc2ea19e Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 25 May 2021 11:24:41 +0200
Subject: [PATCH 135/175] add vanilla rnn

---
 .../data_handler_mixed_sampling.py            |   3 +-
 .../model_modules/fully_connected_networks.py |   2 +-
 mlair/model_modules/recurrent_networks.py     | 146 ++++++++++++++++++
 3 files changed, 149 insertions(+), 2 deletions(-)
 create mode 100644 mlair/model_modules/recurrent_networks.py

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 00408684..7446d005 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -67,7 +67,8 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation):
                                          self.station_type, self.network, self.store_data_locally, self.data_origin,
                                          self.start, self.end)
         data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method[ind],
-                                limit=self.interpolation_limit[ind])
+                                limit=self.interpolation_limit[ind], sampling=self.sampling[ind])
+
         return data
 
     def set_inputs_and_targets(self):
diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 9fb08cdf..009ff060 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -1,5 +1,5 @@
 __author__ = "Lukas Leufen"
-__date__ = '2021-02-'
+__date__ = '2021-02-18'
 
 from functools import reduce, partial
 
diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
new file mode 100644
index 00000000..953749c3
--- /dev/null
+++ b/mlair/model_modules/recurrent_networks.py
@@ -0,0 +1,146 @@
+__author__ = "Lukas Leufen"
+__date__ = '2021-05-25'
+
+from functools import reduce, partial
+
+from mlair.model_modules import AbstractModelClass
+from mlair.helpers import select_from_dict
+from mlair.model_modules.loss import var_loss, custom_loss
+
+import keras
+
+
+class RNN(AbstractModelClass):
+    """
+    Vanilla recurrent neural network (RNN) built from stacked LSTM layers.
+    """
+
+    _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
+                   "sigmoid": partial(keras.layers.Activation, "sigmoid"),
+                   "linear": partial(keras.layers.Activation, "linear"),
+                   "selu": partial(keras.layers.Activation, "selu"),
+                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25))}
+    _initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
+                    "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal(),
+                    "prelu": keras.initializers.he_normal()}
+    _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
+    _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
+    _dropout = {"selu": keras.layers.AlphaDropout}
+
+    def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
+                 optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
+                 **kwargs):
+        """
+        Sets model and loss depending on the given arguments.
+
+        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
+        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+        """
+
+        assert len(input_shape) == 1
+        assert len(output_shape) == 1
+        super().__init__(input_shape[0], output_shape[0])
+
+        # settings
+        # self.activation = self._set_activation(activation)
+        # self.activation_name = activation
+        self.activation_output = self._set_activation(activation_output)
+        self.activation_output_name = activation_output
+        self.optimizer = self._set_optimizer(optimizer, **kwargs)
+        # self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
+        # self._update_model_name()
+        # self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
+        # self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
+        self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
+
+        # apply to model
+        self.set_model()
+        self.set_compile_options()
+        self.set_custom_objects(loss=self.compile_options["loss"][0], var_loss=var_loss)
+
+    def set_model(self):
+        """
+        Build the model.
+        """
+        x_input = keras.layers.Input(shape=self._input_shape)
+        x_in = keras.layers.Reshape((self._input_shape[0], reduce((lambda x, y: x * y), self._input_shape[1:])))(
+            x_input)
+        x_in = keras.layers.LSTM(32, return_sequences=True)(x_in)
+        if self.dropout is not None:
+            x_in = self.dropout(self.dropout_rate)(x_in)
+        x_in = keras.layers.LSTM(8)(x_in)
+        if self.dropout is not None:
+            x_in = self.dropout(self.dropout_rate)(x_in)
+        out = keras.layers.Dense(self._output_shape)(x_in)
+        self.model = keras.Model(inputs=x_input, outputs=[out])
+        print(self.model.summary())
+
+        # x_input = keras.layers.Input(shape=self._input_shape)
+        # x_in = keras.layers.Reshape((self._input_shape[0], reduce((lambda x, y: x * y), self._input_shape[1:])))(
+        #     x_input)
+        # x_in = keras.layers.LSTM(32)(x_in)
+        # x_in = keras.layers.RepeatVector(self._output_shape)(x_in)
+        # x_in = keras.layers.LSTM(32, return_sequences=True)(x_in)
+        # out = keras.layers.TimeDistributed(keras.layers.Dense(1))(x_in)
+        # out = keras.layers.Flatten()(out)
+        # self.model = keras.Model(inputs=x_input, outputs=[out])
+        # print(self.model.summary())
+
+    def _set_dropout(self, activation, dropout_rate):
+        if dropout_rate is None:
+            return None, None
+        assert 0 <= dropout_rate < 1
+        return self._dropout.get(activation, keras.layers.Dropout), dropout_rate
+
+    def _set_activation(self, activation):
+        try:
+            return self._activation[activation.lower()]  # indexing raises KeyError for unsupported names
+        except KeyError:
+            raise AttributeError(f"Given activation {activation} is not supported in this model class.")
+
+    def set_compile_options(self):
+        self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss])],
+                                "metrics": ["mse", "mae", var_loss]}
+
+    def _set_optimizer(self, optimizer, **kwargs):
+        try:
+            opt_name = optimizer.lower()
+            opt = self._optimizer[opt_name]  # indexing raises KeyError for unsupported names
+            opt_kwargs = {}
+            if opt_name == "adam":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"])
+            elif opt_name == "sgd":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "momentum", "decay", "nesterov"])
+            return opt(**opt_kwargs)
+        except KeyError:
+            raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
+    #
+    # def _set_regularizer(self, regularizer, **kwargs):
+    #     if regularizer is None or (isinstance(regularizer, str) and regularizer.lower() == "none"):
+    #         return None
+    #     try:
+    #         reg_name = regularizer.lower()
+    #         reg = self._regularizer.get(reg_name)
+    #         reg_kwargs = {}
+    #         if reg_name in ["l1", "l2"]:
+    #             reg_kwargs = select_from_dict(kwargs, reg_name, remove_none=True)
+    #             if reg_name in reg_kwargs:
+    #                 reg_kwargs["l"] = reg_kwargs.pop(reg_name)
+    #         elif reg_name == "l1_l2":
+    #             reg_kwargs = select_from_dict(kwargs, ["l1", "l2"], remove_none=True)
+    #         return reg(**reg_kwargs)
+    #     except KeyError:
+    #         raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
+    #
+
+    #
+    # def _update_model_name(self):
+    #     n_input = str(reduce(lambda x, y: x * y, self._input_shape))
+    #     n_output = str(self._output_shape)
+    #     if isinstance(self.layer_configuration, tuple) and len(self.layer_configuration) == 2:
+    #         n_layer, n_hidden = self.layer_configuration
+    #         self.model_name += "_".join(["", n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
+    #     else:
+    #         self.model_name += "_".join(["", n_input, *[f"{n}" for n in self.layer_configuration], n_output])
+    #
-- 
GitLab

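The Reshape call in set_model above collapses everything behind the leading window
axis, turning a (window, station, variables) input into the (timesteps, features)
layout that keras.layers.LSTM expects. A short sketch with hypothetical shapes:

    from functools import reduce
    import keras

    input_shape = (14, 1, 5)  # hypothetical: 14 time steps, 1 station, 5 variables
    x_input = keras.layers.Input(shape=input_shape)
    n_features = reduce(lambda x, y: x * y, input_shape[1:])  # 1 * 5 = 5
    x = keras.layers.Reshape((input_shape[0], n_features))(x_input)  # -> (14, 5)
    x = keras.layers.LSTM(32, return_sequences=True)(x)  # keep sequence for stacking
    x = keras.layers.LSTM(8)(x)                          # last layer returns a vector
    out = keras.layers.Dense(4)(x)                       # hypothetical 4-step forecast
    model = keras.Model(inputs=x_input, outputs=[out])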

From 70d0d9dfee6c3c1454962906c4893d67d766f0cb Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 25 May 2021 11:36:14 +0200
Subject: [PATCH 136/175] small change: make cache directory creation race-safe

---
 mlair/data_handler/default_data_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 73b6b53d..f70f454b 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -80,7 +80,7 @@ class DefaultDataHandler(AbstractDataHandler):
     def _cleanup(self):
         directory = os.path.dirname(self._save_file)
         if os.path.exists(directory) is False:
-            os.makedirs(directory)
+            os.makedirs(directory, exist_ok=True)
         if os.path.exists(self._save_file):
             shutil.rmtree(self._save_file, ignore_errors=True)
 
-- 
GitLab

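The exist_ok change above closes a race: with several stations processed in parallel,
a second worker can create the directory between the exists() check and makedirs(),
which then raises FileExistsError. With exist_ok=True the call is simply idempotent:

    import os

    os.makedirs("/tmp/mlair_cache", exist_ok=True)  # hypothetical path; safe to repeat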

From f384b44849119cf321693f0e3c12e5ce37befda2 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 26 May 2021 13:59:35 +0200
Subject: [PATCH 137/175] plotting works better but is still too messy;
 improved rnn class

---
 mlair/helpers/filter.py                       | 233 ++++++++++++++----
 .../model_modules/fully_connected_networks.py |  55 +++--
 mlair/model_modules/recurrent_networks.py     |  95 ++++---
 3 files changed, 286 insertions(+), 97 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index a4288098..041e63c9 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -78,6 +78,7 @@ class ClimateFIRFilter:
         logging.info(f"{plot_name}: start init ClimateFIRFilter")
         self.plot_path = plot_path
         self.plot_name = plot_name
+        self.plot_data = []
         filtered = []
         h = []
         sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
@@ -96,6 +97,9 @@ class ClimateFIRFilter:
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
 
+        # for viz
+        plot_dates = None
+
         # create tmp dimension to apply filter, search for unused name
         new_dim = self._create_tmp_dimension(input_data) if new_dim is None else new_dim
 
@@ -106,11 +110,13 @@ class ClimateFIRFilter:
             # ToDo: remove all methods except the vectorized version
             clim_filter: Callable = {True: self.clim_filter_vectorized_less_memory, False: self.clim_filter}[vectorized]
             _minimum_length = self._minimum_length(order, minimum_length, i)
-            fi, hi, apriori = clim_filter(input_data, fs, cutoff[i], order[i],
-                                          apriori=apriori_list[i],
-                                          sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
-                                          var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor,
-                                          minimum_length=_minimum_length, new_dim=new_dim)
+            fi, hi, apriori, plot_data = clim_filter(input_data, fs, cutoff[i], order[i],
+                                                     apriori=apriori_list[i],
+                                                     sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
+                                                     window=window,
+                                                     var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor,
+                                                     minimum_length=_minimum_length, new_dim=new_dim,
+                                                     plot_dates=plot_dates)
 
             logging.info(f"{plot_name}: finished clim_filter calculation")
             if minimum_length is None:
@@ -119,6 +125,8 @@ class ClimateFIRFilter:
                 filtered.append(fi.sel({new_dim: slice(-minimum_length, 0)}))
             h.append(hi)
             gc.collect()
+            self.plot_data.append(plot_data)
+            plot_dates = {e["t0"] for e in plot_data}
 
             # calculate residuum
             logging.info(f"{plot_name}: calculate residuum")
@@ -158,6 +166,9 @@ class ClimateFIRFilter:
         self._h = h
         self._apriori = apriori_list
 
+        # visualize
+        self._plot(sampling)
+
     @staticmethod
     def _minimum_length(order, minimum_length, pos):
         next_order = 0
@@ -510,7 +521,7 @@ class ClimateFIRFilter:
     def clim_filter_vectorized_less_memory(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5,
                                            sel_opts=None,
                                            sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
-                                           plot_index=None, minimum_length=None, new_dim="window"):
+                                           plot_index=None, minimum_length=None, new_dim="window", plot_dates=None):
 
         logging.info(f"{data.coords['Stations'].values[0]}: extend apriori")
 
@@ -533,8 +544,10 @@ class ClimateFIRFilter:
 
         # collect some data for visualization
         plot_pos = np.array([0.25, 1.5, 2.75, 4]) * 365 * fs
-        plot_dates = [data.isel({time_dim: int(pos)}).coords[time_dim].values for pos in plot_pos if
-                      pos < len(data.coords[time_dim])]
+        if plot_dates is None:
+            plot_dates = [data.isel({time_dim: int(pos)}).coords[time_dim].values for pos in plot_pos if
+                          pos < len(data.coords[time_dim])]
+        plot_data = []
 
         coll = []
 
@@ -542,9 +555,6 @@ class ClimateFIRFilter:
             # self._tmp_analysis(data, apriori, var, var_dim, length, time_dim, new_dim, h)
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
 
-            # empty plot data collection
-            plot_data = []
-
             _start = pd.to_datetime(data.coords[time_dim].min().values).year
             _end = pd.to_datetime(data.coords[time_dim].max().values).year
             filt_coll = []
@@ -579,14 +589,6 @@ class ClimateFIRFilter:
                     continue
                 if len(filter_input_data.coords[time_dim]) == 0:  # no valid data for this year
                     continue
-                # filter_input_data = history.combine_first(future)
-                # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
-                # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
-                # ToDo: remove all other filt methods, only keep the convolve one
-                # time_axis = filter_input_data.coords[time_dim]
-                # # apply vectorized fir filter along the tmp dimension
-                # kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
-                #           "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
 
                 logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
                 with TimeTracking(name="convolve"):
@@ -604,37 +606,35 @@ class ClimateFIRFilter:
                     filt_coll.append(filt.sel({new_dim: slice(-minimum_length, 0)}, drop=True))
 
                 # visualization
-                # ToDo: move this code part into a separate plot method that is called on the fly, not afterwards
-                # just leave a call self.plot(*args) here!
-                # for viz_date in set(plot_dates).intersection(filt.coords[time_dim].values):
-                #     td_type = {"1d": "D", "1H": "h"}.get(sampling)
-                #     t_minus = viz_date + np.timedelta64(int(-extend_length_history), td_type)
-                #     t_plus = viz_date + np.timedelta64(int(extend_length_future), td_type)
-                #     if new_dim not in d.coords:
-                #         tmp_filter_data = self._shift_data(d.sel({time_dim: slice(t_minus, t_plus)}),
-                #                                            range(int(-extend_length_history),
-                #                                                  int(extend_length_future)),
-                #                                            time_dim, var_dim, new_dim)
-                #     else:
-                #         tmp_filter_data = d.sel({new_dim: slice(int(-extend_length_history), int(extend_length_future))})
-                #     tmp_filt_nc = xr.apply_ufunc(fir_filter_convolve_vectorized,
-                #                                  tmp_filter_data.sel({time_dim: viz_date}),
-                #                                  input_core_dims=[[new_dim]],
-                #                                  output_core_dims=[[new_dim]],
-                #                                  vectorize=True,
-                #                                  kwargs={"h": h},
-                #                                  output_dtypes=[d.dtype])
-                #
-                #     valid_range = range(int((length + 1) / 2) if minimum_length is None else minimum_length, 1)
-                #     plot_data.append({"t0": viz_date,
-                #                       "filt": filt.sel({time_dim: viz_date}),
-                #                       "filter_input": filter_input_data.sel({time_dim: viz_date}),
-                #                       "filt_nc": tmp_filt_nc,
-                #                       "valid_range": valid_range})
-
-            # select only values at tmp dimension 0 at each point in time
-            # coll.append(filt.sel({new_dim: 0}, drop=True))
-            # coll.append(filt.sel({new_dim: slice(-extend_length, 0)}, drop=True))
+                for viz_date in set(plot_dates).intersection(filt.coords[time_dim].values):
+                    try:
+                        td_type = {"1d": "D", "1H": "h"}.get(sampling)
+                        t_minus = viz_date + np.timedelta64(int(-extend_length_history), td_type)
+                        t_plus = viz_date + np.timedelta64(int(extend_length_future), td_type)
+                        if new_dim not in d.coords:
+                            tmp_filter_data = self._shift_data(d.sel({time_dim: slice(t_minus, t_plus)}),
+                                                               range(int(-extend_length_history),
+                                                                     int(extend_length_future)),
+                                                               time_dim, var_dim, new_dim).sel({time_dim: viz_date})
+                        else:
+                            # tmp_filter_data = d.sel({time_dim: viz_date,
+                            #                          new_dim: slice(int(-extend_length_history), int(extend_length_future))})
+                            tmp_filter_data = None
+                        valid_range = range(int((length + 1) / 2) if minimum_length is None else minimum_length, 1)
+                        plot_data.append({"t0": viz_date,
+                                          "var": var,
+                                          "filter_input": filter_input_data.sel({time_dim: viz_date}),
+                                          "filter_input_nc": tmp_filter_data,
+                                          "valid_range": valid_range,
+                                          "time_range": d.sel(
+                                              {time_dim: slice(t_minus, t_plus - np.timedelta64(1, td_type))}).coords[
+                                              time_dim].values,
+                                          "h": h,
+                                          "new_dim": new_dim})
+                    except Exception:  # collecting plot data is best-effort
+                        pass
+
+            # collect all filter results
             coll.append(xr.concat(filt_coll, time_dim))
             gc.collect()
 
@@ -671,7 +671,7 @@ class ClimateFIRFilter:
         # res_full.loc[res.coords] = res
         # res_full.compute()
         res_full = res.broadcast_like(xr.DataArray(dims=dims, coords=new_coords))
-        return res_full, h, apriori
+        return res_full, h, apriori, plot_data
 
     @staticmethod
     def _create_time_range_extend(year, sampling, extend_length):
@@ -708,6 +708,137 @@ class ClimateFIRFilter:
         res.name = index_name
         return res
 
+    def _plot(self, sampling):
+        new_dim = "window"
+        h = None
+        td_type = {"1d": "D", "1H": "h"}.get(sampling)
+        if self.plot_path is None:
+            return
+        plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR")
+        if not os.path.exists(plot_folder):
+            os.makedirs(plot_folder)
+
+        rc_params = {'axes.labelsize': 'large',
+                     'xtick.labelsize': 'large',
+                     'ytick.labelsize': 'large',
+                     'legend.fontsize': 'large',
+                     'axes.titlesize': 'large',
+                     }
+        plt.rcParams.update(rc_params)
+
+        plot_dict = {}
+        for o in range(len(self.plot_data)):  # o is the filter (order) index
+            plot_data = self.plot_data[o]
+            for p_d in plot_data:
+                var = p_d.get("var")
+                t0 = p_d.get("t0")
+                filter_input = p_d.get("filter_input")
+                filter_input_nc = p_d.get("filter_input_nc")
+                valid_range = p_d.get("valid_range")
+                time_range = p_d.get("time_range")
+                new_dim = p_d.get("new_dim")
+                h = p_d.get("h")
+                plot_dict_var = plot_dict.get(var, {})
+                plot_dict_t0 = plot_dict_var.get(t0, {})
+                plot_dict_order = {"filter_input": filter_input,
+                                   "filter_input_nc": filter_input_nc,
+                                   "valid_range": valid_range,
+                                   "time_range": time_range,
+                                   "order": o, "h": h}
+                plot_dict_t0[o] = plot_dict_order
+                plot_dict_var[t0] = plot_dict_t0
+                plot_dict[var] = plot_dict_var
+
+        for var, viz_date_dict in plot_dict.items():
+            for it0, t0 in enumerate(viz_date_dict.keys()):
+                viz_data = viz_date_dict[t0]
+                residuum_true = None
+                for ifilter in sorted(viz_data.keys()):
+                    data = viz_data[ifilter]
+                    filter_input = data["filter_input"]
+                    filter_input_nc = data["filter_input_nc"] if residuum_true is None else residuum_true.sel(
+                        {new_dim: filter_input.coords[new_dim]})
+                    valid_range = data["valid_range"]
+                    time_axis = data["time_range"]
+                    # time_axis = pd.date_range(t_minus, t_plus, freq=sampling)
+                    filter_order = data["order"]
+                    h = data["h"]
+                    t_minus = t0 + np.timedelta64(-int(1.5 * valid_range.start), td_type)
+                    t_plus = t0 + np.timedelta64(int(0.5 * valid_range.start), td_type)
+                    fig, ax = plt.subplots()
+                    ax.axvspan(t0 + np.timedelta64(-valid_range.start, td_type),
+                               t0 + np.timedelta64(valid_range.stop - 1, td_type), color="whitesmoke",
+                               label="valid area")
+                    ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)")
+
+                    # original data
+                    ax.plot(time_axis, filter_input_nc.values.flatten(), color="darkgrey", linestyle="dashed",
+                            label="original")
+
+                    # clim apriori
+                    if ifilter == 0:
+                        d_tmp = filter_input.sel(
+                            {new_dim: slice(0, filter_input.coords[new_dim].values.max())}).values.flatten()
+                    else:
+                        d_tmp = filter_input.values.flatten()
+                    ax.plot(time_axis[len(time_axis) - len(d_tmp):], d_tmp, color="darkgrey", linestyle="solid",
+                            label="estimated future")
+
+                    # clim filter response
+                    filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input,
+                                          input_core_dims=[[new_dim]],
+                                          output_core_dims=[[new_dim]],
+                                          vectorize=True,
+                                          kwargs={"h": h},
+                                          output_dtypes=[filter_input.dtype])
+                    ax.plot(time_axis, filt.values.flatten(), color="black", linestyle="solid",
+                            label="clim filter response", linewidth=2)
+                    residuum_estimated = filter_input - filt
+
+                    # ideal filter response
+                    filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_nc,
+                                          input_core_dims=[[new_dim]],
+                                          output_core_dims=[[new_dim]],
+                                          vectorize=True,
+                                          kwargs={"h": h},
+                                          output_dtypes=[filter_input.dtype])
+                    ax.plot(time_axis, filt.values.flatten(), color="black", linestyle="dashed",
+                            label="ideal filter response", linewidth=2)
+                    residuum_true = filter_input_nc - filt
+
+                    # set title, legend, and save plot
+                    ax_start = max(t_minus, time_axis[0])
+                    ax_end = min(t_plus, time_axis[-1])
+                    ax.set_xlim((ax_start, ax_end))
+                    plt.title(f"Input of ClimFilter ({str(var)})")
+                    plt.legend()
+                    fig.autofmt_xdate()
+                    plt.tight_layout()
+                    plot_name = os.path.join(plot_folder,
+                                             f"climFIR_{self.plot_name}_{str(var)}_{it0}_{ifilter}.pdf")
+                    plt.savefig(plot_name, dpi=300)
+                    plt.close('all')
+
+                    # plot residuum
+                    fig, ax = plt.subplots()
+                    ax.axvspan(t0 + np.timedelta64(-valid_range.start, td_type),
+                               t0 + np.timedelta64(valid_range.stop - 1, td_type), color="whitesmoke",
+                               label="valid area")
+                    ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)")
+                    ax.plot(time_axis, residuum_true.values.flatten(), color="black", linestyle="dashed",
+                            label="ideal filter residuum", linewidth=2)
+                    ax.plot(time_axis, residuum_estimated.values.flatten(), color="black", linestyle="solid",
+                            label="clim filter residuum", linewidth=2)
+                    ax.set_xlim((ax_start, ax_end))
+                    plt.title(f"Residuum of ClimFilter ({str(var)})")
+                    plt.legend()
+                    fig.autofmt_xdate()
+                    plt.tight_layout()
+                    plot_name = os.path.join(plot_folder,
+                                             f"climFIR_{self.plot_name}_{str(var)}_{it0}_{ifilter}_residuum.pdf")
+                    plt.savefig(plot_name, dpi=300)
+                    plt.close('all')
+
     def plot_new(self, viz_data, orig_data, var_dim, time_dim, new_dim, plot_index, sampling):
         try:
             td_type = {"1d": "D", "1H": "h"}.get(sampling)
diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 009ff060..ff06f075 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -20,7 +20,8 @@ class FCN(AbstractModelClass):
                    "sigmoid": partial(keras.layers.Activation, "sigmoid"),
                    "linear": partial(keras.layers.Activation, "linear"),
                    "selu": partial(keras.layers.Activation, "selu"),
-                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25))}
+                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25)),
+                   "leakyrelu": partial(keras.layers.LeakyReLU)}
     _initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
                     "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal(),
                     "prelu": keras.initializers.he_normal()}
@@ -31,12 +32,31 @@ class FCN(AbstractModelClass):
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
-                 **kwargs):
+                 batch_normalization=False, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
         :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
         :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+
+        Customize this FCN model via the following parameters:
+
+        :param activation: set your desired activation function. Choose from relu, tanh, sigmoid, linear, selu, prelu,
+            leakyrelu. (Default relu)
+        :param activation_output: same as the activation parameter, but applied to the output layer only. (Default
+            linear)
+        :param optimizer: set the optimizer method. Can be either adam or sgd. (Default adam)
+        :param n_layer: define the number of hidden layers in the network. The given number of hidden neurons is used
+            in each layer. (Default 1)
+        :param n_hidden: define the number of hidden units per layer. This number is used in each hidden layer. (Default 10)
+        :param layer_configuration: alternative formulation of the network's architecture. This overwrites the
+            settings from n_layer and n_hidden. Provide a list where each element represents the number of units in
+            one hidden layer; the number of hidden layers equals the length of this list.
+        :param dropout: use dropout with given rate. If no value is provided, dropout layers are not added to the
+            network at all. (Default None)
+        :param batch_normalization: use batch normalization layer in the network if enabled. These layers are inserted
+            between the linear part of a layer (the nn part) and the non-linear part (activation function). No BN layer
+            is added if set to false. (Default false)
         """
 
         assert len(input_shape) == 1
@@ -49,6 +69,7 @@ class FCN(AbstractModelClass):
         self.activation_output = self._set_activation(activation_output)
         self.activation_output_name = activation_output
         self.optimizer = self._set_optimizer(optimizer, **kwargs)
+        self.bn = batch_normalization
         self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
         self._update_model_name()
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
@@ -115,27 +136,29 @@ class FCN(AbstractModelClass):
         """
         Build the model.
         """
-        x_input = keras.layers.Input(shape=self._input_shape)
-        x_in = keras.layers.Flatten()(x_input)
         if isinstance(self.layer_configuration, tuple) is True:
             n_layer, n_hidden = self.layer_configuration
-            for layer in range(n_layer):
-                x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
-                                          kernel_regularizer=self.kernel_regularizer)(x_in)
-                x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
-                if self.dropout is not None:
-                    x_in = self.dropout(self.dropout_rate)(x_in)
+            conf = [n_hidden for _ in range(n_layer)]
         else:
             assert isinstance(self.layer_configuration, list) is True
-            for layer, n_hidden in enumerate(self.layer_configuration):
-                x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
-                                          kernel_regularizer=self.kernel_regularizer)(x_in)
-                x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
-                if self.dropout is not None:
-                    x_in = self.dropout(self.dropout_rate)(x_in)
+            conf = self.layer_configuration
+
+        x_input = keras.layers.Input(shape=self._input_shape)
+        x_in = keras.layers.Flatten()(x_input)
+
+        for layer, n_hidden in enumerate(conf):
+            x_in = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
+                                      kernel_regularizer=self.kernel_regularizer)(x_in)
+            if self.bn is True:
+                x_in = keras.layers.BatchNormalization()(x_in)
+            x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
+            if self.dropout is not None:
+                x_in = self.dropout(self.dropout_rate)(x_in)
+
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
+        print(self.model.summary())
 
     def set_compile_options(self):
         self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss])],
diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index 953749c3..55a3d585 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -19,7 +19,8 @@ class RNN(AbstractModelClass):
                    "sigmoid": partial(keras.layers.Activation, "sigmoid"),
                    "linear": partial(keras.layers.Activation, "linear"),
                    "selu": partial(keras.layers.Activation, "selu"),
-                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25))}
+                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25)),
+                   "leakyrelu": partial(keras.layers.LeakyReLU)}
     _initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
                     "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal(),
                     "prelu": keras.initializers.he_normal()}
@@ -27,15 +28,37 @@ class RNN(AbstractModelClass):
     _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
     _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
     _dropout = {"selu": keras.layers.AlphaDropout}
+    _rnn = {"lstm": keras.layers.LSTM, "gru": keras.layers.GRU}
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
-                 **kwargs):
+                 batch_normalization=False, rnn_type="lstm", **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
         :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
         :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+
+        Customize this RNN model via the following parameters:
+
+        :param activation: set your desired activation function. Choose from relu, tanh, sigmoid, linear, selu, prelu,
+            leakyrelu. (Default relu)
+        :param activation_output: same as the activation parameter, but applied to the output layer only. (Default
+            linear)
+        :param optimizer: set the optimizer method. Can be either adam or sgd. (Default adam)
+        :param n_layer: define the number of hidden layers in the network. The given number of hidden neurons is used
+            in each layer. (Default 1)
+        :param n_hidden: define the number of hidden units per layer. This number is used in each hidden layer. (Default 10)
+        :param layer_configuration: alternative formulation of the network's architecture. This overwrites the
+            settings from n_layer and n_hidden. Provide a list where each element represents the number of units in
+            one hidden layer; the number of hidden layers equals the length of this list.
+        :param dropout: use dropout with given rate. If no value is provided, dropout layers are not added to the
+            network at all. (Default None)
+        :param batch_normalization: use batch normalization layer in the network if enabled. These layers are inserted
+            between the linear part of a layer (the nn part) and the non-linear part (activation function). No BN layer
+            is added if set to false. (Default false)
+        :param rnn_type: define which kind of recurrent network should be applied. Choose from either lstm or gru. All
+            units will be of this kind. (Default lstm)
         """
 
         assert len(input_shape) == 1
@@ -43,13 +66,15 @@ class RNN(AbstractModelClass):
         super().__init__(input_shape[0], output_shape[0])
 
         # settings
-        # self.activation = self._set_activation(activation)
-        # self.activation_name = activation
-        self.activation_output = self._set_activation(activation_output)
+        self.activation = self._set_activation(activation.lower())
+        self.activation_name = activation
+        self.activation_output = self._set_activation(activation_output.lower())
         self.activation_output_name = activation_output
-        self.optimizer = self._set_optimizer(optimizer, **kwargs)
-        # self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
-        # self._update_model_name()
+        self.optimizer = self._set_optimizer(optimizer.lower(), **kwargs)
+        self.bn = batch_normalization
+        self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
+        self.RNN = self._rnn.get(rnn_type.lower())
+        self._update_model_name(rnn_type)
         # self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         # self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
         self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
@@ -63,29 +88,40 @@ class RNN(AbstractModelClass):
         """
         Build the model.
         """
+        if isinstance(self.layer_configuration, tuple) is True:
+            n_layer, n_hidden = self.layer_configuration
+            conf = [n_hidden for _ in range(n_layer)]
+        else:
+            assert isinstance(self.layer_configuration, list) is True
+            conf = self.layer_configuration
+
         x_input = keras.layers.Input(shape=self._input_shape)
         x_in = keras.layers.Reshape((self._input_shape[0], reduce((lambda x, y: x * y), self._input_shape[1:])))(
             x_input)
-        x_in = keras.layers.LSTM(32, return_sequences=True)(x_in)
-        if self.dropout is not None:
-            x_in = self.dropout(self.dropout_rate)(x_in)
-        x_in = keras.layers.LSTM(8)(x_in)
-        if self.dropout is not None:
-            x_in = self.dropout(self.dropout_rate)(x_in)
-        out = keras.layers.Dense(self._output_shape)(x_in)
+
+        for layer, n_hidden in enumerate(conf):
+            return_sequences = (layer < len(conf) - 1)
+            x_in = self.RNN(n_hidden, return_sequences=return_sequences)(x_in)
+            if self.bn is True:
+                x_in = keras.layers.BatchNormalization()(x_in)
+            x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
+            if self.dropout is not None:
+                x_in = self.dropout(self.dropout_rate)(x_in)
+
+        x_in = keras.layers.Dense(self._output_shape)(x_in)
+        out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
         print(self.model.summary())
 
-        # x_input = keras.layers.Input(shape=self._input_shape)
-        # x_in = keras.layers.Reshape((self._input_shape[0], reduce((lambda x, y: x * y), self._input_shape[1:])))(
-        #     x_input)
         # x_in = keras.layers.LSTM(32)(x_in)
+        # if self.dropout is not None:
+        #     x_in = self.dropout(self.dropout_rate)(x_in)
         # x_in = keras.layers.RepeatVector(self._output_shape)(x_in)
         # x_in = keras.layers.LSTM(32, return_sequences=True)(x_in)
+        # if self.dropout is not None:
+        #     x_in = self.dropout(self.dropout_rate)(x_in)
         # out = keras.layers.TimeDistributed(keras.layers.Dense(1))(x_in)
         # out = keras.layers.Flatten()(out)
-        # self.model = keras.Model(inputs=x_input, outputs=[out])
-        # print(self.model.summary())
 
     def _set_dropout(self, activation, dropout_rate):
         if dropout_rate is None:
@@ -134,13 +170,12 @@ class RNN(AbstractModelClass):
     #         raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
     #
 
-    #
-    # def _update_model_name(self):
-    #     n_input = str(reduce(lambda x, y: x * y, self._input_shape))
-    #     n_output = str(self._output_shape)
-    #     if isinstance(self.layer_configuration, tuple) and len(self.layer_configuration) == 2:
-    #         n_layer, n_hidden = self.layer_configuration
-    #         self.model_name += "_".join(["", n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
-    #     else:
-    #         self.model_name += "_".join(["", n_input, *[f"{n}" for n in self.layer_configuration], n_output])
-    #
+    def _update_model_name(self, rnn_type):
+        n_input = str(reduce(lambda x, y: x * y, self._input_shape))
+        n_output = str(self._output_shape)
+        self.model_name = rnn_type.upper()
+        if isinstance(self.layer_configuration, tuple) and len(self.layer_configuration) == 2:
+            n_layer, n_hidden = self.layer_configuration
+            self.model_name += "_".join(["", n_input, *[f"{n_hidden}" for _ in range(n_layer)], n_output])
+        else:
+            self.model_name += "_".join(["", n_input, *[f"{n}" for n in self.layer_configuration], n_output])
-- 
GitLab

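After this patch both network classes build their hidden part from one loop with the
same per-layer pattern: linear/recurrent unit, optional batch normalization,
activation, optional dropout; in the RNN all layers except the last return full
sequences so they can be stacked. A hedged usage sketch for the reworked class
(shapes and values are hypothetical):

    from mlair.model_modules.recurrent_networks import RNN

    model = RNN(input_shape=[(14, 1, 5)], output_shape=[(4,)],
                activation="tanh", rnn_type="gru",
                layer_configuration=[32, 8],  # two recurrent layers: 32 then 8 units
                dropout=0.1, batch_normalization=True)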

From 24808a64f4ae1d1cadb5237b2a19ead86233a445 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 26 May 2021 14:23:13 +0200
Subject: [PATCH 138/175] use mse loss for rnn

---
 mlair/model_modules/recurrent_networks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index 55a3d585..7adc9111 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -136,7 +136,7 @@ class RNN(AbstractModelClass):
             raise AttributeError(f"Given activation {activation} is not supported in this model class.")
 
     def set_compile_options(self):
-        self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss])],
+        self.compile_options = {"loss": [keras.losses.mean_squared_error],
                                 "metrics": ["mse", "mae", var_loss]}
 
     def _set_optimizer(self, optimizer, **kwargs):
-- 
GitLab

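This switches the training loss from the combined custom_loss back to plain mse,
while var_loss stays visible as a metric. A hedged sketch of what such a loss
combination could look like (the real implementation lives in
mlair.model_modules.loss; the equal weighting and the var_loss body are assumptions):

    import keras.backend as K

    def var_loss(y_true, y_pred):
        # assumption: penalize mismatch between predicted and true variance
        return K.mean(K.square(K.var(y_true, axis=-1) - K.var(y_pred, axis=-1)))

    def custom_loss(losses):
        def loss(y_true, y_pred):
            return sum(l(y_true, y_pred) for l in losses)  # unweighted sum (assumption)
        return loss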

From fd5fe608fb563d38d2891ed2b0febabc96d00067 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 26 May 2021 18:38:30 +0200
Subject: [PATCH 139/175] new method to calculate seasonal diurnal anomalies

---
 mlair/helpers/filter.py | 57 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 041e63c9..9662122b 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -85,8 +85,11 @@ class ClimateFIRFilter:
         sampling = {1: "1d", 24: "1H"}.get(int(fs))
         logging.info(f"{plot_name}: create diurnal_anomalies")
         if apriori_diurnal is True and sampling == "1H":
-            diurnal_anomalies = self.create_hourly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
-                                                        as_anomaly=True)
+            # diurnal_anomalies = self.create_hourly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
+            #                                             as_anomaly=True)
+            diurnal_anomalies = self.create_seasonal_hourly_mean(data, sel_opts=sel_opts, sampling=sampling,
+                                                                 time_dim=time_dim,
+                                                                 as_anomaly=True)
         else:
             diurnal_anomalies = 0
         logging.info(f"{plot_name}: create monthly apriori")
@@ -140,9 +143,12 @@ class ClimateFIRFilter:
             if len(apriori_list) <= i + 1:
                 logging.info(f"{plot_name}: create diurnal_anomalies")
                 if apriori_diurnal is True and sampling == "1H":
-                    diurnal_anomalies = self.create_hourly_mean(input_data.sel({new_dim: 0}, drop=True),
-                                                                sel_opts=sel_opts, sampling=sampling,
-                                                                time_dim=time_dim, as_anomaly=True)
+                    # diurnal_anomalies = self.create_hourly_mean(input_data.sel({new_dim: 0}, drop=True),
+                    #                                             sel_opts=sel_opts, sampling=sampling,
+                    #                                             time_dim=time_dim, as_anomaly=True)
+                    diurnal_anomalies = self.create_seasonal_hourly_mean(input_data.sel({new_dim: 0}, drop=True),
+                                                                         sel_opts=sel_opts, sampling=sampling,
+                                                                         time_dim=time_dim, as_anomaly=True)
                 else:
                     diurnal_anomalies = 0
                 logging.info(f"{plot_name}: create monthly apriori")
@@ -242,6 +248,43 @@ class ClimateFIRFilter:
             hourly.loc[{f"{time_dim}": loc}] = hourly_mean.sel(hour=hour)
         return hourly
 
+    def create_seasonal_hourly_mean(self, data, sel_opts=None, sampling="1H", time_dim="datetime", as_anomaly=True):
+        """Calculate hourly statistics. Either the absolute value or the anomaly (as_anomaly=True)."""
+        # can only be used for hourly sampling rate
+        assert sampling == "1H"
+
+        # apply selection if given (only use subset for seasonal hourly means)
+        if sel_opts is not None:
+            data = data.sel(**sel_opts)
+
+        # create unity xarray in monthly resolution with sampling point in mid of each month
+        monthly = self.create_unity_array(data, time_dim) * np.nan
+
+        seasonal_hourly_means = {}
+
+        for month in data.groupby(f"{time_dim}.month").groups.keys():
+            # select each month
+            single_month_data = data.sel({time_dim: (data[f"{time_dim}.month"] == month)})
+            hourly_mean = single_month_data.groupby(f"{time_dim}.hour").mean()
+            if as_anomaly is True:
+                hourly_mean = hourly_mean - hourly_mean.mean("hour")
+            seasonal_hourly_means[month] = hourly_mean
+
+        seasonal_coll = []
+        for hour in data.groupby(f"{time_dim}.hour").groups.keys():
+            h_coll = monthly.__deepcopy__()
+            for month in seasonal_hourly_means.keys():
+                hourly_mean_single_month = seasonal_hourly_means[month].sel(hour=hour, drop=True)
+                h_coll = xr.where((h_coll[f"{time_dim}.month"] == month),
+                                  hourly_mean_single_month,
+                                  h_coll)
+            h_coll = h_coll.resample({time_dim: sampling}).interpolate()
+            h_coll = h_coll.sel({time_dim: (h_coll[f"{time_dim}.hour"] == hour)})
+            seasonal_coll.append(h_coll)
+        hourly = xr.concat(seasonal_coll, time_dim).sortby(time_dim).resample({time_dim: sampling}).interpolate()
+
+        return hourly
+
     @staticmethod
     def extend_apriori(data, apriori, time_dim, sampling="1d"):
         """
@@ -721,7 +764,7 @@ class ClimateFIRFilter:
         rc_params = {'axes.labelsize': 'large',
                      'xtick.labelsize': 'large',
                      'ytick.labelsize': 'large',
-                     'legend.fontsize': 'large',
+                     'legend.fontsize': 'medium',
                      'axes.titlesize': 'large',
                      }
         plt.rcParams.update(rc_params)
@@ -831,7 +874,7 @@ class ClimateFIRFilter:
                             label="clim filter residuum", linewidth=2)
                     ax.set_xlim((ax_start, ax_end))
                     plt.title(f"Residuum of ClimFilter ({str(var)})")
-                    plt.legend()
+                    plt.legend(loc="upper left")
                     fig.autofmt_xdate()
                     plt.tight_layout()
                     plot_name = os.path.join(plot_folder,
-- 
GitLab
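
Note: unlike create_hourly_mean, which fits a single diurnal cycle to the whole record, create_seasonal_hourly_mean estimates one diurnal cycle per calendar month and interpolates between the monthly anchors. A self-contained sketch of the grouping logic on synthetic data:

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2020-01-01", "2020-12-31 23:00", freq="1H")
    data = xr.DataArray(np.sin(2 * np.pi * time.hour / 24) * (1 + np.cos(2 * np.pi * time.month / 12)),
                        coords={"datetime": time}, dims="datetime")

    # per-month diurnal anomaly, the core of the new method
    for month, group in data.groupby("datetime.month"):
        hourly_mean = group.groupby("datetime.hour").mean()
        anomaly = hourly_mean - hourly_mean.mean("hour")  # varies over the seasons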


From 3a94611ecb5c13f0afd93baafdd66bd1c206a0fe Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 27 May 2021 11:16:27 +0200
Subject: [PATCH 140/175] add dense layer between rnn and output

---
 mlair/model_modules/recurrent_networks.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index 7adc9111..ab28085b 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -32,7 +32,7 @@ class RNN(AbstractModelClass):
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
-                 batch_normalization=False, rnn_type="lstm", **kwargs):
+                 batch_normalization=False, rnn_type="lstm", add_dense_layer=False, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -72,6 +72,7 @@ class RNN(AbstractModelClass):
         self.activation_output_name = activation_output
         self.optimizer = self._set_optimizer(optimizer.lower(), **kwargs)
         self.bn = batch_normalization
+        self.add_dense_layer = add_dense_layer
         self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
         self.RNN = self._rnn.get(rnn_type.lower())
         self._update_model_name(rnn_type)
@@ -108,6 +109,9 @@ class RNN(AbstractModelClass):
             if self.dropout is not None:
                 x_in = self.dropout(self.dropout_rate)(x_in)
 
+        if self.add_dense_layer is True:
+            x_in = keras.layers.Dense(min(self._output_shape ** 2, conf[-1]), ame=f"Dense_{len(conf) + 1}")(x_in)
+            x_in = self.activation(name=f"{self.activation_name}_{len(conf) + 1}")(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
-- 
GitLab


From 97ca2fa32c46dda3c2125e9520c6afff5d0c6488 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 27 May 2021 11:29:27 +0200
Subject: [PATCH 141/175] fixed typo

---
 mlair/model_modules/recurrent_networks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index ab28085b..0861d416 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -110,7 +110,7 @@ class RNN(AbstractModelClass):
                 x_in = self.dropout(self.dropout_rate)(x_in)
 
         if self.add_dense_layer is True:
-            x_in = keras.layers.Dense(min(self._output_shape ** 2, conf[-1]), ame=f"Dense_{len(conf) + 1}")(x_in)
+            x_in = keras.layers.Dense(min(self._output_shape ** 2, conf[-1]), name=f"Dense_{len(conf) + 1}")(x_in)
             x_in = self.activation(name=f"{self.activation_name}_{len(conf) + 1}")(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
-- 
GitLab


From 19e758cc206eae3956e3cdb2e9c86cf219d4d4cb Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 27 May 2021 11:47:52 +0200
Subject: [PATCH 142/175] split activation for rnn and dense layer part

---
 mlair/model_modules/recurrent_networks.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index 0861d416..cbe5d145 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -31,6 +31,7 @@ class RNN(AbstractModelClass):
     _rnn = {"lstm": keras.layers.LSTM, "gru": keras.layers.GRU}
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
+                 activation_rnn="tanh",
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
                  batch_normalization=False, rnn_type="lstm", add_dense_layer=False, **kwargs):
         """
@@ -68,6 +69,8 @@ class RNN(AbstractModelClass):
         # settings
         self.activation = self._set_activation(activation.lower())
         self.activation_name = activation
+        self.activation_rnn = self._set_activation(activation_rnn.lower())
+        self.activation_rnn_name = activation_rnn
         self.activation_output = self._set_activation(activation_output.lower())
         self.activation_output_name = activation_output
         self.optimizer = self._set_optimizer(optimizer.lower(), **kwargs)
@@ -76,7 +79,7 @@ class RNN(AbstractModelClass):
         self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
         self.RNN = self._rnn.get(rnn_type.lower())
         self._update_model_name(rnn_type)
-        # self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
+        self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         # self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
         self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
 
@@ -105,12 +108,13 @@ class RNN(AbstractModelClass):
             x_in = self.RNN(n_hidden, return_sequences=return_sequences)(x_in)
             if self.bn is True:
                 x_in = keras.layers.BatchNormalization()(x_in)
-            x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
+            x_in = self.activation_rnn(name=f"{self.activation_rnn_name}_{layer + 1}")(x_in)
             if self.dropout is not None:
                 x_in = self.dropout(self.dropout_rate)(x_in)
 
         if self.add_dense_layer is True:
-            x_in = keras.layers.Dense(min(self._output_shape ** 2, conf[-1]), name=f"Dense_{len(conf) + 1}")(x_in)
+            x_in = keras.layers.Dense(min(self._output_shape ** 2, conf[-1]), name=f"Dense_{len(conf) + 1}",
+                                      kernel_initializer=self.kernel_initializer, )(x_in)
             x_in = self.activation(name=f"{self.activation_name}_{len(conf) + 1}")(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
@@ -172,7 +176,6 @@ class RNN(AbstractModelClass):
     #         return reg(**reg_kwargs)
     #     except KeyError:
     #         raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
-    #
 
     def _update_model_name(self, rnn_type):
         n_input = str(reduce(lambda x, y: x * y, self._input_shape))
-- 
GitLab
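
Note: after this patch the RNN part and the appended dense part use independent activations (activation_rnn, default tanh, vs. activation, default relu). A sketch of the resulting topology for add_dense_layer=True; shapes and layer sizes are illustrative:

    import keras

    x_input = keras.layers.Input(shape=(7, 9))  # (window_hist, variables)
    x = keras.layers.LSTM(10, return_sequences=False)(x_input)
    x = keras.layers.Activation("tanh", name="tanh_1")(x)       # activation_rnn
    x = keras.layers.Dense(min(4 ** 2, 10), name="Dense_2")(x)  # min(output_shape ** 2, conf[-1])
    x = keras.layers.ReLU(name="relu_2")(x)                     # activation (dense part)
    out = keras.layers.Dense(4, activation="linear")(x)
    model = keras.Model(inputs=x_input, outputs=[out])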


From e872a3888cb1135672c0e1d334a0fc3944907907 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 27 May 2021 12:14:26 +0200
Subject: [PATCH 143/175] added new parameter dropout_rnn

---
 mlair/model_modules/recurrent_networks.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index cbe5d145..95c48bc8 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -31,7 +31,7 @@ class RNN(AbstractModelClass):
     _rnn = {"lstm": keras.layers.LSTM, "gru": keras.layers.GRU}
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
-                 activation_rnn="tanh",
+                 activation_rnn="tanh", dropout_rnn=0,
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
                  batch_normalization=False, rnn_type="lstm", add_dense_layer=False, **kwargs):
         """
@@ -42,8 +42,10 @@ class RNN(AbstractModelClass):
 
         Customize this RNN model via the following parameters:
 
-        :param activation: set your desired activation function. Chose from relu, tanh, sigmoid, linear, selu, prelu,
-            leakyrelu. (Default relu)
+        :param activation: set your desired activation function for appended dense layers (add_dense_layer=True). Choose
+            from relu, tanh, sigmoid, linear, selu, prelu, leakyrelu. (Default relu)
+        :param activation_rnn: set your desired activation function of the rnn output. Choose from relu, tanh, sigmoid,
+            linear, selu, prelu, leakyrelu. (Default tanh)
         :param activation_output: same as activation parameter but exclusively applied on output layer only. (Default
             linear)
         :param optimizer: set optimizer method. Can be either adam or sgd. (Default adam)
@@ -55,6 +57,8 @@ class RNN(AbstractModelClass):
             hidden layer. The number of hidden layers is equal to the total length of this list.
         :param dropout: use dropout with given rate. If no value is provided, dropout layers are not added to the
             network at all. (Default None)
+        :param dropout_rnn: use recurrent dropout with given rate. This is applied along the recursion and not after
+            an rnn layer. (Default 0)
         :param batch_normalization: use batch normalization layer in the network if enabled. These layers are inserted
             between the linear part of a layer (the nn part) and the non-linear part (activation function). No BN layer
             is added if set to false. (Default false)
@@ -82,6 +86,8 @@ class RNN(AbstractModelClass):
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         # self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
         self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
+        assert 0 <= dropout_rnn <= 1
+        self.dropout_rnn = dropout_rnn
 
         # apply to model
         self.set_model()
@@ -105,7 +111,7 @@ class RNN(AbstractModelClass):
 
         for layer, n_hidden in enumerate(conf):
             return_sequences = (layer < len(conf) - 1)
-            x_in = self.RNN(n_hidden, return_sequences=return_sequences)(x_in)
+            x_in = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn)(x_in)
             if self.bn is True:
                 x_in = keras.layers.BatchNormalization()(x_in)
             x_in = self.activation_rnn(name=f"{self.activation_rnn_name}_{layer + 1}")(x_in)
-- 
GitLab
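
Note: recurrent_dropout masks the recurrent state transition inside the cell at every time step, whereas the existing dropout parameter adds a Dropout layer after the RNN output; the two are independent and can be combined. A minimal sketch:

    import keras

    inputs = keras.layers.Input(shape=(7, 9))
    x = keras.layers.LSTM(10, recurrent_dropout=0.2)(inputs)  # dropout_rnn=0.2
    x = keras.layers.Dropout(0.5)(x)                          # dropout=0.5 on the output
    out = keras.layers.Dense(4)(x)
    model = keras.Model(inputs=inputs, outputs=[out])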


From a6cd5f5cef34984e3d9f0dbd5d03579e3c293dd4 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 28 May 2021 10:06:49 +0200
Subject: [PATCH 144/175] all filter data handlers can now return filter portions
 either as a dimension or as distinct branches

---
 .../data_handler_mixed_sampling.py            | 16 +++--
 .../data_handler/data_handler_with_filter.py  | 66 ++++++++++++++++---
 2 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 7446d005..62a354a2 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -4,6 +4,8 @@ __date__ = '2020-11-05'
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.data_handler.data_handler_with_filter import DataHandlerKzFilterSingleStation, \
     DataHandlerFirFilterSingleStation, DataHandlerFilterSingleStation, DataHandlerClimateFirFilterSingleStation
+from mlair.data_handler.data_handler_with_filter import DataHandlerClimateFirFilter, DataHandlerFirFilter, \
+    DataHandlerKzFilter
 from mlair.data_handler import DefaultDataHandler
 from mlair import helpers
 from mlair.helpers import remove_items
@@ -176,11 +178,12 @@ class DataHandlerMixedSamplingWithKzFilterSingleStation(DataHandlerMixedSampling
         return int(self.kz_filter_length[0] * np.sqrt(self.kz_filter_iter[0]) * 2)
 
     def _extract_lazy(self, lazy_data):
-        _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \
+        self.filter_dim_order = lazy_data
         super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
 
 
-class DataHandlerMixedSamplingWithKzFilter(DefaultDataHandler):
+class DataHandlerMixedSamplingWithKzFilter(DataHandlerKzFilter):
     """Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
 
     data_handler = DataHandlerMixedSamplingWithKzFilterSingleStation
@@ -199,7 +202,7 @@ class DataHandlerMixedSamplingWithFirFilterSingleStation(DataHandlerMixedSamplin
         return max(self.filter_order)
 
     def _extract_lazy(self, lazy_data):
-        _data, _meta, _input_data, _target_data, self.fir_coeff = lazy_data
+        _data, _meta, _input_data, _target_data, self.fir_coeff, self.filter_dim_order = lazy_data
         super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
 
     @staticmethod
@@ -214,7 +217,7 @@ class DataHandlerMixedSamplingWithFirFilterSingleStation(DataHandlerMixedSamplin
             raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.")
 
 
-class DataHandlerMixedSamplingWithFirFilter(DefaultDataHandler):
+class DataHandlerMixedSamplingWithFirFilter(DataHandlerFirFilter):
     """Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
 
     data_handler = DataHandlerMixedSamplingWithFirFilterSingleStation
@@ -236,7 +239,8 @@ class DataHandlerMixedSamplingWithClimateFirFilterSingleStation(DataHandlerMixed
         super().__init__(*args, **kwargs)
 
     def _extract_lazy(self, lazy_data):
-        _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori = lazy_data
+        _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori, \
+        self.filter_dim_order = lazy_data
         DataHandlerMixedSamplingWithFilterSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data))
 
     @staticmethod
@@ -251,7 +255,7 @@ class DataHandlerMixedSamplingWithClimateFirFilterSingleStation(DataHandlerMixed
             raise ValueError(f"Unknown sampling rate {sampling}. Only daily and hourly resolution is supported.")
 
 
-class DataHandlerMixedSamplingWithClimateFirFilter(DefaultDataHandler):
+class DataHandlerMixedSamplingWithClimateFirFilter(DataHandlerClimateFirFilter):
     """Data handler using mixed sampling for input and target. Inputs are temporal filtered."""
 
     data_handler = DataHandlerMixedSamplingWithClimateFirFilterSingleStation
diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index fa94b88c..5da1b893 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -47,6 +47,7 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation):
     def __init__(self, *args, filter_dim=DEFAULT_FILTER_DIM, **kwargs):
         # self.original_data = None  # ToDo: implement here something to store unfiltered data
         self.filter_dim = filter_dim
+        self.filter_dim_order = None
         super().__init__(*args, **kwargs)
 
     def setup_transformation(self, transformation: Union[None, dict, Tuple]) -> Tuple[Optional[dict], Optional[dict]]:
@@ -104,6 +105,38 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation):
         self._data, self.input_data, self.target_data = list(map(f_prep, [_data, _input_data, _target_data]))
 
 
+class DataHandlerFilter(DefaultDataHandler):
+    """Data handler using FIR filtered data."""
+
+    data_handler = DataHandlerFilterSingleStation
+    data_handler_transformation = DataHandlerFilterSingleStation
+    _requirements = data_handler.requirements()
+
+    def __init__(self, *args, use_filter_branches=False, **kwargs):
+        self.use_filter_branches = use_filter_branches
+        super().__init__(*args, **kwargs)
+
+    @classmethod
+    def own_args(cls, *args):
+        """Return all arguments (including kwonlyargs)."""
+        super_own_args = DefaultDataHandler.own_args(*args)
+        arg_spec = inspect.getfullargspec(cls)
+        list_of_args = arg_spec.args + arg_spec.kwonlyargs + super_own_args
+        return remove_items(list_of_args, ["self"] + list(args))
+
+    def get_X_original(self):
+        if self.use_filter_branches is True:
+            X = []
+            for data in self._collection:
+                X_total = data.get_X()
+                filter_dim = data.filter_dim
+                for filter_name in data.filter_dim_order:
+                    X.append(X_total.sel({filter_dim: filter_name}, drop=True))
+            return X
+        else:
+            return super().get_X_original()
+
+
 class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
     """Data handler for a single station to be used by a superior data handler. Inputs are FIR filtered."""
 
@@ -203,17 +236,18 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
                 band_num += 1
         if self._add_unfiltered:
             index.append("unfiltered")
+        self.filter_dim_order = index
         return pd.Index(index, name=self.filter_dim)
 
     def _create_lazy_data(self):
-        return [self._data, self.meta, self.input_data, self.target_data, self.fir_coeff]
+        return [self._data, self.meta, self.input_data, self.target_data, self.fir_coeff, self.filter_dim_order]
 
     def _extract_lazy(self, lazy_data):
-        _data, _meta, _input_data, _target_data, self.fir_coeff = lazy_data
+        _data, _meta, _input_data, _target_data, self.fir_coeff, self.filter_dim_order = lazy_data
         super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
 
 
-class DataHandlerFirFilter(DefaultDataHandler):
+class DataHandlerFirFilter(DataHandlerFilter):
     """Data handler using FIR filtered data."""
 
     data_handler = DataHandlerFirFilterSingleStation
@@ -262,17 +296,20 @@ class DataHandlerKzFilterSingleStation(DataHandlerFilterSingleStation):
         f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1)
         index = list(map(f, index.tolist()))
         index = list(map(lambda x: str(x) + "d", index)) + ["res"]
+        self.filter_dim_order = index
         return pd.Index(index, name=self.filter_dim)
 
     def _create_lazy_data(self):
-        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days]
+        return [self._data, self.meta, self.input_data, self.target_data, self.cutoff_period, self.cutoff_period_days,
+                self.filter_dim_order]
 
     def _extract_lazy(self, lazy_data):
-        _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
+        _data, _meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days, \
+        self.filter_dim_order = lazy_data
         super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data))
 
 
-class DataHandlerKzFilter(DefaultDataHandler):
+class DataHandlerKzFilter(DataHandlerFilter):
     """Data handler using kz filtered data."""
 
     data_handler = DataHandlerKzFilterSingleStation
@@ -372,14 +409,16 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         index = list(map(lambda x: str(x) + "d", index)) + ["res"]
         if self._add_unfiltered:
             index.append("unfiltered")
+        self.filter_dim_order = index
         return pd.Index(index, name=self.filter_dim)
 
     def _create_lazy_data(self):
         return [self._data, self.meta, self.input_data, self.target_data, self.climate_filter_coeff,
-                self.apriori, self.all_apriori]
+                self.apriori, self.all_apriori, self.filter_dim_order]
 
     def _extract_lazy(self, lazy_data):
-        _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori = lazy_data
+        _data, _meta, _input_data, _target_data, self.climate_filter_coeff, self.apriori, self.all_apriori, \
+        self.filter_dim_order = lazy_data
         DataHandlerSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data))
 
     @staticmethod
@@ -440,10 +479,19 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         self._transformation = (opts_input, opts_target)
 
 
-class DataHandlerClimateFirFilter(DefaultDataHandler):
+class DataHandlerClimateFirFilter(DataHandlerFilter):
     """Data handler using climatic adjusted FIR filtered data."""
 
     data_handler = DataHandlerClimateFirFilterSingleStation
     data_handler_transformation = DataHandlerClimateFirFilterSingleStation
     _requirements = data_handler.requirements()
     _store_attributes = data_handler.store_attributes()
+
+    # def get_X_original(self):
+    #     X = []
+    #     for data in self._collection:
+    #         X_total = data.get_X()
+    #         filter_dim = data.filter_dim
+    #         for filter in data.filter_dim_order:
+    #             X.append(X_total.sel({filter_dim: filter}, drop=True))
+    #     return X
-- 
GitLab
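
Note: with use_filter_branches=True, get_X_original no longer returns one array carrying a filter dimension but a list of arrays, one per filter component, so each component can feed its own network branch. A sketch of the splitting idea with invented names:

    import numpy as np
    import xarray as xr

    X_total = xr.DataArray(np.random.rand(100, 7, 3), dims=("datetime", "window", "filter"),
                           coords={"filter": ["low", "band", "res"]})

    filter_dim, filter_dim_order = "filter", ["low", "band", "res"]
    X = [X_total.sel({filter_dim: name}, drop=True) for name in filter_dim_order]
    # len(X) == 3, each element with dims ("datetime", "window")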


From e3de37e423c5d6e5a106181c2d3ab235fe476b19 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 28 May 2021 10:07:57 +0200
Subject: [PATCH 145/175] new FCN class using branched inputs (can be combined
 with branched filter data handler)

---
 .../model_modules/fully_connected_networks.py | 192 +++++++++++++++++-
 mlair/model_modules/loss.py                   |   4 +-
 2 files changed, 192 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index ff06f075..21455383 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -5,7 +5,7 @@ from functools import reduce, partial
 
 from mlair.model_modules import AbstractModelClass
 from mlair.helpers import select_from_dict
-from mlair.model_modules.loss import var_loss, custom_loss
+from mlair.model_modules.loss import var_loss, custom_loss, l_p_loss
 
 import keras
 
@@ -79,7 +79,7 @@ class FCN(AbstractModelClass):
         # apply to model
         self.set_model()
         self.set_compile_options()
-        self.set_custom_objects(loss=self.compile_options["loss"][0], var_loss=var_loss)
+        self.set_custom_objects(loss=self.compile_options["loss"][0], var_loss=var_loss, l_p_loss=l_p_loss(.5))
 
     def _set_activation(self, activation):
         try:
@@ -190,3 +190,191 @@ class FCN_64_32_16(FCN):
     def _update_model_name(self):
         self.model_name = "FCN"
         super()._update_model_name()
+
+
+class BranchedInputFCN(AbstractModelClass):
+    """
+    A customisable fully connected network where each input branch is processed separately before concatenation;
+    the last layer is the output layer, sized by the window_lead_time parameter.
+    """
+
+    _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
+                   "sigmoid": partial(keras.layers.Activation, "sigmoid"),
+                   "linear": partial(keras.layers.Activation, "linear"),
+                   "selu": partial(keras.layers.Activation, "selu"),
+                   "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25)),
+                   "leakyrelu": partial(keras.layers.LeakyReLU)}
+    _initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
+                    "relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal(),
+                    "prelu": keras.initializers.he_normal()}
+    _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
+    _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
+    _dropout = {"selu": keras.layers.AlphaDropout}
+
+    def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
+                 optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
+                 batch_normalization=False, **kwargs):
+        """
+        Sets model and loss depending on the given arguments.
+
+        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
+        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+
+        Customize this FCN model via the following parameters:
+
+        :param activation: set your desired activation function. Choose from relu, tanh, sigmoid, linear, selu, prelu,
+            leakyrelu. (Default relu)
+        :param activation_output: same as activation parameter but exclusively applied on output layer only. (Default
+            linear)
+        :param optimizer: set optimizer method. Can be either adam or sgd. (Default adam)
+        :param n_layer: define number of hidden layers in the network. Given number of hidden neurons are used in each
+            layer. (Default 1)
+        :param n_hidden: define number of hidden units per layer. This number is used in each hidden layer. (Default 10)
+        :param layer_configuration: alternative formulation of the network's architecture. This will overwrite the
+            settings from n_layer and n_hidden. Provide a list where each element represent the number of units in the
+            hidden layer. The number of hidden layers is equal to the total length of this list.
+        :param dropout: use dropout with given rate. If no value is provided, dropout layers are not added to the
+            network at all. (Default None)
+        :param batch_normalization: use batch normalization layer in the network if enabled. These layers are inserted
+            between the linear part of a layer (the nn part) and the non-linear part (activation function). No BN layer
+            is added if set to false. (Default false)
+        """
+
+        super().__init__(input_shape, output_shape[0])
+
+        # settings
+        self.activation = self._set_activation(activation)
+        self.activation_name = activation
+        self.activation_output = self._set_activation(activation_output)
+        self.activation_output_name = activation_output
+        self.optimizer = self._set_optimizer(optimizer, **kwargs)
+        self.bn = batch_normalization
+        self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration
+        self._update_model_name()
+        self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
+        self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
+        self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
+
+        # apply to model
+        self.set_model()
+        self.set_compile_options()
+        self.set_custom_objects(loss=self.compile_options["loss"][0], var_loss=var_loss)
+
+    def _set_activation(self, activation):
+        try:
+            return self._activation.get(activation.lower())
+        except KeyError:
+            raise AttributeError(f"Given activation {activation} is not supported in this model class.")
+
+    def _set_optimizer(self, optimizer, **kwargs):
+        try:
+            opt_name = optimizer.lower()
+            opt = self._optimizer.get(opt_name)
+            opt_kwargs = {}
+            if opt_name == "adam":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"])
+            elif opt_name == "sgd":
+                opt_kwargs = select_from_dict(kwargs, ["lr", "momentum", "decay", "nesterov"])
+            return opt(**opt_kwargs)
+        except KeyError:
+            raise AttributeError(f"Given optimizer {optimizer} is not supported in this model class.")
+
+    def _set_regularizer(self, regularizer, **kwargs):
+        if regularizer is None or (isinstance(regularizer, str) and regularizer.lower() == "none"):
+            return None
+        try:
+            reg_name = regularizer.lower()
+            reg = self._regularizer.get(reg_name)
+            reg_kwargs = {}
+            if reg_name in ["l1", "l2"]:
+                reg_kwargs = select_from_dict(kwargs, reg_name, remove_none=True)
+                if reg_name in reg_kwargs:
+                    reg_kwargs["l"] = reg_kwargs.pop(reg_name)
+            elif reg_name == "l1_l2":
+                reg_kwargs = select_from_dict(kwargs, ["l1", "l2"], remove_none=True)
+            return reg(**reg_kwargs)
+        except KeyError:
+            raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")
+
+    def _set_dropout(self, activation, dropout_rate):
+        if dropout_rate is None:
+            return None, None
+        assert 0 <= dropout_rate < 1
+        return self._dropout.get(activation, keras.layers.Dropout), dropout_rate
+
+    def _update_model_name(self):
+        n_input = f"{len(self._input_shape)}x{str(reduce(lambda x, y: x * y, self._input_shape[0]))}"
+        n_output = str(self._output_shape)
+
+        if isinstance(self.layer_configuration, tuple) and len(self.layer_configuration) == 2:
+            n_layer, n_hidden = self.layer_configuration
+            branch = [f"{n_hidden}" for _ in range(n_layer)]
+        else:
+            branch = [f"{n}" for n in self.layer_configuration]
+
+        concat = []
+        n_neurons_concat = int(branch[-1]) * len(self._input_shape)
+        for exp in reversed(range(2, len(self._input_shape) + 1)):
+            n_neurons = self._output_shape ** exp
+            if n_neurons < n_neurons_concat:
+                if len(concat) == 0:
+                    concat.append(f"1x{n_neurons}")
+                else:
+                    concat.append(str(n_neurons))
+        self.model_name += "_".join(["", n_input, *branch, *concat, n_output])
+
+    def set_model(self):
+        """
+        Build the model.
+        """
+
+        if isinstance(self.layer_configuration, tuple) is True:
+            n_layer, n_hidden = self.layer_configuration
+            conf = [n_hidden for _ in range(n_layer)]
+        else:
+            assert isinstance(self.layer_configuration, list) is True
+            conf = self.layer_configuration
+
+        x_input = []
+        x_in = []
+
+        for branch in range(len(self._input_shape)):
+            x_input_b = keras.layers.Input(shape=self._input_shape[branch])
+            x_input.append(x_input_b)
+            x_in_b = keras.layers.Flatten()(x_input_b)
+
+            for layer, n_hidden in enumerate(conf):
+                x_in_b = keras.layers.Dense(n_hidden, kernel_initializer=self.kernel_initializer,
+                                            kernel_regularizer=self.kernel_regularizer,
+                                            name=f"Dense_branch{branch + 1}_{layer + 1}")(x_in_b)
+                if self.bn is True:
+                    x_in_b = keras.layers.BatchNormalization()(x_in_b)
+                x_in_b = self.activation(name=f"{self.activation_name}_branch{branch + 1}_{layer + 1}")(x_in_b)
+                if self.dropout is not None:
+                    x_in_b = self.dropout(self.dropout_rate)(x_in_b)
+            x_in.append(x_in_b)
+        x_concat = keras.layers.Concatenate()(x_in)
+
+        n_neurons_concat = int(conf[-1]) * len(self._input_shape)
+        layer_concat = 0
+        for exp in reversed(range(2, len(self._input_shape) + 1)):
+            n_neurons = self._output_shape ** exp
+            if n_neurons < n_neurons_concat:
+                layer_concat += 1
+                x_concat = keras.layers.Dense(n_neurons, name=f"Dense_{layer_concat}")(x_concat)
+                if self.bn is True:
+                    x_concat = keras.layers.BatchNormalization()(x_concat)
+                x_concat = self.activation(name=f"{self.activation_name}_{layer_concat}")(x_concat)
+                if self.dropout is not None:
+                    x_concat = self.dropout(self.dropout_rate)(x_concat)
+        x_concat = keras.layers.Dense(self._output_shape)(x_concat)
+        out = self.activation_output(name=f"{self.activation_output_name}_output")(x_concat)
+        self.model = keras.Model(inputs=x_input, outputs=[out])
+        print(self.model.summary())
+
+    def set_compile_options(self):
+        # self.compile_options = {"loss": [keras.losses.mean_squared_error],
+        #                         "metrics": ["mse", "mae", var_loss]}
+        self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss], loss_weights=[2, 1])],
+                                "metrics": ["mse", "mae", var_loss]}
diff --git a/mlair/model_modules/loss.py b/mlair/model_modules/loss.py
index ba871e98..2034c5a7 100644
--- a/mlair/model_modules/loss.py
+++ b/mlair/model_modules/loss.py
@@ -16,10 +16,10 @@ def l_p_loss(power: int) -> Callable:
     :return: loss for given power
     """
 
-    def loss(y_true, y_pred):
+    def l_p_loss(y_true, y_pred):
         return K.mean(K.pow(K.abs(y_pred - y_true), power), axis=-1)
 
-    return loss
+    return l_p_loss
 
 
 def var_loss(y_true, y_pred) -> Callable:
-- 
GitLab
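
Note: the new BranchedInputFCN expects one input shape per branch and is meant to pair with the branched filter data handlers from the previous patch. An illustrative instantiation (shapes invented):

    from mlair.model_modules.fully_connected_networks import BranchedInputFCN

    model_class = BranchedInputFCN(input_shape=[(7, 1, 9), (7, 1, 9), (7, 1, 9)],
                                   output_shape=[(4,)],
                                   layer_configuration=[64, 32], dropout=0.2)

The loss.py hunk renames the inner closure because Keras records a custom loss under the function's __name__ and resolves that name against custom_objects on load; a closure named plain "loss" would make every custom loss serialize under the same name.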


From 92763f13dd1bc4772f198c240364ec8ed204b9f5 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 28 May 2021 12:35:03 +0200
Subject: [PATCH 146/175] added param max_number_multiprocessing to set maximum
 number of processes to use, /close #308 on test success

---
 mlair/configuration/defaults.py            | 1 +
 mlair/data_handler/default_data_handler.py | 8 ++++++--
 mlair/run_modules/experiment_setup.py      | 7 +++++--
 mlair/run_modules/pre_processing.py        | 7 ++++---
 4 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 785aab88..bfbef521 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -55,6 +55,7 @@ DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA
                        "pm10": "", "so2": ""}
 DEFAULT_USE_MULTIPROCESSING = True
 DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False
+DEFAULT_MAX_NUMBER_MULTIPROCESSING = 16
 
 
 def get_defaults():
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index f70f454b..a17de954 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -37,11 +37,12 @@ class DefaultDataHandler(AbstractDataHandler):
 
     DEFAULT_ITER_DIM = "Stations"
     DEFAULT_TIME_DIM = "datetime"
+    MAX_NUMBER_MULTIPROCESSING = 16
 
     def __init__(self, id_class: data_handler, experiment_path: str, min_length: int = 0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None,
                  store_processed_data=True, iter_dim=DEFAULT_ITER_DIM, time_dim=DEFAULT_TIME_DIM,
-                 use_multiprocessing=True):
+                 use_multiprocessing=True, max_number_multiprocessing=MAX_NUMBER_MULTIPROCESSING):
         super().__init__()
         self.id_class = id_class
         self.time_dim = time_dim
@@ -52,6 +53,7 @@ class DefaultDataHandler(AbstractDataHandler):
         self._X_extreme = None
         self._Y_extreme = None
         self._use_multiprocessing = use_multiprocessing
+        self._max_number_multiprocessing = max_number_multiprocessing
         _name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self))
         self._save_file = os.path.join(experiment_path, "data", f"{_name_affix}.pickle")
         self._collection = self._create_collection()
@@ -301,7 +303,9 @@ class DefaultDataHandler(AbstractDataHandler):
                         if "feature_range" in opts.keys():
                             transformation_dict[i][var]["feature_range"] = opts.get("feature_range", None)
 
-        if multiprocessing.cpu_count() > 1 and kwargs.get("use_multiprocessing", True) is True:  # parallel solution
+        max_process = kwargs.get("max_number_multiprocessing", 16)
+        n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process])  # use only physical cpus
+        if n_process > 1 and kwargs.get("use_multiprocessing", True) is True:  # parallel solution
             logging.info("use parallel transformation approach")
             pool = multiprocessing.Pool(
                 min([psutil.cpu_count(logical=False), len(set_stations), 16]))  # use only physical cpus
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 24fedaa8..bd06914f 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -19,7 +19,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \
-    DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG
+    DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG, DEFAULT_MAX_NUMBER_MULTIPROCESSING
 from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
@@ -215,7 +215,8 @@ class ExperimentSetup(RunEnvironment):
                  create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
                  hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
                  data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
-                 use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, **kwargs):
+                 use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None,
+                 max_number_multiprocessing: int = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -260,6 +261,8 @@ class ExperimentSetup(RunEnvironment):
                             default=DEFAULT_USE_MULTIPROCESSING_ON_DEBUG)
         else:
             self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING)
+        self._set_param("max_number_multiprocessing", max_number_multiprocessing,
+                        default=DEFAULT_MAX_NUMBER_MULTIPROCESSING)
 
         # batch path (temporary)
         self._set_param("batch_path", batch_path, default=os.path.join(experiment_path, "batch_data"))
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index db9d1d5e..11d73276 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -243,10 +243,11 @@ class PreProcessing(RunEnvironment):
         kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope=set_name)
         use_multiprocessing = self.data_store.get("use_multiprocessing")
 
-        if multiprocessing.cpu_count() > 1 and use_multiprocessing:  # parallel solution
+        max_process = self.data_store.get("max_number_multiprocessing")
+        n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process])  # use only physical cpus
+        if n_process > 1 and use_multiprocessing:  # parallel solution
             logging.info("use parallel validate station approach")
-            pool = multiprocessing.Pool(
-                min([psutil.cpu_count(logical=False), len(set_stations), 16]))  # use only physical cpus
+            pool = multiprocessing.Pool(n_process)
             logging.info(f"running {getattr(pool, '_processes')} processes in parallel")
             output = [
                 pool.apply_async(f_proc, args=(data_handler, station, set_name, store_processed_data), kwds=kwargs)
-- 
GitLab
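
Note: the process count is now capped three ways: physical cores (psutil), number of stations, and the new user-facing limit max_number_multiprocessing (default 16). A condensed sketch of the selection logic; the helper name is invented:

    import multiprocessing

    import psutil

    def open_pool(n_stations, max_number_multiprocessing=16, use_multiprocessing=True):
        # use only physical cpus, never more workers than stations or the configured cap
        n_process = min([psutil.cpu_count(logical=False), n_stations, max_number_multiprocessing])
        if n_process > 1 and use_multiprocessing is True:
            return multiprocessing.Pool(n_process)
        return None  # caller falls back to the serial code path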


From c846a7fe350da0a71cb8aa91ec60bc2f77615cc9 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 28 May 2021 12:59:13 +0200
Subject: [PATCH 147/175] adjusted preprocessing tests

---
 conftest.py                                  | 2 +-
 mlair/run_modules/pre_processing.py          | 2 +-
 test/test_run_modules/test_pre_processing.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conftest.py b/conftest.py
index abb0c0f5..b63d3efb 100644
--- a/conftest.py
+++ b/conftest.py
@@ -66,5 +66,5 @@ def default_session_fixture(request):
 
     # request.addfinalizer(unpatch)
 
-    with mock.patch("multiprocessing.cpu_count", return_value=1):
+    with mock.patch("psutil.cpu_count", return_value=1):
         yield
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 11d73276..c7d7f920 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -245,7 +245,7 @@ class PreProcessing(RunEnvironment):
 
         max_process = self.data_store.get("max_number_multiprocessing")
         n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process])  # use only physical cpus
-        if n_process > 1 and use_multiprocessing:  # parallel solution
+        if n_process > 1 and use_multiprocessing is True:  # parallel solution
             logging.info("use parallel validate station approach")
             pool = multiprocessing.Pool(n_process)
             logging.info(f"running {getattr(pool, '_processes')} processes in parallel")
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index 5ae64bf3..0f2ee7a1 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -109,7 +109,7 @@ class TestPreProcessing:
         assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 6 '
                                                                     r'station\(s\). Found 5/6 valid stations.'))
 
-    @mock.patch("multiprocessing.cpu_count", return_value=3)
+    @mock.patch("psutil.cpu_count", return_value=3)
     @mock.patch("multiprocessing.Pool", return_value=multiprocessing.Pool(3))
     def test_validate_station_parallel(self, mock_pool, mock_cpu, caplog, obj_with_exp_setup):
         pre = obj_with_exp_setup
-- 
GitLab
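
Note: since the production code now consults psutil.cpu_count(logical=False) rather than multiprocessing.cpu_count(), the fixtures must patch the symbol that is actually called. A minimal demonstration:

    from unittest import mock

    import psutil

    with mock.patch("psutil.cpu_count", return_value=1):
        # any argument combination returns the mocked value, forcing the serial branch
        assert psutil.cpu_count(logical=False) == 1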


From e4c9fdb58fe94529abe1332abdf19a35c7acaf1c Mon Sep 17 00:00:00 2001
From: "v.gramlich1" <v.gramlichfz-juelich.de>
Date: Mon, 28 Jun 2021 11:28:58 +0200
Subject: [PATCH 148/175] Implementation of IntelliO3_ts_architecture

---
 mlair/model_modules/model_class.py | 55 +++++++++++-------------------
 1 file changed, 20 insertions(+), 35 deletions(-)

diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py
index f8e3a21a..9a0e97db 100644
--- a/mlair/model_modules/model_class.py
+++ b/mlair/model_modules/model_class.py
@@ -126,6 +126,7 @@ from mlair.model_modules import AbstractModelClass
 from mlair.model_modules.inception_model import InceptionModelBase
 from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.advanced_paddings import PadUtils, Padding2D, SymmetricPadding2D
+from mlair.model_modules.loss import l_p_loss
 
 
 class MyLittleModelHourly(AbstractModelClass):
@@ -349,7 +350,7 @@ class MyTowerModel(AbstractModelClass):
         self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse"]}
 
 
-class MyPaperModel(AbstractModelClass):
+class IntelliO3_ts_architecture(AbstractModelClass):
 
     def __init__(self, input_shape: list, output_shape: list):
         """
@@ -366,9 +367,9 @@ class MyPaperModel(AbstractModelClass):
         from mlair.model_modules.keras_extensions import LearningRateDecay
 
         # settings
-        self.dropout_rate = .3
-        self.regularizer = keras.regularizers.l2(0.001)
-        self.initial_lr = 1e-3
+        self.dropout_rate = .35
+        self.regularizer = keras.regularizers.l2(0.01)
+        self.initial_lr = 1e-4
         self.lr_decay = LearningRateDecay(base_lr=self.initial_lr, drop=.94, epochs_drop=10)
         self.activation = keras.layers.ELU
         self.padding = "SymPad2D"
@@ -398,35 +399,22 @@ class MyPaperModel(AbstractModelClass):
         conv_settings_dict1 = {
             'tower_1': {'reduction_filter': 8, 'tower_filter': 16 * 2, 'tower_kernel': (3, 1),
                         'activation': activation},
-            # 'tower_2': {'reduction_filter': 8, 'tower_filter': 16 * 2, 'tower_kernel': (5, 1),
-            #             'activation': activation},
-            # 'tower_3': {'reduction_filter': 8, 'tower_filter': 16 * 2, 'tower_kernel': (1, 1),
-            #             'activation': activation},
-            # 'tower_4':{'reduction_filter':8, 'tower_filter':8*2, 'tower_kernel':(7,1), 'activation':activation},
+            'tower_2': {'reduction_filter': 8, 'tower_filter': 16 * 2, 'tower_kernel': (5, 1),
+                        'activation': activation},
+            'tower_3': {'reduction_filter': 8, 'tower_filter': 16 * 2, 'tower_kernel': (1, 1),
+                        'activation': activation}
         }
         pool_settings_dict1 = {'pool_kernel': (3, 1), 'tower_filter': 16, 'activation': activation}
 
         conv_settings_dict2 = {
             'tower_1': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (3, 1),
                         'activation': activation},
-            # 'tower_2': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (5, 1),
-            #             'activation': activation},
-            # 'tower_3': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (1, 1),
-            #             'activation': activation},
-            # 'tower_4':{'reduction_filter':8*2, 'tower_filter':16*2, 'tower_kernel':(7,1), 'activation':activation},
-        }
-        pool_settings_dict2 = {'pool_kernel': (3, 1), 'tower_filter': 32, 'activation': activation}
-
-        conv_settings_dict3 = {
-            'tower_1': {'reduction_filter': 64 * 2, 'tower_filter': 32 * 4, 'tower_kernel': (3, 1),
-                        'activation': activation},
-            'tower_2': {'reduction_filter': 64 * 2, 'tower_filter': 32 * 4, 'tower_kernel': (5, 1),
+            'tower_2': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (5, 1),
                         'activation': activation},
-            'tower_3': {'reduction_filter': 64 * 2, 'tower_filter': 32 * 4, 'tower_kernel': (1, 1),
-                        'activation': activation},
-            # 'tower_4':{'reduction_filter':16*4, 'tower_filter':32, 'tower_kernel':(7,1), 'activation':activation},
+            'tower_3': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (1, 1),
+                        'activation': activation}
         }
-        pool_settings_dict3 = {'pool_kernel': (3, 1), 'tower_filter': 32, 'activation': activation}
+        pool_settings_dict2 = {'pool_kernel': (3, 1), 'tower_filter': 32, 'activation': activation}
 
         ##########################################
         inception_model = InceptionModelBase()
@@ -445,10 +433,9 @@ class MyPaperModel(AbstractModelClass):
                                                regularizer=self.regularizer,
                                                batch_normalisation=True,
                                                padding=self.padding)
-        # out_minor1 = flatten_tail(X_in, 'minor_1', False, self.dropout_rate, self.window_lead_time,
-        #                           self.activation, 32, 64)
+
         out_minor1 = flatten_tail(X_in, inner_neurons=64, activation=activation, output_neurons=self._output_shape,
-                                  output_activation='linear', reduction_filter=32,
+                                  output_activation='linear', reduction_filter=32 * 2,
                                   name='minor_1', bound_weight=False, dropout_rate=self.dropout_rate,
                                   kernel_regularizer=self.regularizer
                                   )
@@ -459,10 +446,6 @@ class MyPaperModel(AbstractModelClass):
                                                regularizer=self.regularizer,
                                                batch_normalisation=True, padding=self.padding)
 
-        # X_in = keras.layers.Dropout(self.dropout_rate)(X_in)
-        #
-        # X_in = inception_model.inception_block(X_in, conv_settings_dict3, pool_settings_dict3, regularizer=self.regularizer,
-        #                                        batch_normalisation=True)
         #############################################
 
         out_main = flatten_tail(X_in, inner_neurons=64 * 2, activation=activation, output_neurons=self._output_shape,
@@ -474,6 +457,8 @@ class MyPaperModel(AbstractModelClass):
         self.model = keras.Model(inputs=X_input, outputs=[out_minor1, out_main])
 
     def set_compile_options(self):
-        self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9)
-        self.compile_options = {"loss": [keras.losses.mean_squared_error, keras.losses.mean_squared_error],
-                                "metrics": ['mse', 'mae']}
+        self.compile_options = {"optimizer": keras.optimizers.adam(lr=self.initial_lr, amsgrad=True),
+                                "loss": [l_p_loss(4), keras.losses.mean_squared_error],
+                                "metrics": ['mse'],
+                                "loss_weights": [.01, .99]
+                                }
\ No newline at end of file
-- 
GitLab
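
Note: the model has two outputs (out_minor1 and out_main), and Keras pairs the entries of "loss" and "loss_weights" with the outputs positionally, so the effective objective is 0.01 * l_p_loss(4) on the minor head plus 0.99 * MSE on the main head. A numpy stand-in with illustrative values:

    import numpy as np

    def l_p_loss_np(power, y_true, y_pred):
        # numpy counterpart of l_p_loss(power), for illustration only
        return np.mean(np.abs(y_pred - y_true) ** power)

    y = np.ones(4)
    total = 0.01 * l_p_loss_np(4, y, np.full(4, 0.9)) + 0.99 * l_p_loss_np(2, y, np.full(4, 0.95))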


From 6ee128172f90501d378bd361259fdcd1c4c98018 Mon Sep 17 00:00:00 2001
From: "v.gramlich1" <v.gramlichfz-juelich.de>
Date: Mon, 28 Jun 2021 11:40:46 +0200
Subject: [PATCH 149/175] Updated test to run against IntelliO3_ts_architecture
 instead of the old MyPaperModel

---
 test/test_model_modules/test_model_class.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_model_modules/test_model_class.py b/test/test_model_modules/test_model_class.py
index cbff4cec..b05fd990 100644
--- a/test/test_model_modules/test_model_class.py
+++ b/test/test_model_modules/test_model_class.py
@@ -1,14 +1,14 @@
 import keras
 import pytest
 
-from mlair.model_modules.model_class import MyPaperModel
+from mlair.model_modules.model_class import IntelliO3_ts_architecture
 
 
-class TestMyPaperModel:
+class TestIntelliO3_ts_architecture:
 
     @pytest.fixture
     def mpm(self):
-        return MyPaperModel(input_shape=[(7, 1, 9)], output_shape=[(4,)])
+        return IntelliO3_ts_architecture(input_shape=[(7, 1, 9)], output_shape=[(4,)])
 
     def test_init(self, mpm):
         # check if loss number of loss functions fit to model outputs
-- 
GitLab


From 8324b33807d7122f4dcfa08974ea78b3f90a2b3d Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Thu, 1 Jul 2021 09:16:28 +0000
Subject: [PATCH 150/175] added bugfix, /close #311 on pipeline success

---
 mlair/data_handler/default_data_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 11461ad7..181e6604 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -79,7 +79,7 @@ class DefaultDataHandler(AbstractDataHandler):
     def _cleanup(self):
         directory = os.path.dirname(self._save_file)
         if os.path.exists(directory) is False:
-            os.makedirs(directory)
+            os.makedirs(directory, exist_ok=True)
         if os.path.exists(self._save_file):
             shutil.rmtree(self._save_file, ignore_errors=True)
 
-- 
GitLab
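
Note on the fix: between the os.path.exists check and the os.makedirs call,
another process (e.g. a parallel data handler) may create the directory, in
which case the plain makedirs raises FileExistsError. A minimal sketch of the
failure mode and the race-safe variant used above (the path is a hypothetical
stand-in):

    import os

    directory = "/tmp/mlair_cache"  # hypothetical path, for illustration only

    # racy pattern: another process can create the directory after the check
    if not os.path.exists(directory):
        os.makedirs(directory)  # may raise FileExistsError

    # race-safe pattern from the patch: exist_ok suppresses that error
    os.makedirs(directory, exist_ok=True)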


From 658f4ddfeb0b3e7e237341bb1fe6d6b55172a833 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Mon, 5 Jul 2021 07:53:20 +0200
Subject: [PATCH 151/175] first draft of callback

---
 mlair/model_modules/keras_extensions.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/mlair/model_modules/keras_extensions.py b/mlair/model_modules/keras_extensions.py
index 33358e56..bc868765 100644
--- a/mlair/model_modules/keras_extensions.py
+++ b/mlair/model_modules/keras_extensions.py
@@ -8,6 +8,7 @@ import math
 import pickle
 from typing import Union, List
 from typing_extensions import TypedDict
+from time import time
 
 import numpy as np
 from keras import backend as K
@@ -111,6 +112,19 @@ class LearningRateDecay(History):
         return K.get_value(self.model.optimizer.lr)
 
 
+class TimingCallback(Callback):
+    def __init__(self):
+        self.logs = []
+        self.starttime = None
+        super().__init__()
+
+    def on_epoch_begin(self, logs={}):
+        self.starttime = time()
+
+    def on_epoch_end(self, logs={}):
+        self.logs.append(time()-self.starttime)
+
+
 class ModelCheckpointAdvanced(ModelCheckpoint):
     """
     Enhance the standard ModelCheckpoint class by additional saves of given callbacks.
-- 
GitLab


From 160bf15f11ba4e711ca394d79b4d5dab786f9957 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Mon, 5 Jul 2021 08:49:59 +0200
Subject: [PATCH 152/175] update timing callback

---
 mlair/model_modules/keras_extensions.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mlair/model_modules/keras_extensions.py b/mlair/model_modules/keras_extensions.py
index bc868765..e0f54282 100644
--- a/mlair/model_modules/keras_extensions.py
+++ b/mlair/model_modules/keras_extensions.py
@@ -112,17 +112,18 @@ class LearningRateDecay(History):
         return K.get_value(self.model.optimizer.lr)
 
 
-class TimingCallback(Callback):
+class EpoTimingCallback(Callback):
     def __init__(self):
+        self.epo_timing = {'epo_timing': []}
         self.logs = []
         self.starttime = None
         super().__init__()
 
-    def on_epoch_begin(self, logs={}):
+    def on_epoch_begin(self, epoch: int, logs=None):
         self.starttime = time()
 
-    def on_epoch_end(self, logs={}):
-        self.logs.append(time()-self.starttime)
+    def on_epoch_end(self, epoch: int, logs=None):
+        self.epo_timing["epo_timing"].append(time()-self.starttime)
 
 
 class ModelCheckpointAdvanced(ModelCheckpoint):
-- 
GitLab
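
Usage sketch: EpoTimingCallback records the wall-clock seconds of every epoch
in its epo_timing attribute. A minimal, self-contained example (the toy model
and random data are placeholders, not part of the MLAir workflow):

    import keras
    import numpy as np

    from mlair.model_modules.keras_extensions import EpoTimingCallback

    # toy model and random data, for illustration only
    model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer="sgd", loss="mse")
    x, y = np.random.rand(32, 4), np.random.rand(32, 1)

    epo_timing = EpoTimingCallback()
    model.fit(x, y, epochs=3, callbacks=[epo_timing], verbose=0)
    print(epo_timing.epo_timing)  # e.g. {'epo_timing': [0.05, 0.03, 0.03]}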


From 72c1d3d240b04280854b0dcfe1a3be1ff76cf937 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Mon, 5 Jul 2021 09:44:41 +0200
Subject: [PATCH 153/175] include EpoTimingCallback in workflow

---
 mlair/run_modules/model_setup.py          |  5 ++++-
 mlair/run_modules/training.py             | 11 ++++++++--
 test/test_run_modules/test_model_setup.py |  4 ++--
 test/test_run_modules/test_training.py    | 26 +++++++++++++++--------
 4 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/mlair/run_modules/model_setup.py b/mlair/run_modules/model_setup.py
index 8fae430f..83f4a2bd 100644
--- a/mlair/run_modules/model_setup.py
+++ b/mlair/run_modules/model_setup.py
@@ -12,7 +12,7 @@ import keras
 import pandas as pd
 import tensorflow as tf
 
-from mlair.model_modules.keras_extensions import HistoryAdvanced, CallbackHandler
+from mlair.model_modules.keras_extensions import HistoryAdvanced, EpoTimingCallback, CallbackHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.configuration import path_config
 
@@ -119,11 +119,14 @@ class ModelSetup(RunEnvironment):
         """
         lr = self.data_store.get_default("lr_decay", scope=self.scope, default=None)
         hist = HistoryAdvanced()
+        epo_timing = EpoTimingCallback()
         self.data_store.set("hist", hist, scope="model")
+        self.data_store.set("epo_timing", epo_timing, scope="model")
         callbacks = CallbackHandler()
         if lr is not None:
             callbacks.add_callback(lr, self.callbacks_name % "lr", "lr")
         callbacks.add_callback(hist, self.callbacks_name % "hist", "hist")
+        callbacks.add_callback(epo_timing, self.callbacks_name % "epo_timing", "epo_timing")
         callbacks.create_model_checkpoint(filepath=self.checkpoint_name, verbose=1, monitor='val_loss',
                                           save_best_only=True, mode='auto')
         self.data_store.set("callbacks", callbacks, self.scope)
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index 5f895b77..00e8eae1 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -166,7 +166,11 @@ class Training(RunEnvironment):
             lr = self.callbacks.get_callback_by_name("lr")
         except IndexError:
             lr = None
-        self.save_callbacks_as_json(history, lr)
+        try:
+            epo_timing = self.callbacks.get_callback_by_name("epo_timing")
+        except IndexError:
+            epo_timing = None
+        self.save_callbacks_as_json(history, lr, epo_timing)
         self.load_best_model(checkpoint.filepath)
         self.create_monitoring_plots(history, lr)
 
@@ -190,7 +194,7 @@ class Training(RunEnvironment):
         except OSError:
             logging.info('no weights to reload...')
 
-    def save_callbacks_as_json(self, history: Callback, lr_sc: Callback) -> None:
+    def save_callbacks_as_json(self, history: Callback, lr_sc: Callback, epo_timing: Callback) -> None:
         """
         Save callbacks (history, learning rate) of training.
 
@@ -207,6 +211,9 @@ class Training(RunEnvironment):
         if lr_sc:
             with open(os.path.join(path, "history_lr.json"), "w") as f:
                 json.dump(lr_sc.lr, f)
+        if epo_timing is not None:
+            with open(os.path.join(path, "epo_timing.json"), "w") as f:
+                json.dump(epo_timing.epo_timing, f)
 
     def create_monitoring_plots(self, history: Callback, lr_sc: Callback) -> None:
         """
diff --git a/test/test_run_modules/test_model_setup.py b/test/test_run_modules/test_model_setup.py
index 8a757214..7cefd0e5 100644
--- a/test/test_run_modules/test_model_setup.py
+++ b/test/test_run_modules/test_model_setup.py
@@ -80,7 +80,7 @@ class TestModelSetup:
         setup._set_callbacks()
         assert "general.model" in setup.data_store.search_name("callbacks")
         callbacks = setup.data_store.get("callbacks", "general.model")
-        assert len(callbacks.get_callbacks()) == 3
+        assert len(callbacks.get_callbacks()) == 4
 
     def test_set_callbacks_no_lr_decay(self, setup):
         setup.data_store.set("lr_decay", None, "general.model")
@@ -88,7 +88,7 @@ class TestModelSetup:
         setup.checkpoint_name = "TestName"
         setup._set_callbacks()
         callbacks: CallbackHandler = setup.data_store.get("callbacks", "general.model")
-        assert len(callbacks.get_callbacks()) == 2
+        assert len(callbacks.get_callbacks()) == 3
         with pytest.raises(IndexError):
             callbacks.get_callback_by_name("lr_decay")
 
diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py
index c2b58cbd..ed0d8264 100644
--- a/test/test_run_modules/test_training.py
+++ b/test/test_run_modules/test_training.py
@@ -13,7 +13,7 @@ from mlair.data_handler import DataCollection, KerasIterator, DefaultDataHandler
 from mlair.helpers import PyTestRegex
 from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.inception_model import InceptionModelBase
-from mlair.model_modules.keras_extensions import LearningRateDecay, HistoryAdvanced, CallbackHandler
+from mlair.model_modules.keras_extensions import LearningRateDecay, HistoryAdvanced, CallbackHandler, EpoTimingCallback
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.run_modules.training import Training
 
@@ -100,6 +100,12 @@ class TestTraining:
         h.model = mock.MagicMock()
         return h
 
+    @pytest.fixture
+    def epo_timing(self):
+        epo_timing = EpoTimingCallback()
+        epo_timing.epoch = [0, 1]
+        epo_timing.epo_timing = {"epo_timing": [0.1, 0.2]}
+        return epo_timing
+
     @pytest.fixture
     def path(self):
         return os.path.join(os.path.dirname(__file__), "TestExperiment")
@@ -144,9 +150,11 @@ class TestTraining:
     def callbacks(self, path):
         clbk = CallbackHandler()
         hist = HistoryAdvanced()
+        epo_timing = EpoTimingCallback()
         clbk.add_callback(hist, os.path.join(path, "hist_checkpoint.pickle"), "hist")
         lr = LearningRateDecay()
         clbk.add_callback(lr, os.path.join(path, "lr_checkpoint.pickle"), "lr")
+        clbk.add_callback(epo_timing, os.path.join(path, "epo_timing.pickle"), "epo_timing")
         clbk.create_model_checkpoint(filepath=os.path.join(path, "model_checkpoint"), monitor='val_loss',
                                      save_best_only=True)
         return clbk, hist, lr
@@ -256,22 +264,22 @@ class TestTraining:
         assert caplog.record_tuples[0] == ("root", 10, PyTestRegex("load best model: notExisting"))
         assert caplog.record_tuples[1] == ("root", 20, PyTestRegex("no weights to reload..."))
 
-    def test_save_callbacks_history_created(self, init_without_run, history, learning_rate, model_path):
-        init_without_run.save_callbacks_as_json(history, learning_rate)
+    def test_save_callbacks_history_created(self, init_without_run, history, learning_rate, epo_timing, model_path):
+        init_without_run.save_callbacks_as_json(history, learning_rate, epo_timing)
         assert "history.json" in os.listdir(model_path)
 
-    def test_save_callbacks_lr_created(self, init_without_run, history, learning_rate, model_path):
-        init_without_run.save_callbacks_as_json(history, learning_rate)
+    def test_save_callbacks_lr_created(self, init_without_run, history, learning_rate, epo_timing, model_path):
+        init_without_run.save_callbacks_as_json(history, learning_rate, epo_timing)
         assert "history_lr.json" in os.listdir(model_path)
 
-    def test_save_callbacks_inspect_history(self, init_without_run, history, learning_rate, model_path):
-        init_without_run.save_callbacks_as_json(history, learning_rate)
+    def test_save_callbacks_inspect_history(self, init_without_run, history, learning_rate, epo_timing, model_path):
+        init_without_run.save_callbacks_as_json(history, learning_rate, epo_timing)
         with open(os.path.join(model_path, "history.json")) as jfile:
             hist = json.load(jfile)
             assert hist == history.history
 
-    def test_save_callbacks_inspect_lr(self, init_without_run, history, learning_rate, model_path):
-        init_without_run.save_callbacks_as_json(history, learning_rate)
+    def test_save_callbacks_inspect_lr(self, init_without_run, history, learning_rate, epo_timing, model_path):
+        init_without_run.save_callbacks_as_json(history, learning_rate, epo_timing)
         with open(os.path.join(model_path, "history_lr.json")) as jfile:
             lr = json.load(jfile)
             assert lr == learning_rate.lr
-- 
GitLab
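
With the callback registered in ModelSetup and passed through Training, the
epoch durations end up in epo_timing.json next to history.json and
history_lr.json. A minimal sketch of reading the file back after a run (the
model path is a hypothetical stand-in):

    import json
    import os

    model_path = "TestExperiment/model"  # hypothetical experiment model path
    with open(os.path.join(model_path, "epo_timing.json")) as f:
        epo_timing = json.load(f)
    print(epo_timing["epo_timing"])  # list of seconds per training epoch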


From a2a6b331dd291927c41458f1c04299e74d1d08cc Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 7 Jul 2021 12:12:02 +0200
Subject: [PATCH 154/175] new plot class PlotClimateFirFilter

---
 mlair/plotting/data_insight_plotting.py | 216 ++++++++++++++++++++++++
 1 file changed, 216 insertions(+)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index f51d9c49..95b482df 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -3,6 +3,7 @@ __author__ = "Lukas Leufen, Felix Kleinert"
 __date__ = '2021-04-13'
 
 from typing import List, Dict
+import dill
 import os
 import logging
 import multiprocessing
@@ -862,3 +863,218 @@ def f_proc_hist(data, variables, n_bins, variables_dim):  # pragma: no cover
         res[var], bin_edges[var] = np.histogram(d.values, n_bins)
         interval_width[var] = bin_edges[var][1] - bin_edges[var][0]
     return res, interval_width, bin_edges
+
+
+class PlotClimateFirFilter(AbstractPlotClass):
+    """
+    Plot climate FIR filter components.
+
+    * Creates a separate folder climFIR inside the given plot directory.
+    * For each station up to 4 examples are shown (1 for each season).
+    * Each filtered component and its residuum is drawn in a separate plot.
+    * A filter component plot includes the climate FIR input, the filter response, the true non-causal (ideal) filter
+      input, and the corresponding ideal response (containing information about the future).
+    * A filter residuum plot includes the climate FIR residuum and the ideal filter residuum.
+    """
+
+    def __init__(self, plot_folder, plot_data, sampling, name):
+
+        from mlair.helpers.filter import fir_filter_convolve
+
+        # adjust default plot parameters
+        rc_params = {
+            'axes.labelsize': 'large',
+            'xtick.labelsize': 'large',
+            'ytick.labelsize': 'large',
+            'legend.fontsize': 'medium',
+            'axes.titlesize': 'large'}
+        if plot_folder is None:
+            return
+
+        self.style_dict = {
+            "original": {"color": "darkgrey", "linestyle": "dashed", "label": "original"},
+            "apriori": {"color": "darkgrey", "linestyle": "solid", "label": "estimated future"},
+            "clim": {"color": "black", "linestyle": "solid", "label": "clim filter", "linewidth": 2},
+            "ideal": {"color": "black", "linestyle": "dashed", "label": "ideal filter", "linewidth": 2},
+            "valid_area": {"color": "whitesmoke", "label": "valid area"},
+            "t0": {"color": "lightgrey", "lw": 6, "label": "$t_0$"}
+        }
+
+        plot_folder = os.path.join(os.path.abspath(plot_folder), "climFIR")
+        self.fir_filter_convolve = fir_filter_convolve
+        super().__init__(plot_folder, plot_name=None, rc_params=rc_params)
+        plot_dict, new_dim = self._prepare_data(plot_data)
+        self._name = name
+        self._plot(plot_dict, sampling, new_dim)
+        self._store_plot_data(plot_data)
+
+    def _prepare_data(self, data):
+        """Restructure plot data."""
+        plot_dict = {}
+        new_dim = None
+        for i in range(len(data)):
+            plot_data = data[i]
+            for p_d in plot_data:
+                var = p_d.get("var")
+                t0 = p_d.get("t0")
+                filter_input = p_d.get("filter_input")
+                filter_input_nc = p_d.get("filter_input_nc")
+                valid_range = p_d.get("valid_range")
+                time_range = p_d.get("time_range")
+                if new_dim is None:
+                    new_dim = p_d.get("new_dim")
+                else:
+                    assert new_dim == p_d.get("new_dim")
+                h = p_d.get("h")
+                plot_dict_var = plot_dict.get(var, {})
+                plot_dict_t0 = plot_dict_var.get(t0, {})
+                plot_dict_order = {"filter_input": filter_input,
+                                   "filter_input_nc": filter_input_nc,
+                                   "valid_range": valid_range,
+                                   "time_range": time_range,
+                                   "order": len(h), "h": h}
+                plot_dict_t0[i] = plot_dict_order
+                plot_dict_var[t0] = plot_dict_t0
+                plot_dict[var] = plot_dict_var
+        return plot_dict, new_dim
+
+    def _plot(self, plot_dict, sampling, new_dim="window"):
+        td_type = {"1d": "D", "1H": "h"}.get(sampling)
+        for var, viz_date_dict in plot_dict.items():
+            for it0, t0 in enumerate(viz_date_dict.keys()):
+                viz_data = viz_date_dict[t0]
+                residuum_true = None
+                for ifilter in sorted(viz_data.keys()):
+                    data = viz_data[ifilter]
+                    filter_input = data["filter_input"]
+                    filter_input_nc = data["filter_input_nc"] if residuum_true is None else residuum_true.sel(
+                        {new_dim: filter_input.coords[new_dim]})
+                    valid_range = data["valid_range"]
+                    time_axis = data["time_range"]
+                    filter_order = data["order"]
+                    h = data["h"]
+                    fig, ax = plt.subplots()
+
+                    # plot backgrounds
+                    self._plot_valid_area(ax, t0, valid_range, td_type)
+                    self._plot_t0(ax, t0)
+
+                    # original data
+                    self._plot_original_data(ax, time_axis, filter_input_nc)
+
+                    # clim apriori
+                    self._plot_apriori(ax, time_axis, filter_input, new_dim, ifilter)
+
+                    # clim filter response
+                    residuum_estimated = self._plot_clim_filter(ax, time_axis, filter_input, new_dim, h,
+                                                                output_dtypes=filter_input.dtype)
+
+                    # ideal filter response
+                    residuum_true = self._plot_ideal_filter(ax, time_axis, filter_input_nc, new_dim, h,
+                                                            output_dtypes=filter_input.dtype)
+
+                    # set title, legend, and save plot
+                    xlims = self._set_xlim(ax, t0, filter_order, valid_range, td_type, time_axis)
+
+                    plt.title(f"Input of ClimFilter ({str(var)})")
+                    plt.legend()
+                    fig.autofmt_xdate()
+                    plt.tight_layout()
+                    self.plot_name = f"climFIR_{self._name}_{str(var)}_{it0}_{ifilter}"
+                    self._save()
+
+                    # plot residuum
+                    fig, ax = plt.subplots()
+                    self._plot_valid_area(ax, t0, valid_range, td_type)
+                    self._plot_t0(ax, t0)
+                    self._plot_series(ax, time_axis, residuum_true.values.flatten(), style="ideal")
+                    self._plot_series(ax, time_axis, residuum_estimated.values.flatten(), style="clim")
+                    ax.set_xlim(xlims)
+                    plt.title(f"Residuum of ClimFilter ({str(var)})")
+                    plt.legend(loc="upper left")
+                    fig.autofmt_xdate()
+                    plt.tight_layout()
+
+                    self.plot_name = f"climFIR_{self._name}_{str(var)}_{it0}_{ifilter}_residuum"
+                    self._save()
+
+    def _set_xlim(self, ax, t0, order, valid_range, td_type, time_axis):
+        """
+        Set xlims
+
+        Use order and valid_range to find a good zoom-in that hides the edges of filter values affected by the
+        reduced filter order. The limits are returned so they can be reused for other plots.
+        """
+        t_minus_delta = max(1.5 * valid_range.start, 0.3 * order)
+        t_plus_delta = max(0.5 * valid_range.start, 0.3 * order)
+        t_minus = t0 + np.timedelta64(-int(t_minus_delta), td_type)
+        t_plus = t0 + np.timedelta64(int(t_plus_delta), td_type)
+        ax_start = max(t_minus, time_axis[0])
+        ax_end = min(t_plus, time_axis[-1])
+        ax.set_xlim((ax_start, ax_end))
+        return ax_start, ax_end
+
+    def _plot_valid_area(self, ax, t0, valid_range, td_type):
+        ax.axvspan(t0 + np.timedelta64(-valid_range.start, td_type),
+                   t0 + np.timedelta64(valid_range.stop - 1, td_type), **self.style_dict["valid_area"])
+
+    def _plot_t0(self, ax, t0):
+        ax.axvline(t0, **self.style_dict["t0"])
+
+    def _plot_series(self, ax, time_axis, data, style):
+        ax.plot(time_axis, data, **self.style_dict[style])
+
+    def _plot_original_data(self, ax, time_axis, data):
+        # original data
+        filter_input_nc = data
+        self._plot_series(ax, time_axis, filter_input_nc.values.flatten(), style="original")
+        # self._plot_series(ax, time_axis, filter_input_nc.values.flatten(), color="darkgrey", linestyle="dashed",
+        #                   label="original")
+
+    def _plot_apriori(self, ax, time_axis, data, new_dim, ifilter):
+        # clim apriori
+        filter_input = data
+        if ifilter == 0:
+            d_tmp = filter_input.sel(
+                {new_dim: slice(0, filter_input.coords[new_dim].values.max())}).values.flatten()
+        else:
+            d_tmp = filter_input.values.flatten()
+        self._plot_series(ax, time_axis[len(time_axis) - len(d_tmp):], d_tmp, style="apriori")
+        # self._plot_series(ax, time_axis[len(time_axis) - len(d_tmp):], d_tmp, color="darkgrey", linestyle="solid",
+        #                   label="estimated future")
+
+    def _plot_clim_filter(self, ax, time_axis, data, new_dim, h, output_dtypes):
+        filter_input = data
+        # clim filter response
+        filt = xr.apply_ufunc(self.fir_filter_convolve, filter_input,
+                              input_core_dims=[[new_dim]],
+                              output_core_dims=[[new_dim]],
+                              vectorize=True,
+                              kwargs={"h": h},
+                              output_dtypes=[output_dtypes])
+        self._plot_series(ax, time_axis, filt.values.flatten(), style="clim")
+        # self._plot_series(ax, time_axis, filt.values.flatten(), color="black", linestyle="solid",
+        #                   label="clim filter response", linewidth=2)
+        residuum_estimated = filter_input - filt
+        return residuum_estimated
+
+    def _plot_ideal_filter(self, ax, time_axis, data, new_dim, h, output_dtypes):
+        filter_input_nc = data
+        # ideal filter response
+        filt = xr.apply_ufunc(self.fir_filter_convolve, filter_input_nc,
+                              input_core_dims=[[new_dim]],
+                              output_core_dims=[[new_dim]],
+                              vectorize=True,
+                              kwargs={"h": h},
+                              output_dtypes=[output_dtypes])
+        self._plot_series(ax, time_axis, filt.values.flatten(), style="ideal")
+        # self._plot_series(ax, time_axis, filt.values.flatten(), color="black", linestyle="dashed",
+        #                   label="ideal filter response", linewidth=2)
+        residuum_true = filter_input_nc - filt
+        return residuum_true
+
+    def _store_plot_data(self, data):
+        """Store plot data. Could be loaded in a notebook to redraw."""
+        file = os.path.join(self.plot_folder, "plot_data.pickle")
+        with open(file, "wb") as f:
+            dill.dump(data, f)
-- 
GitLab
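
_store_plot_data dumps the raw plot data with dill so the figures can be
rebuilt outside the workflow, e.g. in a notebook. A minimal sketch, assuming a
finished run whose climFIR folder contains plot_data.pickle (the path,
sampling, and name are assumptions that must match the original run):

    import os
    import dill

    from mlair.plotting.data_insight_plotting import PlotClimateFirFilter

    plot_folder = "ExperimentFolder/plots"  # hypothetical plot path
    file = os.path.join(plot_folder, "climFIR", "plot_data.pickle")
    with open(file, "rb") as f:
        plot_data = dill.load(f)

    # redraw; the class appends "climFIR" to plot_folder by itself
    PlotClimateFirFilter(plot_folder, plot_data, sampling="1d", name="redraw")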


From 8ea8852933fb040b9c6f28d740e9eb447df18097 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 7 Jul 2021 12:12:32 +0200
Subject: [PATCH 155/175] cleanup, remove outdated validate_station_old method

---
 mlair/run_modules/pre_processing.py | 32 -----------------------------
 1 file changed, 32 deletions(-)

diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index c7d7f920..08bff85c 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -285,38 +285,6 @@ class PreProcessing(RunEnvironment):
             for k, v in attrs.items():
                 self.data_store.set(k, v)
 
-    def validate_station_old(self, data_handler: AbstractDataHandler, set_stations, set_name=None,
-                             store_processed_data=True):
-        """
-        Check if all given stations in `all_stations` are valid.
-
-        Valid means, that there is data available for the given time range (is included in `kwargs`). The shape and the
-        loading time are logged in debug mode.
-
-        :return: Corrected list containing only valid station IDs.
-        """
-        t_outer = TimeTracking()
-        logging.info(f"check valid stations started{' (%s)' % (set_name if set_name is not None else 'all')}")
-        # calculate transformation using train data
-        if set_name == "train":
-            logging.info("setup transformation using train data exclusively")
-            self.transformation(data_handler, set_stations)
-        # start station check
-        collection = DataCollection()
-        valid_stations = []
-        kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope=set_name)
-        for station in set_stations:
-            try:
-                dp = data_handler.build(station, name_affix=set_name, store_processed_data=store_processed_data,
-                                        **kwargs)
-                collection.add(dp)
-                valid_stations.append(station)
-            except (AttributeError, EmptyQueryResult):
-                continue
-        logging.info(f"run for {t_outer} to check {len(set_stations)} station(s). Found {len(collection)}/"
-                     f"{len(set_stations)} valid stations.")
-        return collection, valid_stations
-
     def transformation(self, data_handler: AbstractDataHandler, stations):
         if hasattr(data_handler, "transformation"):
             kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope="train")
-- 
GitLab


From fef96a031b9bd0897faf2d307ec9ac3425c47af7 Mon Sep 17 00:00:00 2001
From: Falco Weichselbaum <f.weichselbaum@fz-juelich.de>
Date: Wed, 7 Jul 2021 14:44:26 +0200
Subject: [PATCH 156/175] changed helpers/statistics.py lines 299 and 325:
 ahead_names now uses the actual lead times, because non-monotonically rising
 window lead times can occur (it was a simple range before)

---
 mlair/helpers/statistics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 30391998..15cef734 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -296,7 +296,7 @@ class SkillScores:
 
         :return: skill score for each comparison and forecast step
         """
-        ahead_names = list(range(1, window_lead_time + 1))
+        ahead_names = list(self.external_data.ahead.data)
         combinations, combination_strings = self.get_model_name_combinations()
         skill_score = pd.DataFrame(index=combination_strings)
         for iahead in ahead_names:
@@ -322,7 +322,7 @@ class SkillScores:
 
         :return: all CASES as well as all terms
         """
-        ahead_names = list(range(1, window_lead_time + 1))
+        ahead_names = list(self.external_data.ahead.data)
 
         all_terms = ['AI', 'AII', 'AIII', 'AIV', 'BI', 'BII', 'BIV', 'CI', 'CIV', 'CASE I', 'CASE II', 'CASE III',
                      'CASE IV']
-- 
GitLab
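
The old range(1, window_lead_time + 1) silently assumed ordered lead times
1..n; reading them from the ahead coordinate keeps the skill scores aligned
with the data even when the coordinate is unordered or has gaps. A minimal
sketch of the difference, using a hypothetical non-monotonic ahead axis:

    import numpy as np
    import xarray as xr

    # hypothetical forecasts whose lead times are not a simple 1..n range
    external_data = xr.DataArray(np.random.rand(3, 4),
                                 dims=("ahead", "index"),
                                 coords={"ahead": [1, 4, 2]})

    ahead_names_old = list(range(1, 3 + 1))           # [1, 2, 3] -> wrong labels
    ahead_names_new = list(external_data.ahead.data)  # [1, 4, 2] -> actual times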


From 6f433745a55bed99bd8c000ae9f41aa03794ee98 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 8 Jul 2021 11:00:57 +0200
Subject: [PATCH 157/175] data handler with filters can use kzf coeffs,
 improved a priori creation

---
 mlair/configuration/defaults.py               |   2 +-
 .../data_handler_mixed_sampling.py            |   6 +-
 .../data_handler/data_handler_with_filter.py  |  40 +-
 mlair/helpers/filter.py                       | 550 +++---------------
 mlair/helpers/helpers.py                      |   8 +-
 5 files changed, 121 insertions(+), 485 deletions(-)

diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index bfbef521..088a504a 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -48,7 +48,7 @@ DEFAULT_CREATE_NEW_BOOTSTRAPS = False
 DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
 DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
                      "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
-                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram"]
+                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotPeriodogram"]
 DEFAULT_SAMPLING = "daily"
 DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA",
                        "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py
index 62a354a2..8205ae6c 100644
--- a/mlair/data_handler/data_handler_mixed_sampling.py
+++ b/mlair/data_handler/data_handler_mixed_sampling.py
@@ -10,6 +10,7 @@ from mlair.data_handler import DefaultDataHandler
 from mlair import helpers
 from mlair.helpers import remove_items
 from mlair.configuration.defaults import DEFAULT_SAMPLING, DEFAULT_INTERPOLATION_LIMIT, DEFAULT_INTERPOLATION_METHOD
+from mlair.helpers.filter import filter_width_kzf
 
 import inspect
 from typing import Callable
@@ -233,7 +234,10 @@ class DataHandlerMixedSamplingWithClimateFirFilterSingleStation(DataHandlerMixed
 
     def estimate_filter_width(self):
         """Filter width is determined by the filter with the highest order."""
-        return max(self.filter_order)
+        if isinstance(self.filter_order[0], tuple):
+            return max([filter_width_kzf(*e) for e in self.filter_order])
+        else:
+            return max(self.filter_order)
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 5da1b893..80253b0a 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -14,7 +14,7 @@ from mlair.data_handler.data_handler_single_station import DataHandlerSingleStat
 from mlair.data_handler import DefaultDataHandler
 from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
 from mlair.helpers.filter import KolmogorovZurbenkoFilterMovingWindow as KZFilter
-from mlair.helpers.filter import FIRFilter, ClimateFIRFilter
+from mlair.helpers.filter import FIRFilter, ClimateFIRFilter, omega_null_kzf
 
 # define a more general date type for type hinting
 str_or_list = Union[str, List[str]]
@@ -152,6 +152,8 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
         # self._check_sampling(**kwargs)
         # self.original_data = None  # ToDo: implement here something to store unfiltered data
         self.fs = self._get_fs(**kwargs)
+        if filter_window_type == "kzf":
+            filter_cutoff_period = self._get_kzf_cutoff_period(filter_order, self.fs)
         self.filter_cutoff_period, removed_index = self._prepare_filter_cutoff_period(filter_cutoff_period, self.fs)
         self.filter_cutoff_freq = self._period_to_freq(self.filter_cutoff_period)
         assert len(self.filter_cutoff_period) == (len(filter_order) - len(removed_index))
@@ -165,8 +167,11 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
         order = []
         for i, o in enumerate(filter_order):
             if i not in removed_index:
-                fo = int(o * fs)
-                fo = fo + 1 if fo % 2 == 0 else fo
+                if isinstance(o, tuple):
+                    fo = (o[0] * fs, o[1])
+                else:
+                    fo = int(o * fs)
+                    fo = fo + 1 if fo % 2 == 0 else fo
                 order.append(fo)
         return order
 
@@ -185,6 +190,14 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation):
                 removed.append(i)
         return cutoff, removed
 
+    @staticmethod
+    def _get_kzf_cutoff_period(kzf_settings, fs):
+        cutoff = []
+        for (m, k) in kzf_settings:
+            w0 = omega_null_kzf(m * fs, k) * fs
+            cutoff.append(1. / w0)
+        return cutoff
+
     @staticmethod
     def _period_to_freq(cutoff_p):
         return list(map(lambda x: (1. / x[0] if x[0] is not None else None, 1. / x[1] if x[1] is not None else None),
@@ -325,7 +338,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     0 for all residuum components.
 
     :param apriori: Data to use as apriori information. This should be either a xarray dataarray containing monthly or
-        any other heuristic to support the clim filter, or a list of such arrays containint heuristics for all residua
+        any other heuristic to support the clim filter, or a list of such arrays containing heuristics for all residua
         in addition. The 2nd can be used together with apriori_type `residuum_stats` which estimates the error of the
         residuum when the clim filter should be applied with exogenous parameters. If apriori_type is None/`zeros` data
         can be provided, but this is not required in this case.
@@ -341,7 +354,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     _store_attributes = DataHandlerFirFilterSingleStation.store_attributes() + ["apriori"]
 
     def __init__(self, *args, apriori=None, apriori_type=None, apriori_diurnal=False, apriori_sel_opts=None,
-                 plot_path=None, **kwargs):
+                 plot_path=None, name_affix=None, **kwargs):
         self.apriori_type = apriori_type
         self.climate_filter_coeff = None  # coefficents of the used FIR filter
         self.apriori = apriori  # exogenous apriori information or None to calculate from data (endogenous)
@@ -349,6 +362,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         self.all_apriori = None  # collection of all apriori information
         self.apriori_sel_opts = apriori_sel_opts  # ensure to separate exogenous and endogenous information
         self.plot_path = plot_path  # use this path to create insight plots
+        self.plot_name_affix = name_affix
         super().__init__(*args, **kwargs)
 
     @TimeTrackingWrapper
@@ -356,12 +370,13 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         """Apply FIR filter only on inputs."""
         self.apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
         logging.info(f"{self.station}: call ClimateFIRFilter")
+        plot_name = str(self)  # if self.plot_name_affix is None else f"{str(self)}_{self.plot_name_affix}"
         climate_filter = ClimateFIRFilter(self.input_data.astype("float32"), self.fs, self.filter_order,
                                           self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
                                           apriori_diurnal=self.apriori_diurnal, sel_opts=self.apriori_sel_opts,
-                                          plot_path=self.plot_path, plot_name=str(self),
+                                          plot_path=self.plot_path, plot_name=plot_name,
                                           minimum_length=self.window_history_size, new_dim=self.window_dim)
         self.climate_filter_coeff = climate_filter.filter_coefficients
 
@@ -374,11 +389,9 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
 
         climate_filter_data = [c.sel({self.window_dim: slice(-self.window_history_size, 0)}) for c in
                                climate_filter.filtered_data]
-        # climate_filter_data = climate_filter.filtered_data
 
         # create input data with filter index
         input_data = xr.concat(climate_filter_data, pd.Index(self.create_filter_index(), name=self.filter_dim))
-        # self.input_data = xr.concat([c.sel(window=slice(-self.window_history_size, 0)) for c in climate_filter_data], pd.Index(self.create_filter_index(), name=self.filter_dim))
 
         # add unfiltered raw data
         if self._add_unfiltered is True:
@@ -388,7 +401,6 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
 
         self.input_data = input_data
 
-        # self.history = self.shift(data, dim_name_of_shift, window, offset=self.window_history_offset)
         # this is just a code snippet to check the results of the filter
         # import matplotlib
         # matplotlib.use("TkAgg")
@@ -421,16 +433,6 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
         self.filter_dim_order = lazy_data
         DataHandlerSingleStation._extract_lazy(self, (_data, _meta, _input_data, _target_data))
 
-    @staticmethod
-    def _prepare_filter_order(filter_order, removed_index, fs):
-        order = []
-        for i, o in enumerate(filter_order):
-            if i not in removed_index:
-                fo = int(o * fs)
-                fo = fo + 1 if fo % 2 == 0 else fo
-                order.append(fo)
-        return order
-
     @staticmethod
     def _prepare_filter_cutoff_period(filter_cutoff_period, fs):
         """Frequency must be smaller than the sampling frequency fs. Otherwise remove given cutoff period pair."""
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 9662122b..a551bec4 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -1,6 +1,6 @@
 import gc
 import warnings
-from typing import Union, Callable
+from typing import Union, Callable, Tuple
 import logging
 import os
 import time
@@ -55,10 +55,11 @@ class FIRFilter:
 
 
 class ClimateFIRFilter:
+    from mlair.plotting.data_insight_plotting import PlotClimateFirFilter
 
     def __init__(self, data, fs, order, cutoff, window, time_dim, var_dim, apriori=None, apriori_type=None,
-                 apriori_diurnal=False, sel_opts=None, plot_path=None, plot_name=None, vectorized=True,
-                 padlen_factor=0.8, minimum_length=None, new_dim=None):
+                 apriori_diurnal=False, sel_opts=None, plot_path=None, plot_name=None,
+                 minimum_length=None, new_dim=None):
         """
         :param data: data to filter
         :param fs: sampling frequency in 1/days -> 1d: fs=1 -> 1H: fs=24
@@ -81,9 +82,10 @@ class ClimateFIRFilter:
         self.plot_data = []
         filtered = []
         h = []
-        sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
+        if sel_opts is not None:
+            sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
         sampling = {1: "1d", 24: "1H"}.get(int(fs))
-        logging.info(f"{plot_name}: create diurnal_anomalies")
+        logging.debug(f"{plot_name}: create diurnal_anomalies")
         if apriori_diurnal is True and sampling == "1H":
             # diurnal_anomalies = self.create_hourly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
             #                                             as_anomaly=True)
@@ -92,11 +94,11 @@ class ClimateFIRFilter:
                                                                  as_anomaly=True)
         else:
             diurnal_anomalies = 0
-        logging.info(f"{plot_name}: create monthly apriori")
+        logging.debug(f"{plot_name}: create monthly apriori")
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling,
                                                time_dim=time_dim) + diurnal_anomalies
-            logging.info(f"{plot_name}: apriori shape = {apriori.shape}")
+            logging.debug(f"{plot_name}: apriori shape = {apriori.shape}")
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
 
@@ -109,17 +111,14 @@ class ClimateFIRFilter:
         for i in range(len(order)):
             logging.info(f"{plot_name}: start filter for order {order[i]}")
             # calculate climatological filter
-            # clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
             # ToDo: remove all methods except the vectorized version
-            clim_filter: Callable = {True: self.clim_filter_vectorized_less_memory, False: self.clim_filter}[vectorized]
-            _minimum_length = self._minimum_length(order, minimum_length, i)
-            fi, hi, apriori, plot_data = clim_filter(input_data, fs, cutoff[i], order[i],
-                                                     apriori=apriori_list[i],
-                                                     sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
-                                                     window=window,
-                                                     var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor,
-                                                     minimum_length=_minimum_length, new_dim=new_dim,
-                                                     plot_dates=plot_dates)
+            _minimum_length = self._minimum_length(order, minimum_length, i, window)
+            fi, hi, apriori, plot_data = self.clim_filter(input_data, fs, cutoff[i], order[i],
+                                                          apriori=apriori_list[i],
+                                                          sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
+                                                          window=window, var_dim=var_dim,
+                                                          minimum_length=_minimum_length, new_dim=new_dim,
+                                                          plot_dates=plot_dates)
 
             logging.info(f"{plot_name}: finished clim_filter calculation")
             if minimum_length is None:
@@ -173,13 +172,17 @@ class ClimateFIRFilter:
         self._apriori = apriori_list
 
         # visualize
-        self._plot(sampling)
+        if self.plot_path is not None:
+            self.PlotClimateFirFilter(self.plot_path, self.plot_data, sampling, plot_name)
+            # self._plot(sampling, new_dim=new_dim)
 
     @staticmethod
-    def _minimum_length(order, minimum_length, pos):
+    def _minimum_length(order, minimum_length, pos, window):
         next_order = 0
         if pos + 1 < len(order):
             next_order = order[pos + 1]
+            if window == "kzf" and isinstance(next_order, tuple):
+                next_order = filter_width_kzf(*next_order)
         if minimum_length is not None:
             next_order = next_order + minimum_length
         return next_order if next_order > 0 else None
@@ -290,283 +293,61 @@ class ClimateFIRFilter:
         """
         Extend time range of apriori information.
 
-        This method will fail, if apriori is available for a shorter period than the gab to fill.
+        This method may not work properly if the length of apriori is less than one year.
         """
         dates = data.coords[time_dim].values
         td_type = {"1d": "D", "1H": "h"}.get(sampling)
 
         # apriori starts after data
         if dates[0] < apriori.coords[time_dim].values[0]:
-            logging.info(f"{data.coords['Stations'].values[0]}: apriori starts after data")
+            logging.debug(f"{data.coords['Stations'].values[0]}: apriori starts after data")
+
             # add difference in full years
             date_diff = abs(dates[0] - apriori.coords[time_dim].values[0]).astype("timedelta64[D]")
             extend_range = np.ceil(date_diff / (np.timedelta64(1, "D") * 365)).astype(int) * 365
-            coords = apriori.coords
+            factor = 1 if td_type == "D" else 24
 
-            # create new time axis
-            # start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) - np.timedelta64(extend_range, "D")
-            # end = coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
-            # new_time_axis = np.arange(start, end).astype("datetime64[ns]")
+            # get fill data range
+            start = apriori.coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
+            end = apriori.coords[time_dim][0].values.astype("datetime64[%s]" % td_type) + np.timedelta64(
+                366 * factor + 1, td_type)
 
-            factor = 1 if td_type == "D" else 24
-            start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
-                extend_range * factor + 1,
-                td_type)
-            end = coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
-            new_time_axis = np.arange(start, end).astype("datetime64[ns]")
-            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_time_axis = {new_time_axis.shape}")
-
-            # extract old values to use with new axis
-            # start = coords[time_dim][0].values.astype("datetime64[D]")
-            # end = coords[time_dim][0].values.astype("datetime64[D]") + np.timedelta64(extend_range - 1, "D")
-            # new_values = apriori.sel({time_dim: slice(start, end)})
-            # new_values.coords[time_dim] = new_time_axis
-
-            start = coords[time_dim][0].values.astype("datetime64[%s]" % td_type)
-            end = coords[time_dim][0].values.astype("datetime64[%s]" % td_type) + np.timedelta64(
-                extend_range * factor - 1, td_type)
-            new_values = apriori.sel({time_dim: slice(start, end)})
-            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_values = {new_values.shape}")
-            new_values.coords[time_dim] = new_time_axis
-
-            # add new values to apriori
-            apriori = apriori.combine_first(new_values)
+            # fill year by year
+            for i in range(365, extend_range + 365, 365):
+                apriori_tmp = apriori.sel({time_dim: slice(start, end)})  # hint: slice includes end date
+                new_time_axis = apriori_tmp.coords[time_dim] - np.timedelta64(i * factor, td_type)
+                apriori_tmp.coords[time_dim] = new_time_axis
+                apriori = apriori.combine_first(apriori_tmp)
 
         # apriori ends before data
         if dates[-1] + np.timedelta64(365, "D") > apriori.coords[time_dim].values[-1]:
-            logging.info(f"{data.coords['Stations'].values[0]}: apriori ends before data")
+            logging.debug(f"{data.coords['Stations'].values[0]}: apriori ends before data")
+
             # add difference in full years + 1 year (because apriori is used as future estimate)
             date_diff = abs(dates[-1] - apriori.coords[time_dim].values[-1]).astype("timedelta64[D]")
             extend_range = np.ceil(date_diff / (np.timedelta64(1, "D") * 365)).astype(int) * 365 + 365
-            coords = apriori.coords
-
-            # create new time axis
             factor = 1 if td_type == "D" else 24
-            start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
-            end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) + np.timedelta64(
-                extend_range * factor + 1,
-                td_type)
-            new_time_axis = np.arange(start, end).astype("datetime64[ns]")  # hint: arange does not include end date
-            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_time_axis = {new_time_axis.shape}")
-            logging.info(f"{data.coords['Stations'].values[0]}: start of new_time_axis = {start}")
-            logging.info(f"{data.coords['Stations'].values[0]}: end of new_time_axis = {end}")
-            logging.info(f"{data.coords['Stations'].values[0]}: delta of new_time_axis = {end - start}")
-
-            # extract old values to use with new axis
-            start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
-                extend_range * factor, td_type)
-            # start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
-            #     extend_range * factor, td_type)
-            end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
-            new_values = apriori.sel({time_dim: slice(start, end)})  # hint: slice includes end date
-            logging.info(f"{data.coords['Stations'].values[0]}: shape of new_values = {new_values.shape}")
-            logging.info(f"{data.coords['Stations'].values[0]}: start of new_values = {start}")
-            logging.info(f"{data.coords['Stations'].values[0]}: end of new_values = {end}")
-            logging.info(f"{data.coords['Stations'].values[0]}: delta of new_values = {end - start}")
-
-            logging.info(f"{data.coords['Stations'].values[0]}: set new_time_axis")
-            new_values.coords[time_dim] = new_time_axis
-
-            # add new values to apriori
-            logging.info(f"{data.coords['Stations'].values[0]}: add to apriori")
-            apriori = apriori.combine_first(new_values)
 
-        return apriori
+            # get fill data range
+            start = apriori.coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64(
+                366 * factor + 1, td_type)
+            end = apriori.coords[time_dim][-1].values.astype("datetime64[%s]" % td_type)
 
-    @TimeTrackingWrapper
-    def clim_filter(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5, sel_opts=None, sampling="1d",
-                    time_dim="datetime", var_dim="variables", window="hamming", plot_index=None):
-
-        # calculate apriori information from data if not given and extend its range if not sufficient long enough
-        if apriori is None:
-            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
-        apriori = self.extend_apriori(data, apriori, time_dim, sampling)
-
-        # calculate FIR filter coefficients
-        h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
-        length = len(h)
+            # fill year by year
+            for i in range(365, extend_range + 365, 365):
+                apriori_tmp = apriori.sel({time_dim: slice(start, end)})  # hint: slice includes end date
+                new_time_axis = apriori_tmp.coords[time_dim] + np.timedelta64(i * factor, td_type)
+                apriori_tmp.coords[time_dim] = new_time_axis
+                apriori = apriori.combine_first(apriori_tmp)
 
-        # start loop on all timestamps
-        dt = data.coords[time_dim].values
-        res = xr.zeros_like(data)
-        logging.info("start iteration")
-        for i in range(0, len(dt)):
-            t0 = dt[i]
-            pd_date = pd.to_datetime(t0)
-            if pd_date.day == 1 and pd_date.month == 1:
-                print(t0)
-            try:
-                i_m = max(0, i - length)
-                i_p = min(i + length, len(dt) - 2)
-                t_hist = slice(dt[i_m], dt[i])
-                t_fut = slice(dt[i + 1], dt[i_p + 1])
-                tmp_hist = data.sel({time_dim: t_hist})
-                tmp_fut = apriori.sel({time_dim: t_fut})
-                tmp_comb = xr.concat([tmp_hist, tmp_fut], dim=time_dim)
-                _padlen = int(min(padlen_factor, 1) * len(tmp_comb.coords[time_dim]))
-                tmp_filter, _ = fir_filter(tmp_comb, fs, cutoff_high=cutoff_high, order=order, causal=False,
-                                           padlen=_padlen, dim=var_dim, window=window, h=h)
-                res.loc[{time_dim: t0}] = tmp_filter.loc[{time_dim: t0}]
-                if i == 720 and self.plot_path is not None:
-                    self.plot(data, tmp_comb, var_dim, time_dim, slice(dt[i_m], dt[i_p + 1]), t0, plot_index)
-            except IndexError:
-                res.loc[{time_dim: t0}] = np.nan
-        return res, h, apriori
-
-    @TimeTrackingWrapper
-    def clim_filter_vectorized(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5, sel_opts=None,
-                               sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
-                               plot_index=None):
-
-        # calculate apriori information from data if not given and extend its range if not sufficient long enough
-        if apriori is None:
-            apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
-        apriori = self.extend_apriori(data, apriori, time_dim, sampling)
-
-        # calculate FIR filter coefficients
-        h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
-        length = len(h)
-
-        # create tmp dimension to apply filter, search for unused name
-        new_dim = self._create_tmp_dimension(data)
-
-        # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
-        history = self._shift_data(data, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
-        future = self._shift_data(apriori, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
-        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
-        # filter_input_data = history.combine_first(future)
-        # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
-        # filter_input_data.sel(datetime=slice("2009-11-01", "2011-04-01"),variables="temp").plot()
-
-        time_axis = filter_input_data.coords[time_dim]
-        # apply vectorized fir filter along the tmp dimension
-        kwargs = {"fs": fs, "cutoff_high": cutoff_high, "order": order,
-                  "causal": False, "padlen": int(min(padlen_factor, 1) * length), "h": h}
-        # with TimeTracking(name="numpy_vec"):
-        #     filt = fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs)
-        # with TimeTracking(name="xr_apply_ufunc"):
-        #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
-        #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
-        #                           kwargs=kwargs)
-        with TimeTracking(name="convolve"):
-            slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
-            filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
-                                  input_core_dims=[[new_dim]],
-                                  output_core_dims=[[new_dim]],
-                                  vectorize=True,
-                                  kwargs={"h": h})
-
-        # plot
-        if self.plot_path is not None:
-            for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
-                try:
-                    pos = int(time_pos * 365 * fs)
-                    filter_example = filter_input_data.isel({time_dim: pos})
-                    t0 = filter_example.coords[time_dim].values
-                    t_slice = filter_input_data.isel(
-                        {time_dim: slice(pos - int((length - 1) / 2), pos + int((length - 1) / 2) + 1)}).coords[
-                        time_dim].values
-                    self.plot(data, filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
-                except IndexError:
-                    pass
-
-        # select only values at tmp dimension 0 at each point in time
-        res = filt.sel({new_dim: 0}, drop=True)
-        # create result array with same shape like input data, gabs are filled by nans
-        res_full = xr.ones_like(data) * np.nan
-        res_full.loc[res.coords] = res
-        return res_full, h, apriori
-
-    def _tmp_analysis(self, data, apriori, var, var_dim, length, time_dim, new_dim, h):
-        logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
-        d = data.sel({var_dim: [var]}).sel(datetime=slice("2007", "2010"))
-        a = apriori.sel({var_dim: [var]}).sel(datetime=slice("2007", "2010"))
-
-        # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
-        history = self._shift_data(d, range(-length, 1), time_dim, var_dim, new_dim)
-
-        future = self._shift_data(d, range(1, length), time_dim, var_dim, new_dim)
-        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
-        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-        with TimeTracking(name="convolve"):
-            filt_nc = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,
-                                     input_core_dims=[[new_dim]],
-                                     output_core_dims=[[new_dim]],
-                                     vectorize=True,
-                                     kwargs={"h": h})
-
-        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim)
-        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
-        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-        with TimeTracking(name="convolve"):
-            filt_t0 = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,
-                                     input_core_dims=[[new_dim]],
-                                     output_core_dims=[[new_dim]],
-                                     vectorize=True,
-                                     kwargs={"h": h})
-
-        diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
-        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim) - diff
-        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
-        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-        with TimeTracking(name="convolve"):
-            filt_diff1d = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,
-                                         input_core_dims=[[new_dim]],
-                                         output_core_dims=[[new_dim]],
-                                         vectorize=True,
-                                         kwargs={"h": h})
-
-        diff = (a - history.sel(window=slice(-24 * 7, 1)).mean(new_dim))
-        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim) - diff
-        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
-        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-        with TimeTracking(name="convolve"):
-            filt_diff1w = xr.apply_ufunc(fir_filter_convolve_vectorized,
-                                         filter_input_data,
-                                         input_core_dims=[[new_dim]],
-                                         output_core_dims=[[new_dim]],
-                                         vectorize=True,
-                                         kwargs={"h": h})
-
-        diff = (a - history.sel(window=slice(-24 * 7, 1)).mean(new_dim))
-        future = self._shift_data(a, range(1, length), time_dim, var_dim, new_dim)
-        diff = xr.zeros_like(future) + diff
-        lam = np.log(2) / (7 * 24)
-        diff = diff * np.exp(- lam * diff.coords["window"])
-        future = future - diff
-        filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
-        logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-        with TimeTracking(name="convolve"):
-            filt_diff1w_decay = xr.apply_ufunc(fir_filter_convolve_vectorized,
-                                               filter_input_data,
-                                               input_core_dims=[[new_dim]],
-                                               output_core_dims=[[new_dim]],
-                                               vectorize=True,
-                                               kwargs={"h": h})
-
-        t0 = datetime.datetime.strptime("2009-07-15 00:00", "%Y-%m-%d %H:%M")
-        delta = datetime.timedelta(hours=1)
-        for i in range(int((length - 1) / 2)):
-            plt.plot(-i, filt_nc.sel(datetime=t0 - i * delta, window=0), marker="+", color="black")
-        filt_nc.sel(datetime=t0).plot(label="noncausal")
-        filt_t0.sel(datetime=t0).plot(label="nodiff")
-        filt_diff1d.sel(datetime=t0).plot(label="diff1d")
-        filt_diff1w.sel(datetime=t0).plot(label="diff1w")
-        filt_diff1w_decay.sel(datetime=t0).plot(label="diff1wdecay")
-        plt.legend()
-
-        for i in range(int((length - 1) / 2)):
-            plt.plot(-i, filt_t0.sel(datetime=t0 - i * delta, window=0), marker="+", color="black")
-
-        z = 1
+        return apriori
 
     @TimeTrackingWrapper
-    def clim_filter_vectorized_less_memory(self, data, fs, cutoff_high, order, apriori=None, padlen_factor=0.5,
-                                           sel_opts=None,
-                                           sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
-                                           plot_index=None, minimum_length=None, new_dim="window", plot_dates=None):
+    def clim_filter(self, data, fs, cutoff_high, order, apriori=None, sel_opts=None,
+                    sampling="1d", time_dim="datetime", var_dim="variables", window: Union[str, Tuple] = "hamming",
+                    minimum_length=None, new_dim="window", plot_dates=None):
 
-        logging.info(f"{data.coords['Stations'].values[0]}: extend apriori")
+        logging.debug(f"{data.coords['Stations'].values[0]}: extend apriori")
 
         # calculate apriori information from data if not given and extend its range if it is not sufficiently long
         if apriori is None:
@@ -575,12 +356,12 @@ class ClimateFIRFilter:
         apriori = self.extend_apriori(data, apriori, time_dim, sampling)
 
         # calculate FIR filter coefficients
-        h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
+        if window == "kzf":
+            h = firwin_kzf(*order)
+        else:
+            h = signal.firwin(order, cutoff_high, pass_zero="lowpass", fs=fs, window=window)
         length = len(h)
 
-        # create tmp dimension to apply filter, search for unused name
-        # new_dim = self._create_tmp_dimension(data)
-
         # use filter length if no minimum is given, otherwise use minimum + half filter length for extension
         extend_length_history = length if minimum_length is None else minimum_length + int((length + 1) / 2)
         extend_length_future = int((length + 1) / 2) + 1
@@ -595,7 +376,6 @@ class ClimateFIRFilter:
         coll = []
 
         for var in reversed(data.coords[var_dim].values):
-            # self._tmp_analysis(data, apriori, var, var_dim, length, time_dim, new_dim, h)
             logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
 
             _start = pd.to_datetime(data.coords[time_dim].min().values).year
@@ -611,20 +391,14 @@ class ClimateFIRFilter:
                     continue
 
                 # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
-                # logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
                 if new_dim not in d.coords:
                     history = self._shift_data(d, range(int(-extend_length_history), 1), time_dim, var_dim, new_dim)
                 else:
                     history = d.sel({new_dim: slice(int(-extend_length_history), 0)})
-                # logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
-                # diff = (a - history.sel(window=slice(-24, 1)).mean(new_dim))
                 if new_dim not in a.coords:
                     future = self._shift_data(a, range(1, extend_length_future), time_dim, var_dim, new_dim)
-                    # future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim) - diff
                 else:
                     future = a.sel({new_dim: slice(1, extend_length_future)})
-                # logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
-
                 filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
                 try:
                     filter_input_data = filter_input_data.sel({time_dim: str(_year)})
@@ -633,10 +407,9 @@ class ClimateFIRFilter:
                 if len(filter_input_data.coords[time_dim]) == 0:  # no valid data for this year
                     continue
 
-                logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-                with TimeTracking(name="convolve"):
-                    # slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
-                    filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data,  # .sel({new_dim: slicer}),
+                logging.debug(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
+                with TimeTracking(name=f"{data.coords['Stations'].values[0]} ({var}): filter convolve"):
+                    filt = xr.apply_ufunc(fir_filter_convolve, filter_input_data,
                                           input_core_dims=[[new_dim]],
                                           output_core_dims=[[new_dim]],
                                           vectorize=True,
@@ -681,31 +454,10 @@ class ClimateFIRFilter:
             coll.append(xr.concat(filt_coll, time_dim))
             gc.collect()
 
-            # plot
-            # ToDo: enable plotting again
-            # if self.plot_path is not None:
-            #     for i, viz_data in enumerate(plot_data):
-            #         self.plot_new(viz_data, data.sel({var_dim: [var]}), var_dim, time_dim, new_dim, f"{plot_index}_{i}",
-            #                       sampling)
-
-            # for i, time_pos in enumerate([0.25, 1.5, 2.75, 4]):  # [0.25, 1.5, 2.75, 4] x 365 days
-            #     try:
-            #
-            #         plot_data = coll[-1]
-            #         pos = int(time_pos * 365 * fs)
-            #         filter_example = plot_data.isel({time_dim: pos})
-            #         t0 = filter_example.coords[time_dim].values
-            #
-            #         slice_tmp = slice(pos - abs(plot_data.coords[new_dim].values.min()), pos + abs(plot_data.coords[new_dim].values.min()))
-                #         t_slice = plot_data.isel({time_dim: slice_tmp}).coords[time_dim].values
-                #         self.plot(data.sel({var_dim: [var]}), filter_example, var_dim, time_dim, t_slice, t0, f"{plot_index}_{i}")
-                #     except IndexError:
-                #         pass
-
-        logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
+        logging.debug(f"{data.coords['Stations'].values[0]}: concat all variables")
         res = xr.concat(coll, var_dim)
         # create result array with the same shape as the input data, gaps are filled with NaNs
-        logging.info(f"{data.coords['Stations'].values[0]}: create res_full")
+        logging.debug(f"{data.coords['Stations'].values[0]}: create res_full")
 
         new_coords = {**{k: data.coords[k].values for k in data.coords if k != new_dim}, new_dim: res.coords[new_dim]}
         dims = [*data.dims, new_dim] if new_dim not in data.dims else data.dims
@@ -751,8 +503,7 @@ class ClimateFIRFilter:
         res.name = index_name
         return res
 
-    def _plot(self, sampling):
-        new_dim = "window"
+    def _plot(self, sampling, new_dim="window"):
         h = None
         td_type = {"1d": "D", "1H": "h"}.get(sampling)
         if self.plot_path is None:
@@ -761,6 +512,7 @@ class ClimateFIRFilter:
         if not os.path.exists(plot_folder):
             os.makedirs(plot_folder)
 
+        # set plot parameters
         rc_params = {'axes.labelsize': 'large',
                      'xtick.labelsize': 'large',
                      'ytick.labelsize': 'large',
@@ -828,7 +580,7 @@ class ClimateFIRFilter:
                             label="estimated future")
 
                     # clim filter response
-                    filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input,
+                    filt = xr.apply_ufunc(fir_filter_convolve, filter_input,
                                           input_core_dims=[[new_dim]],
                                           output_core_dims=[[new_dim]],
                                           vectorize=True,
@@ -839,7 +591,7 @@ class ClimateFIRFilter:
                     residuum_estimated = filter_input - filt
 
                     # ideal filter response
-                    filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_nc,
+                    filt = xr.apply_ufunc(fir_filter_convolve, filter_input_nc,
                                           input_core_dims=[[new_dim]],
                                           output_core_dims=[[new_dim]],
                                           vectorize=True,
@@ -882,93 +634,6 @@ class ClimateFIRFilter:
                     plt.savefig(plot_name, dpi=300)
                     plt.close('all')
 
-    def plot_new(self, viz_data, orig_data, var_dim, time_dim, new_dim, plot_index, sampling):
-        try:
-            td_type = {"1d": "D", "1H": "h"}.get(sampling)
-            filter_example = viz_data["filt"]
-            filter_input = viz_data["filter_input"]
-            filter_nc = viz_data["filt_nc"]
-            valid_range = viz_data["valid_range"]
-            t0 = viz_data["t0"]
-            t_minus = t0 + np.timedelta64(filter_input.coords[new_dim].values.min(), td_type)
-            t_plus = t0 + np.timedelta64(filter_input.coords[new_dim].values.max(), td_type)
-            t_slice = slice(t_minus, t_plus)
-            data = orig_data.sel({time_dim: t_slice})
-            plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR")
-            if not os.path.exists(plot_folder):
-                os.makedirs(plot_folder)
-
-            for var in data.coords[var_dim]:
-                time_axis = data.sel({var_dim: var, time_dim: t_slice}).coords[time_dim].values
-                rc_params = {'axes.labelsize': 'large',
-                             'xtick.labelsize': 'large',
-                             'ytick.labelsize': 'large',
-                             'legend.fontsize': 'large',
-                             'axes.titlesize': 'large',
-                             }
-                plt.rcParams.update(rc_params)
-                fig, ax = plt.subplots()
-
-                ax.axvspan(t0 + np.timedelta64(-valid_range.start, td_type),
-                           t0 + np.timedelta64(valid_range.stop - 1, td_type), color="whitesmoke", label="valid area")
-
-                ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)")
-                ax.plot(time_axis, data.sel({var_dim: var, time_dim: t_slice}).values.flatten(),
-                        color="darkgrey", linestyle="dashed", label="original")
-                d_tmp = filter_input.sel(
-                    {var_dim: var, new_dim: slice(0, filter_input.coords[new_dim].values.max())}).values.flatten()
-                # ax.plot(time_axis[len(time_axis) - len(d_tmp):], d_tmp, color="darkgrey", linestyle=(0 ,(1, 1)), label="filter input")
-                ax.plot(time_axis[len(time_axis) - len(d_tmp):], d_tmp, color="darkgrey", linestyle="solid",
-                        label="estimated future")
-                # data.sel({var_dim: var, time_dim: time_dim_slice}).plot()
-                # tmp_comb.sel({var_dim: var}).plot()
-                # d_filt = filter_example.sel({var_dim: var}).values.flatten()
-                ax.plot(time_axis, filter_example.sel({var_dim: var}).values.flatten(),
-                        color="black", linestyle="solid", label="filter response", linewidth=2)
-                ax.plot(time_axis, filter_nc.sel({var_dim: var}).values.flatten(),
-                        color="black", linestyle="dashed", label="ideal filter response", linewidth=2)
-                plt.title(f"Input of ClimFilter ({str(var.values)})")
-                plt.legend()
-                fig.autofmt_xdate()
-                plt.tight_layout()
-                plot_name = os.path.join(plot_folder, f"climFIR_{self.plot_name}_{str(var.values)}_{plot_index}.pdf")
-                plt.savefig(plot_name, dpi=300)
-                plt.close('all')
-        except:
-            pass
-
-    def plot(self, data, tmp_comb, var_dim, time_dim, time_dim_slice, t0, plot_index):
-        try:
-            plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR")
-            if not os.path.exists(plot_folder):
-                os.makedirs(plot_folder)
-            for var in data.coords[var_dim]:
-                time_axis = data.sel({var_dim: var, time_dim: time_dim_slice}).coords[time_dim].values
-                rc_params = {'axes.labelsize': 'large',
-                             'xtick.labelsize': 'large',
-                             'ytick.labelsize': 'large',
-                             'legend.fontsize': 'large',
-                             'axes.titlesize': 'large',
-                             }
-                plt.rcParams.update(rc_params)
-                fig, ax = plt.subplots()
-                ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)")
-                ax.plot(time_axis, data.sel({var_dim: var, time_dim: time_dim_slice}).values.flatten(),
-                        color="darkgrey", linestyle="--", label="original")
-                d_filt = tmp_comb.sel({var_dim: var}).values.flatten()
-                ax.plot(time_axis[:len(d_filt)], d_filt, color="black", label="filter input")
-                # data.sel({var_dim: var, time_dim: time_dim_slice}).plot()
-                # tmp_comb.sel({var_dim: var}).plot()
-                plt.title(f"Input of ClimFilter ({str(var.values)})")
-                plt.legend()
-                fig.autofmt_xdate()
-                plt.tight_layout()
-                plot_name = os.path.join(plot_folder, f"climFIR_{self.plot_name}_{str(var.values)}_{plot_index}.pdf")
-                plt.savefig(plot_name, dpi=300)
-                plt.close('all')
-        except:
-            pass
-
     @property
     def filter_coefficients(self):
         return self._h
@@ -1016,69 +681,10 @@ def fir_filter(data, fs, order=5, cutoff_low=None, cutoff_high=None, window="ham
     return filtered, h
 
 
-def fir_filter_numpy_vectorized(filter_input_data, var_dim, new_dim, kwargs):
-    filt_np = xr.DataArray(np.nan, coords=filter_input_data.coords)
-    for var in filter_input_data.coords[var_dim]:
-        logging.info(
-            f"{filter_input_data.coords['Stations'].values[0]}: {str(var.values)}")  # ToDo must be removed, just for debug
-        a = np.apply_along_axis(fir_filter_vectorized, filter_input_data.dims.index(new_dim),
-                                filter_input_data.sel({var_dim: var}).values, **kwargs)
-        filt_np.loc[{var_dim: var}] = a
-    return filt_np
-
-
-def fir_filter_convolve_vectorized(data, h):
+def fir_filter_convolve(data, h):
     return signal.convolve(data, h, mode='same', method="direct") / sum(h)
 
 
-def fir_filter_vectorized(data, time_stamp=None, fs=1, order=5, cutoff_low=None, cutoff_high=None, window="hamming",
-                          h=None,
-                          causal=True,
-                          padlen=None):
-    """Expects numpy array."""
-    if time_stamp is not None:
-        pd_date = pd.to_datetime(time_stamp)
-        if pd_date.day == 1 and pd_date.month in [1, 7]:
-            logging.info(time_stamp)
-    # sel = ~np.isnan(data)
-    # res = np.empty_like(data)
-    if h is None:
-        cutoff = []
-        if cutoff_low is not None:
-            cutoff += [cutoff_low]
-        if cutoff_high is not None:
-            cutoff += [cutoff_high]
-        if len(cutoff) == 2:
-            filter_type = "bandpass"
-        elif len(cutoff) == 1 and cutoff_low is not None:
-            filter_type = "highpass"
-        elif len(cutoff) == 1 and cutoff_high is not None:
-            filter_type = "lowpass"
-        else:
-            raise ValueError("Please provide either cutoff_low or cutoff_high.")
-        h = signal.firwin(order, cutoff, pass_zero=filter_type, fs=fs, window=window)
-    if causal:
-        # y = signal.lfilter(h, 1., data[sel])
-        y = signal.lfilter(h, 1., data)
-    else:
-        padlen = padlen if padlen is not None else 3 * len(h)
-        # print(sum(sel))
-        # if sum(sel) <= padlen:
-        #     y = np.empty_like(data[sel])
-        # else:
-        #     y = signal.filtfilt(h, 1., data[sel], padlen=padlen)
-        y = signal.filtfilt(h, 1., data, padlen=padlen)
-    # res[sel] = y
-    # return res
-    return y
-
-
-def fir_filter_vectorized_short(data, h=None, padlen=None):
-    """Expects numpy array."""
-    y = signal.filtfilt(h, 1., data, padlen=padlen)
-    return y
-
-
 class KolmogorovZurbenkoBaseClass:
 
     def __init__(self, df, wl, itr, is_child=False, filter_dim="window"):
@@ -1287,3 +893,25 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
             return df_itr
         except ValueError:
             raise ValueError
+
+
+def firwin_kzf(m, k):
+    coef = np.ones(m)
+    for i in range(1, k):
+        t = np.zeros((m, m + i * (m - 1)))
+        for km in range(m):
+            t[km, km:km + coef.size] = coef
+        coef = np.sum(t, axis=0)
+    return coef / m ** k
+
+
+def omega_null_kzf(m, k, alpha=0.5):
+    a = np.sqrt(6) / np.pi
+    b = 1 / (2 * np.array(k))
+    c = 1 - alpha ** b
+    d = np.array(m) ** 2 - alpha ** b
+    return a * np.sqrt(c / d)
+
+
+def filter_width_kzf(m, k):
+    return k * (m - 1) + 1
diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py
index 7eab9111..499f8258 100644
--- a/mlair/helpers/helpers.py
+++ b/mlair/helpers/helpers.py
@@ -9,7 +9,7 @@ import numpy as np
 import xarray as xr
 import dask.array as da
 
-from typing import Dict, Callable, Union, List, Any
+from typing import Dict, Callable, Union, List, Any, Tuple
 
 
 def to_list(obj: Any) -> List:
@@ -68,9 +68,9 @@ def float_round(number: float, decimals: int = 0, round_type: Callable = math.ce
     return round_type(number * multiplier) / multiplier
 
 
-def remove_items(obj: Union[List, Dict], items: Any):
+def remove_items(obj: Union[List, Dict, Tuple], items: Any):
     """
-    Remove item(s) from either list or dictionary.
+    Remove item(s) from either list, tuple or dictionary.
 
     :param obj: object to remove items from (either dictionary, list or tuple)
     :param items: elements to remove from obj. Can either be a list or single entry / key
@@ -99,6 +99,8 @@ def remove_items(obj: Union[List, Dict], items: Any):
         return remove_from_list(obj, items)
     elif isinstance(obj, dict):
         return remove_from_dict(obj, items)
+    elif isinstance(obj, tuple):
+        return tuple(remove_from_list(to_list(obj), items))
     else:
         raise TypeError(f"{inspect.stack()[0][3]} does not support type {type(obj)}.")
 
-- 
GitLab


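Note on the KZ filter option added above: firwin_kzf(m, k) builds the low-pass coefficients by convolving a box window of width m with itself k - 1 times and normalizing by m ** k, so the result has length filter_width_kzf(m, k) = k * (m - 1) + 1 and unit sum. A minimal sketch exercising this (m = 31 and k = 3 are arbitrary illustration values; the function body is reproduced from the patch so the snippet runs standalone):

    import numpy as np
    from scipy import signal

    def firwin_kzf(m, k):
        # reproduced from the patch: k-fold self-convolution of a box of width m
        coef = np.ones(m)
        for i in range(1, k):
            t = np.zeros((m, m + i * (m - 1)))
            for km in range(m):
                t[km, km:km + coef.size] = coef
            coef = np.sum(t, axis=0)
        return coef / m ** k

    m, k = 31, 3                                   # illustrative values
    h = firwin_kzf(m, k)
    assert h.size == k * (m - 1) + 1               # filter_width_kzf(m, k)
    assert np.isclose(h.sum(), 1.0)                # normalization by m ** k
    x = np.random.randn(1000)                      # toy signal
    y = signal.convolve(x, h, mode="same", method="direct") / sum(h)  # fir_filter_convolve
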
From 4eee713dd85efc5b26b98f2a950ee5b20ce15ac5 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 8 Jul 2021 13:30:52 +0200
Subject: [PATCH 158/175] periodogram works with window dim

---
 mlair/plotting/data_insight_plotting.py  | 10 +++++++---
 test/test_configuration/test_defaults.py |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 95b482df..c0e014fb 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -17,7 +17,7 @@ from matplotlib import lines as mlines, pyplot as plt, patches as mpatches, date
 from astropy.timeseries import LombScargle
 
 from mlair.data_handler import DataCollection
-from mlair.helpers import TimeTrackingWrapper, to_list
+from mlair.helpers import TimeTrackingWrapper, to_list, remove_items
 from mlair.plotting.abstract_plot_class import AbstractPlotClass
 
 
@@ -829,9 +829,13 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         plt.close('all')
 
 
-def f_proc(var, d_var, f_index):  # pragma: no cover
+def f_proc(var, d_var, f_index, time_dim="datetime"):  # pragma: no cover
     var_str = str(var)
-    t = (d_var.datetime - d_var.datetime[0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
+    t = (d_var[time_dim] - d_var[time_dim][0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
+    if len(d_var.shape) > 1:  # if extra dimensions remain, keep only their max coordinate (e.g. max(window) -> latest value)
+        to_remove = remove_items(d_var.coords.dims, time_dim)
+        for e in to_list(to_remove):
+            d_var = d_var.sel({e: d_var[e].max()})
     pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").power(f_index)
     # f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").autopower()
     return var_str, f_index, pgram
diff --git a/test/test_configuration/test_defaults.py b/test/test_configuration/test_defaults.py
index 16606d8f..b6bdd955 100644
--- a/test/test_configuration/test_defaults.py
+++ b/test/test_configuration/test_defaults.py
@@ -68,4 +68,4 @@ class TestAllDefaults:
         assert DEFAULT_PLOT_LIST == ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore",
                                      "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore",
                                      "PlotConditionalQuantiles", "PlotAvailability", "PlotAvailabilityHistogram",
-                                     "PlotDataHistogram"]
+                                     "PlotDataHistogram", "PlotPeriodogram"]
-- 
GitLab


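The f_proc change above makes the periodogram robust against extra dimensions such as the new window dimension by selecting only their maximum coordinate before the Lomb-Scargle computation. A hedged sketch of that selection step on a toy DataArray (array shape, coordinate values, and dimension names are invented for illustration):

    import numpy as np
    import pandas as pd
    import xarray as xr

    time_dim = "datetime"
    d_var = xr.DataArray(np.random.rand(100, 3), dims=(time_dim, "window"),
                         coords={time_dim: pd.date_range("2020-01-01", periods=100, freq="h"),
                                 "window": [-2, -1, 0]})

    if len(d_var.shape) > 1:  # keep only the latest window position, as in the patch
        for e in [d for d in d_var.dims if d != time_dim]:
            d_var = d_var.sel({e: d_var[e].max()})

    # time axis in days, as expected by LombScargle in f_proc
    t = (d_var[time_dim] - d_var[time_dim][0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
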
From 85a74b588a564f0c75a5f37a56cbe0d5c04a013c Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 8 Jul 2021 13:57:59 +0200
Subject: [PATCH 159/175] histogram can handle branched inputs

---
 mlair/plotting/data_insight_plotting.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index c0e014fb..513f64f2 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -464,16 +464,18 @@ class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
         self.variables_dim = variables_dim
         self.time_dim = time_dim
         self.window_dim = window_dim
-        self.inputs, self.targets = self._get_inputs_targets(generators, self.variables_dim)
+        self.inputs, self.targets, number_of_branches = self._get_inputs_targets(generators, self.variables_dim)
         self.bins = {}
         self.interval_width = {}
         self.bin_edges = {}
 
         # input plots
-        self._calculate_hist(generators, self.inputs, input_data=True)
-        for subset in generators.keys():
-            self._plot(add_name="input", subset=subset)
-        self._plot_combined(add_name="input")
+        for branch_pos in range(number_of_branches):
+            self._calculate_hist(generators, self.inputs, input_data=True, branch_pos=branch_pos)
+            add_name = "input" if number_of_branches == 1 else f"input_branch_{branch_pos}"
+            for subset in generators.keys():
+                self._plot(add_name=add_name, subset=subset)
+            self._plot_combined(add_name=add_name)
 
         # target plots
         self._calculate_hist(generators, self.targets, input_data=False)
@@ -487,16 +489,17 @@ class PlotDataHistogram(AbstractPlotClass):  # pragma: no cover
         gen = gens[k][0]
         inputs = to_list(gen.get_X(as_numpy=False)[0].coords[dim].values.tolist())
         targets = to_list(gen.get_Y(as_numpy=False).coords[dim].values.tolist())
-        return inputs, targets
+        n_branches = len(gen.get_X(as_numpy=False))
+        return inputs, targets, n_branches
 
-    def _calculate_hist(self, generators, variables, input_data=True):
+    def _calculate_hist(self, generators, variables, input_data=True, branch_pos=0):
         n_bins = 100
         for set_type, generator in generators.items():
             tmp_bins = {}
             tmp_edges = {}
             end = {}
             start = {}
-            f = lambda x: x.get_X(as_numpy=False)[0] if input_data is True else x.get_Y(as_numpy=False)
+            f = lambda x: x.get_X(as_numpy=False)[branch_pos] if input_data is True else x.get_Y(as_numpy=False)
             for gen in generator:
                 w = min(abs(f(gen).coords[self.window_dim].values))
                 data = f(gen).sel({self.window_dim: w})
-- 
GitLab


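For branched inputs, get_X returns one array per branch, so the histogram above is now computed once per branch_pos by indexing the branch inside the lambda. A toy illustration of this per-branch selection (shapes and branch count are invented):

    import numpy as np

    X = [np.random.rand(50, 5), np.random.rand(50, 8)]        # two input branches
    number_of_branches = len(X)                                # as in _get_inputs_targets
    for branch_pos in range(number_of_branches):
        f = lambda x: x[branch_pos]                            # mirrors the patched lambda
        data = f(X)
        hist, bin_edges = np.histogram(data.flatten(), bins=100)
        add_name = "input" if number_of_branches == 1 else f"input_branch_{branch_pos}"
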
From f68ecb48b2432a039ed12da5a40a327b0addcfb2 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 8 Jul 2021 16:25:30 +0200
Subject: [PATCH 160/175] /close #306 on pipeline success

---
 mlair/run_modules/post_processing.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 89a6f205..0d7bfeb4 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -85,6 +85,7 @@ class PostProcessing(RunEnvironment):
         self.competitor_path = self.data_store.get("competitor_path")
         self.competitors = to_list(self.data_store.get_default("competitors", default=[]))
         self.forecast_indicator = "nn"
+        self.ahead_dim = "ahead"
         self._run()
 
     def _run(self):
@@ -172,7 +173,7 @@ class PostProcessing(RunEnvironment):
             bootstrap_path = self.data_store.get("bootstrap_path")
             forecast_path = self.data_store.get("forecast_path")
             number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing")
-            dims = ["index", "ahead", "type"]
+            dims = ["index", self.ahead_dim, "type"]
             for station in self.test_data:
                 logging.info(str(station))
                 X, Y = None, None
@@ -467,7 +468,8 @@ class PostProcessing(RunEnvironment):
                                    "obs": observation,
                                    "ols": ols_prediction}
                 all_predictions = self.create_forecast_arrays(full_index, list(target_data.indexes[window_dim]),
-                                                              time_dimension, **prediction_dict)
+                                                              time_dimension, ahead_dim=self.ahead_dim,
+                                                              **prediction_dict)
 
                 # save all forecasts locally
                 path = self.data_store.get("forecast_path")
@@ -618,7 +620,8 @@ class PostProcessing(RunEnvironment):
         return index
 
     @staticmethod
-    def create_forecast_arrays(index: pd.DataFrame, ahead_names: List[Union[str, int]], time_dimension, **kwargs):
+    def create_forecast_arrays(index: pd.DataFrame, ahead_names: List[Union[str, int]], time_dimension,
+                               ahead_dim="ahead", **kwargs):
         """
         Combine different forecast types into single xarray.
 
@@ -631,7 +634,7 @@ class PostProcessing(RunEnvironment):
         """
         keys = list(kwargs.keys())
         res = xr.DataArray(np.full((len(index.index), len(ahead_names), len(keys)), np.nan),
-                           coords=[index.index, ahead_names, keys], dims=['index', 'ahead', 'type'])
+                           coords=[index.index, ahead_names, keys], dims=['index', ahead_dim, 'type'])
         for k, v in kwargs.items():
             intersection = set(res.index.values) & set(v.indexes[time_dimension].values)
             match_index = np.array(list(intersection))
-- 
GitLab


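The patch above threads a configurable ahead_dim from PostProcessing down to create_forecast_arrays instead of the hard-coded "ahead" string. A standalone sketch of the resulting array layout, using a toy index and invented forecast names:

    import numpy as np
    import pandas as pd
    import xarray as xr

    index = pd.DataFrame(index=pd.date_range("2020-01-01", periods=10))
    ahead_names, keys, ahead_dim = [1, 2, 3], ["nn", "obs", "ols"], "ahead"
    res = xr.DataArray(np.full((len(index.index), len(ahead_names), len(keys)), np.nan),
                       coords=[index.index, ahead_names, keys], dims=["index", ahead_dim, "type"])
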
From 1858c716ac5303cbe3e435d86405fe4f5e1a69b9 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 8 Jul 2021 16:41:04 +0200
Subject: [PATCH 161/175] removed unused helper method

---
 mlair/helpers/helpers.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py
index 499f8258..5ddaa3ee 100644
--- a/mlair/helpers/helpers.py
+++ b/mlair/helpers/helpers.py
@@ -179,13 +179,3 @@ def convert2xrda(arr: Union[xr.DataArray, xr.Dataset, np.ndarray, int, float],
             kwargs.update({'dims': dims, 'coords': coords})
 
         return xr.DataArray(arr, **kwargs)
-
-
-def convert_size(size_bytes):
-    if size_bytes == 0:
-        return "0B"
-    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
-    i = int(math.floor(math.log(size_bytes, 1024)))
-    p = math.pow(1024, i)
-    s = round(size_bytes / p, 2)
-    return "%s %s" % (s, size_name[i])
-- 
GitLab


From b358896149ad49234c2641a10b25810c4657ee56 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 15 Jul 2021 12:16:40 +0200
Subject: [PATCH 162/175] updated PlotBootstrapSkillScore

---
 mlair/plotting/postprocessing_plotting.py | 71 +++++++++++++----------
 mlair/run_modules/post_processing.py      |  3 +-
 2 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 491aa52e..75249e11 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -608,7 +608,8 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
 
     """
 
-    def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None):
+    def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None,
+                 sampling: str = "daily", ahead_dim: str = "ahead"):
         """
         Set attributes and create plot.
 
@@ -616,20 +617,23 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         :param plot_folder: path to save the plot (default: current directory)
         :param model_setup: architecture type to specify plot name (default "CNN")
         :param separate_vars: variables to plot separated (default: ['o3'])
+        :param sampling: type of sampling rate, should be either hourly or daily (default: "daily")
+        :param ahead_dim: name of the ahead dimension (default: "ahead")
         """
         super().__init__(plot_folder, f"skill_score_bootstrap_{model_setup}")
         if separate_vars is None:
             separate_vars = ['o3']
         self._labels = None
         self._x_name = "boot_var"
-        self._data = self._prepare_data(data)
+        self._ahead_dim = ahead_dim
+        self._data = self._prepare_data(data, sampling)
         self._plot()
         self._save()
         self.plot_name += '_separated'
         self._plot(separate_vars=separate_vars)
         self._save(bbox_inches='tight')
 
-    def _prepare_data(self, data: Dict) -> pd.DataFrame:
+    def _prepare_data(self, data: Dict, sampling: str) -> pd.DataFrame:
         """
         Shrink given data, if only scores are relevant.
 
@@ -640,23 +644,33 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         :return: pre-processed data set
         """
         data = helpers.dict_to_xarray(data, "station").sortby(self._x_name)
-        new_boot_coords = self._return_vars_without_number_tag(data.coords['boot_var'].values, split_by='_', keep=1)
-        data = data.assign_coords({'boot_var': new_boot_coords})
-        self._labels = [str(i) + "d" for i in data.coords["ahead"].values]
+        new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_', keep=1)
+        data = data.assign_coords({self._x_name: new_boot_coords})
+        _, sampling_letter = self._get_target_sampling(sampling, 1)
+        # sampling = (sampling, sampling) if isinstance(sampling, str) else sampling
+        # sampling_letter = {"hourly": "H", "daily": "d"}.get(sampling[1], "")
+        self._labels = [str(i) + sampling_letter for i in data.coords[self._ahead_dim].values]
         if "station" not in data.dims:
             data = data.expand_dims("station")
         return data.to_dataframe("data").reset_index(level=[0, 1, 2])
 
+    @staticmethod
+    def _get_target_sampling(sampling, pos):
+        sampling = (sampling, sampling) if isinstance(sampling, str) else sampling
+        sampling_letter = {"hourly": "H", "daily": "d"}.get(sampling[pos], "")
+        return sampling, sampling_letter
+
     def _return_vars_without_number_tag(self, values, split_by, keep):
         arr = np.array([v.split(split_by) for v in values])
         num = arr[:, 0]
+        if arr.shape[keep] == 1:  # keep dim has only length 1, no number tags required
+            return num
         new_val = arr[:, keep]
         if self._all_values_are_equal(num, axis=0):
             return new_val
         else:
             raise NotImplementedError
 
-
     @staticmethod
     def _all_values_are_equal(arr, axis=0):
         if np.all(arr == arr[0], axis=axis):
@@ -681,37 +695,29 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
             self._plot_selected_variables(separate_vars)
 
     def _plot_selected_variables(self, separate_vars: List):
-        # if separate_vars is None:
-        #     separate_vars = ['o3']
         data = self._data
-        self.raise_error_if_separate_vars_do_not_exist(data, separate_vars)
-        all_variables = self._get_unique_values_from_column_of_df(data, 'boot_var')
+        self.raise_error_if_separate_vars_do_not_exist(data, separate_vars, self._x_name)
+        all_variables = self._get_unique_values_from_column_of_df(data, self._x_name)
         # remaining_vars = helpers.list_pop(all_variables, separate_vars) #remove_items
         remaining_vars = helpers.remove_items(all_variables, separate_vars)
-        data_first = self._select_data(df=data, variables=separate_vars, column_name='boot_var')
-        data_second = self._select_data(df=data, variables=remaining_vars, column_name='boot_var')
-
-        fig, ax = plt.subplots(nrows=1, ncols=2,
-                               gridspec_kw={'width_ratios': [len(separate_vars),
-                                                             len(remaining_vars)
-                                                             ]
-                                            }
-                               )
+        data_first = self._select_data(df=data, variables=separate_vars, column_name=self._x_name)
+        data_second = self._select_data(df=data, variables=remaining_vars, column_name=self._x_name)
+
+        fig, ax = plt.subplots(nrows=1, ncols=2, gridspec_kw={'width_ratios': [len(separate_vars),
+                                                                               len(remaining_vars)]})
         if len(separate_vars) > 1:
             first_box_width = .8
         else:
             first_box_width = 2.
 
-        sns.boxplot(x=self._x_name, y="data", hue="ahead", data=data_first, ax=ax[0], whis=1., palette="Blues_d",
-                    showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
-                    flierprops={"marker": "."}, width=first_box_width
-                    )
+        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_first, ax=ax[0], whis=1.,
+                    palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
+                    flierprops={"marker": "."}, width=first_box_width)
         ax[0].set(ylabel=f"skill score", xlabel="")
 
-        sns.boxplot(x=self._x_name, y="data", hue="ahead", data=data_second, ax=ax[1], whis=1., palette="Blues_d",
-                    showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
-                    flierprops={"marker": "."},
-                    )
+        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_second, ax=ax[1], whis=1.,
+                    palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
+                    flierprops={"marker": "."})
         ax[1].set(ylabel="", xlabel="")
         ax[1].yaxis.tick_right()
         handles, _ = ax[1].get_legend_handles_labels()
@@ -749,6 +755,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
 
     @staticmethod
     def _select_data(df: pd.DataFrame, variables: List[str], column_name: str) -> pd.DataFrame:
+        selected_data = None
         for i, variable in enumerate(variables):
             if i == 0:
                 selected_data = df.loc[df[column_name] == variable]
@@ -757,15 +764,15 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
                 selected_data = pd.concat([selected_data, tmp_var], axis=0)
         return selected_data
 
-    def raise_error_if_separate_vars_do_not_exist(self, data, separate_vars):
-        if not self._variables_exist_in_df(df=data, variables=separate_vars):
+    def raise_error_if_separate_vars_do_not_exist(self, data, separate_vars, column_name):
+        if not self._variables_exist_in_df(df=data, variables=separate_vars, column_name=column_name):
             raise ValueError(f"At least one entry of `separate_vars` does not exist in `self.data`.")
 
     @staticmethod
     def _get_unique_values_from_column_of_df(df: pd.DataFrame, column_name: str) -> List:
         return list(df[column_name].unique())
 
-    def _variables_exist_in_df(self, df: pd.DataFrame, variables: List[str], column_name: str = 'boot_var'):
+    def _variables_exist_in_df(self, df: pd.DataFrame, variables: List[str], column_name: str):
         vars_in_df = set(self._get_unique_values_from_column_of_df(df, column_name))
         return set(variables).issubset(vars_in_df)
 
@@ -774,7 +781,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
 
         """
         fig, ax = plt.subplots()
-        sns.boxplot(x=self._x_name, y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
+        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=self._data, ax=ax, whis=1., palette="Blues_d",
                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
         ax.axhline(y=0, color="grey", linewidth=.5)
         plt.xticks(rotation=45)
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 0d7bfeb4..f6eec3c8 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -318,7 +318,8 @@ class PostProcessing(RunEnvironment):
         try:
             if (self.bootstrap_skill_scores is not None) and ("PlotBootstrapSkillScore" in plot_list):
                 PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path,
-                                        model_setup=self.forecast_indicator)
+                                        model_setup=self.forecast_indicator, sampling=self._sampling,
+                                        ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var))
         except Exception as e:
             logging.error(f"Could not create plot PlotBootstrapSkillScore due to the following error: {e}")
 
-- 
GitLab


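The labels used in the boxplots above now derive their unit letter from the sampling rate via _get_target_sampling. A standalone sketch of that lookup (logic copied from the patch, wrapped as a plain function):

    def get_target_sampling(sampling, pos):
        # normalize to a tuple and map the requested position to a unit letter
        sampling = (sampling, sampling) if isinstance(sampling, str) else sampling
        sampling_letter = {"hourly": "H", "daily": "d"}.get(sampling[pos], "")
        return sampling, sampling_letter

    _, letter = get_target_sampling("daily", 1)
    labels = [str(i) + letter for i in (1, 2, 3)]   # ['1d', '2d', '3d']
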
From 54c5f1c1bc12c3593335e5674fbebc295bcd806b Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 16 Jul 2021 14:46:50 +0200
Subject: [PATCH 163/175] added new bootstrap method "zero mean" and type
 "single input" and "variable"

---
 mlair/data_handler/bootstraps.py           | 172 ++++++++++++++++-----
 mlair/plotting/postprocessing_plotting.py  |  87 ++++++++---
 mlair/run_modules/post_processing.py       | 127 +++++++++------
 test/test_data_handler/old_t_bootstraps.py |   2 +-
 4 files changed, 285 insertions(+), 103 deletions(-)

diff --git a/mlair/data_handler/bootstraps.py b/mlair/data_handler/bootstraps.py
index 68a4bbc4..0ae88599 100644
--- a/mlair/data_handler/bootstraps.py
+++ b/mlair/data_handler/bootstraps.py
@@ -15,69 +15,156 @@ __date__ = '2020-02-07'
 import os
 from collections import Iterator, Iterable
 from itertools import chain
+from typing import Union, List
 
 import numpy as np
 import xarray as xr
 
 from mlair.data_handler.abstract_data_handler import AbstractDataHandler
+from mlair.helpers.helpers import to_list
 
 
 class BootstrapIterator(Iterator):
 
     _position: int = None
 
-    def __init__(self, data: "BootStraps"):
+    def __init__(self, data: "BootStraps", method):
         assert isinstance(data, BootStraps)
         self._data = data
         self._dimension = data.bootstrap_dimension
-        self._collection = self._data.bootstraps()
+        self.boot_dim = "boots"
+        self._method = method
+        self._collection = self.create_collection(self._data.data, self._dimension)
         self._position = 0
 
+    def __next__(self):
+        """Return next element or stop iteration."""
+        raise NotImplementedError
+
+    @classmethod
+    def create_collection(cls, data, dim):
+        raise NotImplementedError
+
+    def _reshape(self, d):
+        if isinstance(d, list):
+            return list(map(lambda x: self._reshape(x), d))
+            # return list(map(lambda x: np.rollaxis(x, -1, 0).reshape(x.shape[0] * x.shape[-1], *x.shape[1:-1]), d))
+        else:
+            shape = d.shape
+            return np.rollaxis(d, -1, 0).reshape(shape[0] * shape[-1], *shape[1:-1])
+
+    def _to_numpy(self, d):
+        if isinstance(d, list):
+            return list(map(lambda x: self._to_numpy(x), d))
+        else:
+            return d.values
+
+    def apply_bootstrap_method(self, data: np.ndarray) -> Union[np.ndarray, List[np.ndarray]]:
+        """
+        Apply the predefined bootstrap method to the given data.
+
+        :param data: data to apply bootstrap method on
+        :return: processed data as numpy array
+        """
+        if isinstance(data, list):
+            return list(map(lambda x: self.apply_bootstrap_method(x.values), data))
+        else:
+            return self._method.apply(data)
+
+
+class BootstrapIteratorSingleInput(BootstrapIterator):
+    _position: int = None
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
     def __next__(self):
         """Return next element or stop iteration."""
         try:
             index, dimension = self._collection[self._position]
             nboot = self._data.number_of_bootstraps
             _X, _Y = self._data.data.get_data(as_numpy=False)
-            _X = list(map(lambda x: x.expand_dims({'boots': range(nboot)}, axis=-1), _X))
-            _Y = _Y.expand_dims({"boots": range(nboot)}, axis=-1)
+            _X = list(map(lambda x: x.expand_dims({self.boot_dim: range(nboot)}, axis=-1), _X))
+            _Y = _Y.expand_dims({self.boot_dim: range(nboot)}, axis=-1)
             single_variable = _X[index].sel({self._dimension: [dimension]})
-            shuffled_variable = self.shuffle(single_variable.values)
-            shuffled_data = xr.DataArray(shuffled_variable, coords=single_variable.coords, dims=single_variable.dims)
-            _X[index] = shuffled_data.combine_first(_X[index]).reindex_like(_X[index])
+            bootstrapped_variable = self.apply_bootstrap_method(single_variable.values)
+            bootstrapped_data = xr.DataArray(bootstrapped_variable, coords=single_variable.coords,
+                                             dims=single_variable.dims)
+            _X[index] = bootstrapped_data.combine_first(_X[index]).reindex_like(_X[index])
             self._position += 1
         except IndexError:
             raise StopIteration()
         _X, _Y = self._to_numpy(_X), self._to_numpy(_Y)
         return self._reshape(_X), self._reshape(_Y), (index, dimension)
 
-    @staticmethod
-    def _reshape(d):
-        if isinstance(d, list):
-            return list(map(lambda x: np.rollaxis(x, -1, 0).reshape(x.shape[0] * x.shape[-1], *x.shape[1:-1]), d))
-        else:
-            shape = d.shape
-            return np.rollaxis(d, -1, 0).reshape(shape[0] * shape[-1], *shape[1:-1])
+    @classmethod
+    def create_collection(cls, data, dim):
+        l = []
+        for i, x in enumerate(data.get_X(as_numpy=False)):
+            l.append(list(map(lambda y: (i, y), x.indexes[dim])))
+        return list(chain(*l))
 
-    @staticmethod
-    def _to_numpy(d):
-        if isinstance(d, list):
-            return list(map(lambda x: x.values, d))
-        else:
-            return d.values
 
-    @staticmethod
-    def shuffle(data: np.ndarray) -> np.ndarray:
-        """
-        Shuffle randomly from given data (draw elements with replacement).
+class BootstrapIteratorVariable(BootstrapIterator):
 
-        :param data: data to shuffle
-        :return: shuffled data as numpy array
-        """
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def __next__(self):
+        """Return next element or stop iteration."""
+        try:
+            dimension = self._collection[self._position]
+            nboot = self._data.number_of_bootstraps
+            _X, _Y = self._data.data.get_data(as_numpy=False)
+            _X = list(map(lambda x: x.expand_dims({self.boot_dim: range(nboot)}, axis=-1), _X))
+            _Y = _Y.expand_dims({self.boot_dim: range(nboot)}, axis=-1)
+            for index in range(len(_X)):
+                single_variable = _X[index].sel({self._dimension: [dimension]})
+                bootstrapped_variable = self.apply_bootstrap_method(single_variable.values)
+                bootstrapped_data = xr.DataArray(bootstrapped_variable, coords=single_variable.coords,
+                                                 dims=single_variable.dims)
+                _X[index] = bootstrapped_data.combine_first(_X[index]).transpose(*_X[index].dims)
+            self._position += 1
+        except IndexError:
+            raise StopIteration()
+        _X, _Y = self._to_numpy(_X), self._to_numpy(_Y)
+        return self._reshape(_X), self._reshape(_Y), (None, dimension)
+
+    @classmethod
+    def create_collection(cls, data, dim):
+        l = set()
+        for i, x in enumerate(data.get_X(as_numpy=False)):
+            l.update(x.indexes[dim].to_list())
+        return to_list(l)
+
+
+class BootstrapIteratorBranch(BootstrapIterator):
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def __next__(self):
+        pass
+    # TODO: implement here: permute entire branch at once
+
+
+class ShuffleBootstraps:
+
+    @staticmethod
+    def apply(data):
         size = data.shape
         return np.random.choice(data.reshape(-1, ), size=size)
 
 
+class MeanBootstraps:
+
+    def __init__(self, mean):
+        self._mean = mean
+
+    def apply(self, data):
+        return np.ones_like(data) * self._mean
+
+
 class BootStraps(Iterable):
     """
     Main class to perform bootstrap operations.
@@ -89,10 +176,19 @@ class BootStraps(Iterable):
     this variable). The tuple is interesting if X consists of multiple input streams X_i (e.g. two or more stations)
     because it shows which variable of which input X_i has been bootstrapped. All bootstrap combinations can be
     retrieved by calling the .bootstraps() method. Furthermore, by calling the .get_orig_prediction() this class
-    imitates according to the set number of bootstraps the original prediction
+    imitates the original prediction according to the set number of bootstraps.
+
+    As bootstrap method, this class can currently make use of the ShuffleBootstraps class, which uses drawing with
+    replacement to destroy the variable's information while keeping its statistical properties. Use
+    `bootstrap_method="shuffle"` to call this method. Another method is zero mean bootstrapping, triggered by
+    `bootstrap_method="zero_mean"` and performed by the MeanBootstraps class. This method destroys the variable's
+    information by a mode collapse to a constant value of zero. If the variable is normalized to zero mean, this is
+    equivalent to a mode collapse to the variable's mean value. In general, statistics are not conserved in this case,
+    apart from the mean value. A custom mean value for bootstrapping is currently not supported.
     """
+
     def __init__(self, data: AbstractDataHandler, number_of_bootstraps: int = 10,
-                 bootstrap_dimension: str = "variables"):
+                 bootstrap_dimension: str = "variables", bootstrap_type="singleinput", bootstrap_method="shuffle"):
         """
         Create iterable class to be ready to iter.
 
@@ -100,20 +196,24 @@ class BootStraps(Iterable):
         :param number_of_bootstraps: the number of bootstrap realisations
         """
         self.data = data
-        self.number_of_bootstraps = number_of_bootstraps
+        self.number_of_bootstraps = number_of_bootstraps if bootstrap_method == "shuffle" else 1
         self.bootstrap_dimension = bootstrap_dimension
+        self.bootstrap_method = {"shuffle": ShuffleBootstraps(),
+                                 "zero_mean": MeanBootstraps(mean=0)}.get(
+            bootstrap_method)  # todo adjust number of bootstraps if mean bootstrapping
+        self.BootstrapIterator = {"singleinput": BootstrapIteratorSingleInput,
+                                  "branch": BootstrapIteratorBranch,
+                                  "variable": BootstrapIteratorVariable}.get(bootstrap_type,
+                                                                             BootstrapIteratorSingleInput)
 
     def __iter__(self):
-        return BootstrapIterator(self)
+        return self.BootstrapIterator(self, self.bootstrap_method)
 
     def __len__(self):
-        return len(self.bootstraps())
+        return len(self.BootstrapIterator.create_collection(self.data, self.bootstrap_dimension))
 
     def bootstraps(self):
-        l = []
-        for i, x in enumerate(self.data.get_X(as_numpy=False)):
-            l.append(list(map(lambda y: (i, y), x.indexes['variables'])))
-        return list(chain(*l))
+        return self.BootstrapIterator.create_collection(self.data, self.bootstrap_dimension)
 
     def get_orig_prediction(self, path: str, file_name: str, prediction_name: str = "CNN") -> np.ndarray:
         """
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 75249e11..e5080f6e 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -609,7 +609,8 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
     """
 
     def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None,
-                 sampling: str = "daily", ahead_dim: str = "ahead"):
+                 sampling: str = "daily", ahead_dim: str = "ahead", bootstrap_type: str = None,
+                 bootstrap_method: str = None):
         """
         Set attributes and create plot.
 
@@ -619,19 +620,41 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         :param separate_vars: variables to plot separated (default: ['o3'])
         :param sampling: type of sampling rate, should be either hourly or daily (default: "daily")
         :param ahead_dim: name of the ahead dimension (default: "ahead")
+        :param bootstrap_type: type of the bootstrap method, used to annotate plot name and title (default: None)
+        :param bootstrap_method: name of the bootstrap method, used to annotate plot name and title (default: None)
         """
-        super().__init__(plot_folder, f"skill_score_bootstrap_{model_setup}")
+        annotation = "_".join([s for s in ["", bootstrap_type, bootstrap_method] if s is not None])
+        super().__init__(plot_folder, f"skill_score_bootstrap_{model_setup}{annotation}")
         if separate_vars is None:
             separate_vars = ['o3']
         self._labels = None
         self._x_name = "boot_var"
         self._ahead_dim = ahead_dim
+        self._boot_type = self._set_bootstrap_type(bootstrap_type)
+        self._boot_method = self._set_bootstrap_method(bootstrap_method)
+
+        self._title = f"Bootstrap analysis ({self._boot_method}, {self._boot_type})"
         self._data = self._prepare_data(data, sampling)
-        self._plot()
-        self._save()
-        self.plot_name += '_separated'
-        self._plot(separate_vars=separate_vars)
-        self._save(bbox_inches='tight')
+        if "branch" in self._data.columns:
+            plot_name = self.plot_name
+            for branch in self._data["branch"].unique():
+                self._title = f"Bootstrap analysis ({self._boot_method}, {self._boot_type}, {branch})"
+                self._plot(branch=branch)
+                self.plot_name = f"{plot_name}_{branch}"
+                self._save()
+        else:
+            self._plot()
+            self._save()
+            self.plot_name += '_separated'
+            self._plot(separate_vars=separate_vars)
+            self._save(bbox_inches='tight')
+
+    @staticmethod
+    def _set_bootstrap_type(boot_type):
+        return {"singleinput": "single input"}.get(boot_type, boot_type)
+
+    @staticmethod
+    def _set_bootstrap_method(boot_method):
+        return {"zero_mean": "zero mean", "shuffle": "shuffled"}.get(boot_method, boot_method)
 
     def _prepare_data(self, data: Dict, sampling: str) -> pd.DataFrame:
         """
@@ -643,16 +666,28 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         :param data: dictionary with station names as keys and 2D xarrays as values
         :return: pre-processed data set
         """
-        data = helpers.dict_to_xarray(data, "station").sortby(self._x_name)
-        new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_', keep=1)
-        data = data.assign_coords({self._x_name: new_boot_coords})
+        station_dim = "station"
+        data = helpers.dict_to_xarray(data, station_dim).sortby(self._x_name)
+        if self._boot_type == "single input":
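+            # split the combined "{branch}_{variable}" coordinate into separate variable and branch dimensions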
+            number_tags = self._get_number_tag(data.coords[self._x_name].values, split_by='_')
+            new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_',
+                                                                   keep=1, as_unique=True)
+            values = data.values.reshape((data.shape[0], len(new_boot_coords), len(number_tags), data.shape[-1]))
+            data = xr.DataArray(values, coords={station_dim: data.coords[station_dim], self._x_name: new_boot_coords,
+                                                "branch": number_tags, self._ahead_dim: data.coords[self._ahead_dim]},
+                                dims=[station_dim, self._x_name, "branch", self._ahead_dim])
+        else:
+            try:
+                new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_',
+                                                                       keep=1)
+                data = data.assign_coords({self._x_name: new_boot_coords})
+            except NotImplementedError:
+                pass
         _, sampling_letter = self._get_target_sampling(sampling, 1)
-        # sampling = (sampling, sampling) if isinstance(sampling, str) else sampling
-        # sampling_letter = {"hourly": "H", "daily": "d"}.get(sampling[1], "")
         self._labels = [str(i) + sampling_letter for i in data.coords[self._ahead_dim].values]
-        if "station" not in data.dims:
-            data = data.expand_dims("station")
-        return data.to_dataframe("data").reset_index(level=[0, 1, 2])
+        if station_dim not in data.dims:
+            data = data.expand_dims(station_dim)
+        return data.to_dataframe("data").reset_index(level=np.arange(len(data.dims)).tolist())
 
     @staticmethod
     def _get_target_sampling(sampling, pos):
@@ -660,7 +695,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         sampling_letter = {"hourly": "H", "daily": "d"}.get(sampling[pos], "")
         return sampling, sampling_letter
 
-    def _return_vars_without_number_tag(self, values, split_by, keep):
+    def _return_vars_without_number_tag(self, values, split_by, keep, as_unique=False):
         arr = np.array([v.split(split_by) for v in values])
         num = arr[:, 0]
         if arr.shape[keep] == 1:  # keep dim has only length 1, no number tags required
@@ -668,9 +703,17 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         new_val = arr[:, keep]
         if self._all_values_are_equal(num, axis=0):
             return new_val
+        elif as_unique is True:
+            return np.unique(new_val)
         else:
             raise NotImplementedError
 
+    @staticmethod
+    def _get_number_tag(values, split_by):
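+        # collect the unique leading number tags (e.g. "0" from "0_o3"), one per input branch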
+        arr = np.array([v.split(split_by) for v in values])
+        num = arr[:, 0]
+        return np.unique(num).tolist()
+
     @staticmethod
     def _all_values_are_equal(arr, axis=0):
         if np.all(arr == arr[0], axis=axis):
@@ -687,10 +730,10 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         """
         return "" if score_only else "terms and "
 
-    def _plot(self, separate_vars=None):
+    def _plot(self, branch=None, separate_vars=None):
         """Plot climatological skill score."""
         if separate_vars is None:
-            self._plot_all_variables()
+            self._plot_all_variables(branch)
         else:
             self._plot_selected_variables(separate_vars)
 
@@ -752,6 +795,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
 
         align_yaxis(ax[0], ax[1])
         align_yaxis(ax[0], ax[1])
+        plt.title(self._title)
 
     @staticmethod
     def _select_data(df: pd.DataFrame, variables: List[str], column_name: str) -> pd.DataFrame:
@@ -776,16 +820,17 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         vars_in_df = set(self._get_unique_values_from_column_of_df(df, column_name))
         return set(variables).issubset(vars_in_df)
 
-    def _plot_all_variables(self):
+    def _plot_all_variables(self, branch=None):
         """
 
         """
         fig, ax = plt.subplots()
-        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=self._data, ax=ax, whis=1., palette="Blues_d",
+        plot_data = self._data if branch is None else self._data[self._data["branch"] == str(branch)]
+        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1., palette="Blues_d",
                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
         ax.axhline(y=0, color="grey", linewidth=.5)
         plt.xticks(rotation=45)
-        ax.set(ylabel=f"skill score", xlabel="", title="summary of all stations")
+        ax.set(ylabel=f"skill score", xlabel="", title=self._title)
         handles, _ = ax.get_legend_handles_labels()
         ax.legend(handles, self._labels)
         plt.tight_layout()
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index f6eec3c8..0c530400 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -103,7 +103,7 @@ class PostProcessing(RunEnvironment):
         if self.data_store.get("evaluate_bootstraps", "postprocessing"):
             with TimeTracking(name="calculate bootstraps"):
                 create_new_bootstraps = self.data_store.get("create_new_bootstraps", "postprocessing")
-                self.bootstrap_postprocessing(create_new_bootstraps)
+                self.bootstrap_postprocessing(create_new_bootstraps)  # todo: make flexible and add boot method and type
 
         # skill scores and error metrics
         with TimeTracking(name="calculate skill scores"):
@@ -136,7 +136,8 @@ class PostProcessing(RunEnvironment):
                 continue
         return xr.concat(competing_predictions, "type") if len(competing_predictions) > 0 else None
 
-    def bootstrap_postprocessing(self, create_new_bootstraps: bool, _iter: int = 0) -> None:
+    def bootstrap_postprocessing(self, create_new_bootstraps: bool, _iter: int = 0, bootstrap_type=None,
+                                 bootstrap_method=None) -> None:
         """
         Calculate skill scores of bootstrapped data.
 
@@ -149,18 +150,28 @@ class PostProcessing(RunEnvironment):
         :param _iter: internal counter to reduce unnecessary recursive calls (maximum number is 2, otherwise something
             went wrong).
         """
-        try:
-            if create_new_bootstraps:
-                self.create_bootstrap_forecast()
-            self.bootstrap_skill_scores = self.calculate_bootstrap_skill_scores()
-        except FileNotFoundError:
-            if _iter != 0:
-                raise RuntimeError("bootstrap_postprocessing is called for the 2nd time. This means, that calling"
-                                   "manually the reason for the failure.")
-            logging.info("Couldn't load all files, restart bootstrap postprocessing with create_new_bootstraps=True.")
-            self.bootstrap_postprocessing(True, _iter=1)
-
-    def create_bootstrap_forecast(self) -> None:
+        self.bootstrap_skill_scores = {}
+        bootstrap_type = ["variable", "singleinput"]  # Todo: make flexible
+        bootstrap_method = ["shuffle", "zero_mean"]  # Todo: make flexible
+        for boot_type in to_list(bootstrap_type):
+            self.bootstrap_skill_scores[boot_type] = {}
+            for boot_method in to_list(bootstrap_method):
+                try:
+                    if create_new_bootstraps:
+                        self.create_bootstrap_forecast(bootstrap_type=boot_type, bootstrap_method=boot_method)
+                    boot_skill_score = self.calculate_bootstrap_skill_scores(bootstrap_type=boot_type,
+                                                                             bootstrap_method=boot_method)
+                    self.bootstrap_skill_scores[boot_type][boot_method] = boot_skill_score
+                except FileNotFoundError:
+                    if _iter != 0:
+                        raise RuntimeError(f"bootstrap_postprocessing ({boot_type}, {boot_type}) was called for the 2nd"
+                                           f" time. This means, that something internally goes wrong. Please check for "
+                                           f"possible errors")
+                    logging.info(f"Could not load all files for bootstrapping ({boot_type}, {boot_type}), restart "
+                                 f"bootstrap postprocessing with create_new_bootstraps=True.")
+                    self.bootstrap_postprocessing(True, _iter=1, bootstrap_type=boot_type, bootstrap_method=boot_method)
+
+    def create_bootstrap_forecast(self, bootstrap_type, bootstrap_method) -> None:
         """
         Create bootstrapped predictions for all stations and variables.
 
@@ -168,16 +179,16 @@ class PostProcessing(RunEnvironment):
         `bootstraps_labels_{station}.nc`.
         """
         # forecast
-        with TimeTracking(name=inspect.stack()[0].function):
+        with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"):
             # extract all requirements from data store
-            bootstrap_path = self.data_store.get("bootstrap_path")
             forecast_path = self.data_store.get("forecast_path")
             number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing")
             dims = ["index", self.ahead_dim, "type"]
             for station in self.test_data:
-                logging.info(str(station))
+                # logging.info(str(station))
                 X, Y = None, None
-                bootstraps = BootStraps(station, number_of_bootstraps)
+                bootstraps = BootStraps(station, number_of_bootstraps, bootstrap_type=bootstrap_type,
+                                        bootstrap_method=bootstrap_method)
                 for boot in bootstraps:
                     X, Y, (index, dimension) = boot
                     # make bootstrap predictions
@@ -188,18 +199,19 @@ class PostProcessing(RunEnvironment):
                     bootstrap_predictions = np.expand_dims(bootstrap_predictions, axis=-1)
                     shape = bootstrap_predictions.shape
                     coords = (range(shape[0]), range(1, shape[1] + 1))
-                    var = f"{index}_{dimension}"
+                    var = f"{index}_{dimension}" if index is not None else str(dimension)
                     tmp = xr.DataArray(bootstrap_predictions, coords=(*coords, [var]), dims=dims)
-                    file_name = os.path.join(forecast_path, f"bootstraps_{station}_{var}.nc")
+                    file_name = os.path.join(forecast_path,
+                                             f"bootstraps_{station}_{var}_{bootstrap_type}_{bootstrap_method}.nc")
                     tmp.to_netcdf(file_name)
                 else:
                     # store also true labels for each station
                     labels = np.expand_dims(Y, axis=-1)
-                    file_name = os.path.join(forecast_path, f"bootstraps_{station}_labels.nc")
+                    file_name = os.path.join(forecast_path, f"bootstraps_{station}_{bootstrap_method}_labels.nc")
                     labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=dims)
                     labels.to_netcdf(file_name)
 
-    def calculate_bootstrap_skill_scores(self) -> Dict[str, xr.DataArray]:
+    def calculate_bootstrap_skill_scores(self, bootstrap_type, bootstrap_method) -> Dict[str, xr.DataArray]:
         """
         Calculate skill score of bootstrapped variables.
 
@@ -209,53 +221,67 @@ class PostProcessing(RunEnvironment):
 
         :return: The result dictionary with station-wise skill scores
         """
-        with TimeTracking(name=inspect.stack()[0].function):
+        with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"):
             # extract all requirements from data store
-            bootstrap_path = self.data_store.get("bootstrap_path")
             forecast_path = self.data_store.get("forecast_path")
             number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing")
             forecast_file = f"forecasts_norm_%s_test.nc"
-            bootstraps = BootStraps(self.test_data[0], number_of_bootstraps).bootstraps()
+
+            bootstraps = BootStraps(self.test_data[0], number_of_bootstraps, bootstrap_type=bootstrap_type,
+                                    bootstrap_method=bootstrap_method)
+            number_of_bootstraps = bootstraps.number_of_bootstraps
+            bootstrap_iter = bootstraps.bootstraps()
             skill_scores = statistics.SkillScores(None)
             score = {}
             for station in self.test_data:
-                logging.info(station)
-
                 # get station labels
-                file_name = os.path.join(forecast_path, f"bootstraps_{str(station)}_labels.nc")
-                labels = xr.open_dataarray(file_name)
+                file_name = os.path.join(forecast_path, f"bootstraps_{str(station)}_{bootstrap_method}_labels.nc")
+                with xr.open_dataarray(file_name) as da:
+                    labels = da.load()
                 shape = labels.shape
 
                 # get original forecasts
                 orig = self.get_orig_prediction(forecast_path, forecast_file % str(station), number_of_bootstraps)
                 orig = orig.reshape(shape)
                 coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"])
-                orig = xr.DataArray(orig, coords=coords, dims=["index", "ahead", "type"])
+                orig = xr.DataArray(orig, coords=coords, dims=["index", self.ahead_dim, "type"])
 
                 # calculate skill scores for each variable
                 skill = pd.DataFrame(columns=range(1, self.window_lead_time + 1))
-                for boot_set in bootstraps:
-                    boot_var = f"{boot_set[0]}_{boot_set[1]}"
-                    file_name = os.path.join(forecast_path, f"bootstraps_{station}_{boot_var}.nc")
-                    boot_data = xr.open_dataarray(file_name)
+                for boot_set in bootstrap_iter:
+                    boot_var = boot_set if isinstance(boot_set, str) else f"{boot_set[0]}_{boot_set[1]}"
+                    file_name = os.path.join(forecast_path,
+                                             f"bootstraps_{station}_{boot_var}_{bootstrap_type}_{bootstrap_method}.nc")
+                    # boot_data = xr.open_dataarray(file_name)
+                    with xr.open_dataarray(file_name) as da:
+                        boot_data = da.load()
                     boot_data = boot_data.combine_first(labels).combine_first(orig)
                     boot_scores = []
                     for ahead in range(1, self.window_lead_time + 1):
-                        data = boot_data.sel(ahead=ahead)
+                        data = boot_data.sel({self.ahead_dim: ahead})
                         boot_scores.append(
                             skill_scores.general_skill_score(data, forecast_name=boot_var, reference_name="orig"))
                     skill.loc[boot_var] = np.array(boot_scores)
 
                 # collect all results in single dictionary
-                score[str(station)] = xr.DataArray(skill, dims=["boot_var", "ahead"])
+                score[str(station)] = xr.DataArray(skill, dims=["boot_var", self.ahead_dim])
             return score
 
     def get_orig_prediction(self, path, file_name, number_of_bootstraps, prediction_name=None):
         if prediction_name is None:
             prediction_name = self.forecast_indicator
         file = os.path.join(path, file_name)
-        prediction = xr.open_dataarray(file).sel(type=prediction_name).squeeze()
-        vals = np.tile(prediction.data, (number_of_bootstraps, 1))
+        # prediction = xr.open_dataarray(file).sel(type=prediction_name).squeeze()
+        with xr.open_dataarray(file) as da:
+            prediction = da.load().sel(type=prediction_name).squeeze()
+        return self.repeat_data(prediction, number_of_bootstraps)
+        # vals = np.tile(prediction.data, (number_of_bootstraps, 1))
+        # return vals[~np.isnan(vals).any(axis=1), :]
+
+    def repeat_data(self, data, number_of_repetition):
+        if isinstance(data, xr.DataArray):
+            data = data.data
+        vals = np.tile(data, (number_of_repetition, 1))
         return vals[~np.isnan(vals).any(axis=1), :]
 
     def _get_model_name(self):
@@ -317,9 +343,15 @@ class PostProcessing(RunEnvironment):
 
         try:
             if (self.bootstrap_skill_scores is not None) and ("PlotBootstrapSkillScore" in plot_list):
-                PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path,
-                                        model_setup=self.forecast_indicator, sampling=self._sampling,
-                                        ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var))
+                for boot_type, boot_data in self.bootstrap_skill_scores.items():
+                    for boot_method, boot_skill_score in boot_data.items():
+                        PlotBootstrapSkillScore(boot_skill_score, plot_folder=self.plot_path,
+                                                model_setup=self.forecast_indicator, sampling=self._sampling,
+                                                ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var),
+                                                bootstrap_type=boot_type, bootstrap_method=boot_method)
+                # PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path,
+                #                         model_setup=self.forecast_indicator, sampling=self._sampling,
+                #                         ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var))
         except Exception as e:
             logging.error(f"Could not create plot PlotBootstrapSkillScore due to the following error: {e}")
 
@@ -496,8 +528,9 @@ class PostProcessing(RunEnvironment):
         """
         path = os.path.join(self.competitor_path, competitor_name)
         file = os.path.join(path, f"forecasts_{station_name}_test.nc")
-        data = xr.open_dataarray(file)
-        # data = data.expand_dims(Stations=[station_name])  # ToDo: remove line
+        with xr.open_dataarray(file) as da:
+            data = da.load()
+        # data = xr.open_dataarray(file)
         forecast = data.sel(type=[self.forecast_indicator])
         forecast.coords["type"] = [competitor_name]
         return forecast
@@ -653,7 +686,9 @@ class PostProcessing(RunEnvironment):
         """
         try:
             file = os.path.join(path, f"forecasts_{str(station)}_train_val.nc")
-            return xr.open_dataarray(file)
+            with xr.open_dataarray(file) as da:
+                return da.load()
+            # return xr.open_dataarray(file)
         except (IndexError, KeyError, FileNotFoundError):
             return None
 
@@ -668,7 +703,9 @@ class PostProcessing(RunEnvironment):
         """
         try:
             file = os.path.join(path, f"forecasts_{str(station)}_test.nc")
-            return xr.open_dataarray(file)
+            with xr.open_dataarray(file) as da:
+                return da.load()
+            # return xr.open_dataarray(file)
         except (IndexError, KeyError, FileNotFoundError):
             return None
 
diff --git a/test/test_data_handler/old_t_bootstraps.py b/test/test_data_handler/old_t_bootstraps.py
index 9616ed3f..21c18c6c 100644
--- a/test/test_data_handler/old_t_bootstraps.py
+++ b/test/test_data_handler/old_t_bootstraps.py
@@ -160,7 +160,7 @@ class TestCreateShuffledData:
 
     def test_shuffle(self, shuffled_data_no_creation):
         dummy = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
-        res = shuffled_data_no_creation.shuffle(dummy, chunks=(2, 3)).compute()
+        res = shuffled_data_no_creation.apply_bootstrap_method(dummy, chunks=(2, 3)).compute()
         assert res.shape == dummy.shape
         assert dummy.max() >= res.max()
         assert dummy.min() <= res.min()
-- 
GitLab
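
For orientation, the bootstrap forecast files written above now encode station,
bootstrapped input, bootstrap type and bootstrap method in their names, while the
labels file only depends on the method. A minimal sketch of the naming scheme (path,
station and variable values are illustrative; the patterns follow the diff):

    import os

    forecast_path = "/tmp/forecasts"
    station, boot_type, boot_method = "DEBW107", "singleinput", "shuffle"
    var = "0_o3"  # "{index}_{dimension}" for single-input bootstraps

    forecast_file = os.path.join(forecast_path, f"bootstraps_{station}_{var}_{boot_type}_{boot_method}.nc")
    labels_file = os.path.join(forecast_path, f"bootstraps_{station}_{boot_method}_labels.nc")
    print(forecast_file)  # .../bootstraps_DEBW107_0_o3_singleinput_shuffle.nc
    print(labels_file)    # .../bootstraps_DEBW107_shuffle_labels.nc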


From c7f4a9203a809a0ee78ddd7288380c51ed556a87 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 16 Jul 2021 14:50:19 +0200
Subject: [PATCH 164/175] cleanup, remove commented code

---
 mlair/plotting/postprocessing_plotting.py |  3 ---
 mlair/run_modules/post_processing.py      | 14 ++------------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index e5080f6e..eef9208a 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -741,7 +741,6 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         data = self._data
         self.raise_error_if_separate_vars_do_not_exist(data, separate_vars, self._x_name)
         all_variables = self._get_unique_values_from_column_of_df(data, self._x_name)
-        # remaining_vars = helpers.list_pop(all_variables, separate_vars) #remove_items
         remaining_vars = helpers.remove_items(all_variables, separate_vars)
         data_first = self._select_data(df=data, variables=separate_vars, column_name=self._x_name)
         data_second = self._select_data(df=data, variables=remaining_vars, column_name=self._x_name)
@@ -945,8 +944,6 @@ class PlotTimeSeries:
     def _plot_obs(self, ax, data):
         ahead = 1
         obs_data = data.sel(type="obs", ahead=ahead).shift(index=ahead)
-        # index = data.index + np.timedelta64(1, self._sampling)
-        # ax.plot(index, obs_data.values, color=matplotlib.colors.cnames["green"], label="obs")
         ax.plot(obs_data, color=matplotlib.colors.cnames["green"], label="obs")
 
     @staticmethod
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 0c530400..c4ce0088 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -185,7 +185,6 @@ class PostProcessing(RunEnvironment):
             number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing")
             dims = ["index", self.ahead_dim, "type"]
             for station in self.test_data:
-                # logging.info(str(station))
                 X, Y = None, None
                 bootstraps = BootStraps(station, number_of_bootstraps, bootstrap_type=bootstrap_type,
                                         bootstrap_method=bootstrap_method)
@@ -252,7 +251,6 @@ class PostProcessing(RunEnvironment):
                     boot_var = boot_set if isinstance(boot_set, str) else f"{boot_set[0]}_{boot_set[1]}"
                     file_name = os.path.join(forecast_path,
                                              f"bootstraps_{station}_{boot_var}_{bootstrap_type}_{bootstrap_method}.nc")
-                    # boot_data = xr.open_dataarray(file_name)
                     with xr.open_dataarray(file_name) as da:
                         boot_data = da.load()
                     boot_data = boot_data.combine_first(labels).combine_first(orig)
@@ -271,14 +269,12 @@ class PostProcessing(RunEnvironment):
         if prediction_name is None:
             prediction_name = self.forecast_indicator
         file = os.path.join(path, file_name)
-        # prediction = xr.open_dataarray(file).sel(type=prediction_name).squeeze()
         with xr.open_dataarray(file) as da:
             prediction = da.load().sel(type=prediction_name).squeeze()
         return self.repeat_data(prediction, number_of_bootstraps)
-        # vals = np.tile(prediction.data, (number_of_bootstraps, 1))
-        # return vals[~np.isnan(vals).any(axis=1), :]
 
-    def repeat_data(self, data, number_of_repetition):
+    @staticmethod
+    def repeat_data(data, number_of_repetition):
         if isinstance(data, xr.DataArray):
             data = data.data
         vals = np.tile(data, (number_of_repetition, 1))
@@ -349,9 +345,6 @@ class PostProcessing(RunEnvironment):
                                                 model_setup=self.forecast_indicator, sampling=self._sampling,
                                                 ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var),
                                                 bootstrap_type=boot_type, bootstrap_method=boot_method)
-                # PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path,
-                #                         model_setup=self.forecast_indicator, sampling=self._sampling,
-                #                         ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var))
         except Exception as e:
             logging.error(f"Could not create plot PlotBootstrapSkillScore due to the following error: {e}")
 
@@ -530,7 +523,6 @@ class PostProcessing(RunEnvironment):
         file = os.path.join(path, f"forecasts_{station_name}_test.nc")
         with xr.open_dataarray(file) as da:
             data = da.load()
-        # data = xr.open_dataarray(file)
         forecast = data.sel(type=[self.forecast_indicator])
         forecast.coords["type"] = [competitor_name]
         return forecast
@@ -688,7 +680,6 @@ class PostProcessing(RunEnvironment):
             file = os.path.join(path, f"forecasts_{str(station)}_train_val.nc")
             with xr.open_dataarray(file) as da:
                 return da.load()
-            # return xr.open_dataarray(file)
         except (IndexError, KeyError, FileNotFoundError):
             return None
 
@@ -705,7 +696,6 @@ class PostProcessing(RunEnvironment):
             file = os.path.join(path, f"forecasts_{str(station)}_test.nc")
             with xr.open_dataarray(file) as da:
                 return da.load()
-            # return xr.open_dataarray(file)
         except (IndexError, KeyError, FileNotFoundError):
             return None
 
-- 
GitLab


From 271c10d069338d7175f393a51a74274d326fcb3a Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 16 Jul 2021 14:57:15 +0200
Subject: [PATCH 165/175] bootstrap method and type can be set during
 experiment setup

---
 mlair/configuration/defaults.py       | 2 ++
 mlair/run_modules/experiment_setup.py | 9 ++++++---
 mlair/run_modules/post_processing.py  | 9 ++++++---
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 088a504a..d61146b6 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -46,6 +46,8 @@ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True
 DEFAULT_EVALUATE_BOOTSTRAPS = True
 DEFAULT_CREATE_NEW_BOOTSTRAPS = False
 DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
+DEFAULT_BOOTSTRAP_TYPE = "singleinput"
+DEFAULT_BOOTSTRAP_METHOD = "shuffle"
 DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
                      "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
                      "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotPeriodogram"]
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index bd06914f..8036413c 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -19,7 +19,8 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \
-    DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG, DEFAULT_MAX_NUMBER_MULTIPROCESSING
+    DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG, DEFAULT_MAX_NUMBER_MULTIPROCESSING, \
+    DEFAULT_BOOTSTRAP_TYPE, DEFAULT_BOOTSTRAP_METHOD
 from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
@@ -211,8 +212,8 @@ class ExperimentSetup(RunEnvironment):
                  create_new_model=None, bootstrap_path=None, permute_data_on_training=None, transformation=None,
                  train_min_length=None, val_min_length=None, test_min_length=None, extreme_values: list = None,
                  extremes_on_right_tail_only: bool = None, evaluate_bootstraps=None, plot_list=None,
-                 number_of_bootstraps=None,
-                 create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
+                 number_of_bootstraps=None, create_new_bootstraps=None, bootstrap_method=None, bootstrap_type=None,
+                 data_path: str = None, batch_path: str = None, login_nodes=None,
                  hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
                  data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
                  use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None,
@@ -347,6 +348,8 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing")
         self._set_param("number_of_bootstraps", number_of_bootstraps, default=DEFAULT_NUMBER_OF_BOOTSTRAPS,
                         scope="general.postprocessing")
+        self._set_param("bootstrap_method", bootstrap_method, default=DEFAULT_BOOTSTRAP_METHOD)
+        self._set_param("bootstrap_type", bootstrap_type, default=DEFAULT_BOOTSTRAP_TYPE)
         self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
         self._set_param("neighbors", ["DEBW030"])  # TODO: just for testing
 
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index c4ce0088..57b4d6ef 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -103,7 +103,10 @@ class PostProcessing(RunEnvironment):
         if self.data_store.get("evaluate_bootstraps", "postprocessing"):
             with TimeTracking(name="calculate bootstraps"):
                 create_new_bootstraps = self.data_store.get("create_new_bootstraps", "postprocessing")
-                self.bootstrap_postprocessing(create_new_bootstraps)  # todo: make flexible and add boot method and type
+                bootstrap_method = self.data_store.get("bootstrap_method", "postprocessing")
+                bootstrap_type = self.data_store.get("bootstrap_type", "postprocessing")
+                self.bootstrap_postprocessing(create_new_bootstraps, bootstrap_type=bootstrap_type,
+                                              bootstrap_method=bootstrap_method)
 
         # skill scores and error metrics
         with TimeTracking(name="calculate skill scores"):
@@ -136,8 +139,8 @@ class PostProcessing(RunEnvironment):
                 continue
         return xr.concat(competing_predictions, "type") if len(competing_predictions) > 0 else None
 
-    def bootstrap_postprocessing(self, create_new_bootstraps: bool, _iter: int = 0, bootstrap_type=None,
-                                 bootstrap_method=None) -> None:
+    def bootstrap_postprocessing(self, create_new_bootstraps: bool, _iter: int = 0, bootstrap_type="singleinput",
+                                 bootstrap_method="shuffle") -> None:
         """
         Calculate skill scores of bootstrapped data.
 
-- 
GitLab
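
As a usage sketch, the new options can be passed through a workflow, since the run
scripts in this series forward their keyword arguments to ExperimentSetup (station id
and values are illustrative; the allowed values follow the defaults and diffs above):

    from mlair.workflows import DefaultWorkflow

    workflow = DefaultWorkflow(stations=["DEBW107"],
                               evaluate_bootstraps=True,
                               number_of_bootstraps=20,
                               bootstrap_type="singleinput",  # or "variable"
                               bootstrap_method="shuffle")    # or "zero_mean"
    workflow.run()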


From 83888fdc6431512f741d51b41d0eb1a23d8b01c2 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 16 Jul 2021 16:04:05 +0200
Subject: [PATCH 166/175] added branch bootstrap type

---
 mlair/data_handler/bootstraps.py     | 23 +++++++++++++++++++++--
 mlair/run_modules/post_processing.py |  4 +---
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/mlair/data_handler/bootstraps.py b/mlair/data_handler/bootstraps.py
index 0ae88599..e0388148 100644
--- a/mlair/data_handler/bootstraps.py
+++ b/mlair/data_handler/bootstraps.py
@@ -144,8 +144,27 @@ class BootstrapIteratorBranch(BootstrapIterator):
         super().__init__(*args)
 
     def __next__(self):
-        pass
-    # TODO: implement here: permute entire branch at once
+        try:
+            index = self._collection[self._position]
+            nboot = self._data.number_of_bootstraps
+            _X, _Y = self._data.data.get_data(as_numpy=False)
+            _X = list(map(lambda x: x.expand_dims({self.boot_dim: range(nboot)}, axis=-1), _X))
+            _Y = _Y.expand_dims({self.boot_dim: range(nboot)}, axis=-1)
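+            # permute each variable of the selected input branch; all other branches stay untouched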
+            for dimension in _X[index].coords[self._dimension].values:
+                single_variable = _X[index].sel({self._dimension: [dimension]})
+                bootstrapped_variable = self.apply_bootstrap_method(single_variable.values)
+                bootstrapped_data = xr.DataArray(bootstrapped_variable, coords=single_variable.coords,
+                                                 dims=single_variable.dims)
+                _X[index] = bootstrapped_data.combine_first(_X[index]).transpose(*_X[index].dims)
+            self._position += 1
+        except IndexError:
+            raise StopIteration()
+        _X, _Y = self._to_numpy(_X), self._to_numpy(_Y)
+        return self._reshape(_X), self._reshape(_Y), (None, index)
+
+    @classmethod
+    def create_collection(cls, data, dim):
+        return list(range(len(data.get_X(as_numpy=False))))
 
 
 class ShuffleBootstraps:
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 57b4d6ef..df8a7d5e 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -154,8 +154,6 @@ class PostProcessing(RunEnvironment):
             went wrong).
         """
         self.bootstrap_skill_scores = {}
-        bootstrap_type = ["variable", "singleinput"]  # Todo: make flexible
-        bootstrap_method = ["shuffle", "zero_mean"]  # Todo: make flexible
         for boot_type in to_list(bootstrap_type):
             self.bootstrap_skill_scores[boot_type] = {}
             for boot_method in to_list(bootstrap_method):
@@ -251,7 +249,7 @@ class PostProcessing(RunEnvironment):
                 # calculate skill scores for each variable
                 skill = pd.DataFrame(columns=range(1, self.window_lead_time + 1))
                 for boot_set in bootstrap_iter:
-                    boot_var = boot_set if isinstance(boot_set, str) else f"{boot_set[0]}_{boot_set[1]}"
+                    boot_var = f"{boot_set[0]}_{boot_set[1]}" if isinstance(boot_set, tuple) else str(boot_set)
                     file_name = os.path.join(forecast_path,
                                              f"bootstraps_{station}_{boot_var}_{bootstrap_type}_{bootstrap_method}.nc")
                     with xr.open_dataarray(file_name) as da:
-- 
GitLab
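
The iterator above permutes all variables of one input branch at once. The core idea,
shown standalone with plain xarray (data and dimension names are illustrative, and
rng.permutation stands in for apply_bootstrap_method):

    import numpy as np
    import xarray as xr

    rng = np.random.default_rng(0)
    branch = xr.DataArray(rng.random((5, 3)), dims=["index", "variables"],
                          coords={"variables": ["o3", "no2", "temp"]})
    boots = branch.expand_dims({"boots": range(4)}, axis=-1)  # one copy per bootstrap
    for var in branch.coords["variables"].values:
        single = boots.sel(variables=[var])
        shuffled = rng.permutation(single.values)  # bootstrap a single variable
        boots = xr.DataArray(shuffled, coords=single.coords,
                             dims=single.dims).combine_first(boots).transpose(*boots.dims)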


From 08c206f547be6d3fbcb2eaf77d660e306576f8e5 Mon Sep 17 00:00:00 2001
From: Falco Weichselbaum <f.weichselbaum@fz-juelich.de>
Date: Tue, 20 Jul 2021 12:39:24 +0200
Subject: [PATCH 167/175] ahead_dim="ahead" now as kwarg to skill_scores() and
 climatological_skill_scores()

---
 mlair/helpers/statistics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 15cef734..2cc52ed6 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -288,7 +288,7 @@ class SkillScores:
         combination_strings = [f"{first}-{second}" for (first, second) in combinations]
         return combinations, combination_strings
 
-    def skill_scores(self, window_lead_time: int) -> pd.DataFrame:
+    def skill_scores(self, window_lead_time: int, ahead_dim="ahead") -> pd.DataFrame:
         """
         Calculate skill scores for all combinations of model names.
 
@@ -296,7 +296,7 @@ class SkillScores:
 
         :return: skill score for each comparison and forecast step
         """
-        ahead_names = list(self.external_data.ahead.data)
+        ahead_names = list(self.external_data[ahead_dim].data)
         combinations, combination_strings = self.get_model_name_combinations()
         skill_score = pd.DataFrame(index=combination_strings)
         for iahead in ahead_names:
@@ -309,7 +309,7 @@ class SkillScores:
         return skill_score
 
     def climatological_skill_scores(self, internal_data: Data, window_lead_time: int,
-                                    forecast_name: str) -> xr.DataArray:
+                                    forecast_name: str, ahead_dim="ahead") -> xr.DataArray:
         """
         Calculate climatological skill scores according to Murphy (1988).
 
@@ -322,7 +322,7 @@ class SkillScores:
 
         :return: all CASES as well as all terms
         """
-        ahead_names = list(self.external_data.ahead.data)
+        ahead_names = list(self.external_data[ahead_dim].data)
 
         all_terms = ['AI', 'AII', 'AIII', 'AIV', 'BI', 'BII', 'BIV', 'CI', 'CIV', 'CASE I', 'CASE II', 'CASE III',
                      'CASE IV']
-- 
GitLab
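
Using item access instead of the hard-coded attribute is what makes the dimension name
configurable; a self-contained illustration (the name "lead" is only an example):

    import numpy as np
    import xarray as xr

    ahead_dim = "lead"  # any configured name instead of the fixed "ahead"
    da = xr.DataArray(np.arange(8).reshape(2, 4), dims=["index", ahead_dim],
                      coords={ahead_dim: [1, 2, 3, 4]})
    ahead_names = list(da[ahead_dim].data)                # [1, 2, 3, 4], as in the patch
    assert da.sel({ahead_dim: 2}).equals(da.sel(lead=2))  # dict form is name-agnostic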


From e8b75730b06ccda3257a45f7563f97bb9a71c7d0 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 20 Jul 2021 16:32:59 +0200
Subject: [PATCH 168/175] changed filter logging, added try statement on
 plotting

---
 mlair/helpers/filter.py                         |  3 ++-
 mlair/helpers/time_tracking.py                  |  5 +++--
 mlair/model_modules/fully_connected_networks.py |  6 +++---
 mlair/plotting/postprocessing_plotting.py       |  7 ++++---
 mlair/run_modules/post_processing.py            | 12 ++++++++----
 5 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index a551bec4..a63cef97 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -408,7 +408,8 @@ class ClimateFIRFilter:
                     continue
 
                 logging.debug(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
-                with TimeTracking(name=f"{data.coords['Stations'].values[0]} ({var}): filter convolve"):
+                with TimeTracking(name=f"{data.coords['Stations'].values[0]} ({var}): filter convolve",
+                                  logging_level=logging.DEBUG):
                     filt = xr.apply_ufunc(fir_filter_convolve, filter_input_data,
                                           input_core_dims=[[new_dim]],
                                           output_core_dims=[[new_dim]],
diff --git a/mlair/helpers/time_tracking.py b/mlair/helpers/time_tracking.py
index c85a6a04..3105ebcd 100644
--- a/mlair/helpers/time_tracking.py
+++ b/mlair/helpers/time_tracking.py
@@ -68,11 +68,12 @@ class TimeTracking(object):
     The only disadvantage of the latter implementation is, that the duration is logged but not returned.
     """
 
-    def __init__(self, start=True, name="undefined job"):
+    def __init__(self, start=True, name="undefined job", logging_level=logging.INFO):
         """Construct time tracking and start if enabled."""
         self.start = None
         self.end = None
         self._name = name
+        self._logging = {logging.INFO: logging.info, logging.DEBUG: logging.debug}.get(logging_level, logging.info)
         if start:
             self._start()
 
@@ -128,4 +129,4 @@ class TimeTracking(object):
     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         """Stop time tracking on exit and log info about passed time."""
         self.stop()
-        logging.info(f"{self._name} finished after {self}")
\ No newline at end of file
+        self._logging(f"{self._name} finished after {self}")
diff --git a/mlair/model_modules/fully_connected_networks.py b/mlair/model_modules/fully_connected_networks.py
index 21455383..03380333 100644
--- a/mlair/model_modules/fully_connected_networks.py
+++ b/mlair/model_modules/fully_connected_networks.py
@@ -374,7 +374,7 @@ class BranchedInputFCN(AbstractModelClass):
         print(self.model.summary())
 
     def set_compile_options(self):
-        # self.compile_options = {"loss": [keras.losses.mean_squared_error],
-        #                         "metrics": ["mse", "mae", var_loss]}
-        self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss], loss_weights=[2, 1])],
+        self.compile_options = {"loss": [keras.losses.mean_squared_error],
                                 "metrics": ["mse", "mae", var_loss]}
+        # self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss], loss_weights=[2, 1])],
+        #                         "metrics": ["mse", "mae", var_loss]}
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index eef9208a..fe658b09 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -644,9 +644,10 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
         else:
             self._plot()
             self._save()
-            self.plot_name += '_separated'
-            self._plot(separate_vars=separate_vars)
-            self._save(bbox_inches='tight')
+            if len(set(separate_vars).intersection(self._data[self._x_name].unique())) > 0:
+                self.plot_name += '_separated'
+                self._plot(separate_vars=separate_vars)
+                self._save(bbox_inches='tight')
 
     @staticmethod
     def _set_bootstrap_type(boot_type):
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index df8a7d5e..539a15ab 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -342,10 +342,14 @@ class PostProcessing(RunEnvironment):
             if (self.bootstrap_skill_scores is not None) and ("PlotBootstrapSkillScore" in plot_list):
                 for boot_type, boot_data in self.bootstrap_skill_scores.items():
                     for boot_method, boot_skill_score in boot_data.items():
-                        PlotBootstrapSkillScore(boot_skill_score, plot_folder=self.plot_path,
-                                                model_setup=self.forecast_indicator, sampling=self._sampling,
-                                                ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var),
-                                                bootstrap_type=boot_type, bootstrap_method=boot_method)
+                        try:
+                            PlotBootstrapSkillScore(boot_skill_score, plot_folder=self.plot_path,
+                                                    model_setup=self.forecast_indicator, sampling=self._sampling,
+                                                    ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var),
+                                                    bootstrap_type=boot_type, bootstrap_method=boot_method)
+                        except Exception as e:
+                            logging.error(f"Could not create plot PlotBootstrapSkillScore ({boot_type}, {boot_method}) "
+                                          f"due to the following error: {e}")
         except Exception as e:
             logging.error(f"Could not create plot PlotBootstrapSkillScore due to the following error: {e}")
 
-- 
GitLab
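
A usage sketch for the new logging_level parameter (assuming TimeTracking is importable
from mlair.helpers, as it is used elsewhere in the code base):

    import logging
    from mlair.helpers import TimeTracking

    logging.basicConfig(level=logging.DEBUG)
    with TimeTracking(name="filter convolve", logging_level=logging.DEBUG):
        sum(range(10_000))  # the duration is now logged on DEBUG instead of INFO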


From 9e9e74ce0bfa30769cf29bb375cbd112474a30e2 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 21 Jul 2021 13:49:36 +0200
Subject: [PATCH 169/175] adjust minmax scaling for filtered data to
 standardise

---
 mlair/data_handler/data_handler_with_filter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 80253b0a..e76f396a 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -61,6 +61,8 @@ class DataHandlerFilterSingleStation(DataHandlerSingleStation):
             for k, v in transformation[0].items():
                 if v["method"] == "log":
                     transformation[0][k]["method"] = "standardise"
+                elif v["method"] == "min_max":
+                    transformation[0][k]["method"] = "standardise"
         return transformation
 
     def _check_sampling(self, **kwargs):
-- 
GitLab
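
The effect of the added branch on an illustrative transformation setup (keys and
methods are examples; the structure follows the diff):

    transformation = ({"o3": {"method": "min_max"}, "no2": {"method": "log"}},)
    for k, v in transformation[0].items():
        if v["method"] == "log":
            transformation[0][k]["method"] = "standardise"
        elif v["method"] == "min_max":
            transformation[0][k]["method"] = "standardise"
    print(transformation[0])  # both entries now use "standardise"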


From a843aaeba3d4f54d5c186e644e9a006b55379a47 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 21 Jul 2021 16:18:54 +0200
Subject: [PATCH 170/175] extended development from #313 to be more flexible

---
 mlair/helpers/statistics.py          | 37 +++++++++++++---------------
 mlair/run_modules/post_processing.py |  8 +++---
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index d5499294..a1e713a8 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -257,11 +257,12 @@ class SkillScores:
     """
     models_default = ["cnn", "persi", "ols"]
 
-    def __init__(self, external_data: Data, models=None, observation_name="obs"):
+    def __init__(self, external_data: Union[Data, None], models=None, observation_name="obs", ahead_dim="ahead"):
         """Set internal data."""
         self.external_data = external_data
         self.models = self.set_model_names(models)
         self.observation_name = observation_name
+        self.ahead_dim = ahead_dim
 
     def set_model_names(self, models: List[str]) -> List[str]:
         """Either use given models or use defaults."""
@@ -283,19 +284,17 @@ class SkillScores:
         combination_strings = [f"{first}-{second}" for (first, second) in combinations]
         return combinations, combination_strings
 
-    def skill_scores(self, window_lead_time: int, ahead_dim="ahead") -> pd.DataFrame:
+    def skill_scores(self) -> pd.DataFrame:
         """
         Calculate skill scores for all combinations of model names.
 
-        :param window_lead_time: length of forecast steps
-
         :return: skill score for each comparison and forecast step
         """
-        ahead_names = list(self.external_data[ahead_dim].data)
+        ahead_names = list(self.external_data[self.ahead_dim].data)
         combinations, combination_strings = self.get_model_name_combinations()
         skill_score = pd.DataFrame(index=combination_strings)
         for iahead in ahead_names:
-            data = self.external_data.sel(ahead=iahead)
+            data = self.external_data.sel({self.ahead_dim: iahead})
             skill_score[iahead] = [self.general_skill_score(data,
                                                             forecast_name=first,
                                                             reference_name=second,
@@ -303,8 +302,7 @@ class SkillScores:
                                    for (first, second) in combinations]
         return skill_score
 
-    def climatological_skill_scores(self, internal_data: Data, window_lead_time: int,
-                                    forecast_name: str, ahead_dim="ahead") -> xr.DataArray:
+    def climatological_skill_scores(self, internal_data: Data, forecast_name: str) -> xr.DataArray:
         """
         Calculate climatological skill scores according to Murphy (1988).
 
@@ -312,20 +310,19 @@ class SkillScores:
         is part of parameters.
 
         :param internal_data: internal data
-        :param window_lead_time: interested time step of forecast horizon to select data
         :param forecast_name: name of the forecast to use for this calculation (must be available in `data`)
 
         :return: all CASES as well as all terms
         """
-        ahead_names = list(self.external_data[ahead_dim].data)
+        ahead_names = list(self.external_data[self.ahead_dim].data)
 
         all_terms = ['AI', 'AII', 'AIII', 'AIV', 'BI', 'BII', 'BIV', 'CI', 'CIV', 'CASE I', 'CASE II', 'CASE III',
                      'CASE IV']
         skill_score = xr.DataArray(np.full((len(all_terms), len(ahead_names)), np.nan), coords=[all_terms, ahead_names],
-                                   dims=['terms', 'ahead'])
+                                   dims=['terms', self.ahead_dim])
 
         for iahead in ahead_names:
-            data = internal_data.sel(ahead=iahead)
+            data = internal_data.sel({self.ahead_dim: iahead})
 
             skill_score.loc[["CASE I", "AI", "BI", "CI"], iahead] = np.stack(self._climatological_skill_score(
                 data, mu_type=1, forecast_name=forecast_name, observation_name=self.observation_name).values.flatten())
@@ -334,7 +331,7 @@ class SkillScores:
                 data, mu_type=2, forecast_name=forecast_name, observation_name=self.observation_name).values.flatten())
 
             if self.external_data is not None and self.observation_name in self.external_data.coords["type"]:
-                external_data = self.external_data.sel(ahead=iahead, type=[self.observation_name])
+                external_data = self.external_data.sel({self.ahead_dim: iahead, "type": [self.observation_name]})
                 skill_score.loc[["CASE III", "AIII"], iahead] = np.stack(self._climatological_skill_score(
                     data, mu_type=3, forecast_name=forecast_name, observation_name=self.observation_name,
                     external_data=external_data).values.flatten())
@@ -373,12 +370,12 @@ class SkillScores:
         skill_score = 1 - mse(observation, forecast) / mse(observation, reference)
         return skill_score.values
 
-    @staticmethod
-    def skill_score_pre_calculations(data: Data, observation_name: str, forecast_name: str) -> Tuple[np.ndarray,
-                                                                                                     np.ndarray,
-                                                                                                     np.ndarray,
-                                                                                                     Data,
-                                                                                                     Dict[str, Data]]:
+    def skill_score_pre_calculations(self, data: Data, observation_name: str, forecast_name: str) -> Tuple[np.ndarray,
+                                                                                                           np.ndarray,
+                                                                                                           np.ndarray,
+                                                                                                           Data,
+                                                                                                           Dict[
+                                                                                                               str, Data]]:
         """
         Calculate terms AI, BI, and CI, mean, variance and pearson's correlation and clean up data.
 
@@ -391,7 +388,7 @@ class SkillScores:
 
         :returns: Terms AI, BI, and CI, internal data without nans and mean, variance, correlation and its p-value
         """
-        data = data.sel(type=[observation_name, forecast_name]).drop("ahead")
+        data = data.sel(type=[observation_name, forecast_name]).drop(self.ahead_dim)
         data = data.dropna("index")
 
         mean = data.mean("index")
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 539a15ab..2c6a3539 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -231,7 +231,7 @@ class PostProcessing(RunEnvironment):
                                     bootstrap_method=bootstrap_method)
             number_of_bootstraps = bootstraps.number_of_bootstraps
             bootstrap_iter = bootstraps.bootstraps()
-            skill_scores = statistics.SkillScores(None)
+            skill_scores = statistics.SkillScores(None, ahead_dim=self.ahead_dim)
             score = {}
             for station in self.test_data:
                 # get station labels
@@ -742,14 +742,14 @@ class PostProcessing(RunEnvironment):
             competitor = self.load_competitors(station)
             combined = self._combine_forecasts(external_data, competitor, dim="type")
             model_list = remove_items(list(combined.type.values), "obs") if combined is not None else None
-            skill_score = statistics.SkillScores(combined, models=model_list)
+            skill_score = statistics.SkillScores(combined, models=model_list, ahead_dim=self.ahead_dim)
             if external_data is not None:
-                skill_score_competitive[station] = skill_score.skill_scores(self.window_lead_time)
+                skill_score_competitive[station] = skill_score.skill_scores()
 
             internal_data = self._get_internal_data(station, path)
             if internal_data is not None:
                 skill_score_climatological[station] = skill_score.climatological_skill_scores(
-                    internal_data, self.window_lead_time, forecast_name=self.forecast_indicator)
+                    internal_data, forecast_name=self.forecast_indicator)
 
         errors.update({"total": self.calculate_average_errors(errors)})
         return skill_score_competitive, skill_score_climatological, errors
-- 
GitLab


From d9a0aee00359819696bcb68ec1d030e385a8ac17 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 22 Jul 2021 09:54:50 +0200
Subject: [PATCH 171/175] MLAir can store start script if provided via
 start_script=__file__

---
 mlair/run_modules/experiment_setup.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 8036413c..209859c1 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -6,6 +6,7 @@ import logging
 import os
 import sys
 from typing import Union, Dict, Any, List, Callable
+from dill.source import getsource
 
 from mlair.configuration import path_config
 from mlair import helpers
@@ -217,7 +218,7 @@ class ExperimentSetup(RunEnvironment):
                  hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
                  data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
                  use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None,
-                 max_number_multiprocessing: int = None, **kwargs):
+                 max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -366,6 +367,10 @@ class ExperimentSetup(RunEnvironment):
         # set model architecture class
         self._set_param("model_class", model, VanillaModel)
 
+        # store starting script if provided
+        if start_script is not None:
+            self._store_start_script(start_script, experiment_path)
+
         # set remaining kwargs
         if len(kwargs) > 0:
             for k, v in kwargs.items():
@@ -387,6 +392,18 @@ class ExperimentSetup(RunEnvironment):
         logging.debug(f"set experiment attribute: {param}({scope})={value}")
         return value
 
+    @staticmethod
+    def _store_start_script(start_script, store_path):
+        out_file = os.path.join(store_path, "start_script.txt")
+        if isinstance(start_script, Callable):
+            with open(out_file, "w") as fh:
+                fh.write(getsource(start_script))
+        elif isinstance(start_script, str):
+            with open(start_script, "r") as f, open(out_file, "w") as out:
+                out.write(f.read())
+
     def _compare_variables_and_statistics(self):
         """
         Compare variables and statistics.
-- 
GitLab
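
Usage note for the patch above: `start_script` accepts either a callable,
whose source is extracted with dill's `getsource`, or a path string, which is
copied verbatim to `<experiment_path>/start_script.txt`. A minimal sketch,
assuming the default workflow forwards the keyword to ExperimentSetup
unchanged (the pattern the following patch relies on):

    from mlair.workflows import DefaultWorkflow

    # archive the running script alongside the experiment results
    workflow = DefaultWorkflow(stations=["DEBW107"], start_script=__file__)
    workflow.run()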


From 61f97525c914da8c743d6cec7cc0614b357e7d02 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 22 Jul 2021 09:55:47 +0200
Subject: [PATCH 172/175] added start script argument to all run scripts

---
 run.py                | 2 +-
 run_HPC.py            | 2 +-
 run_hourly.py         | 2 +-
 run_hourly_kz.py      | 2 +-
 run_mixed_sampling.py | 2 +-
 run_zam347.py         | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/run.py b/run.py
index f2bb336e..eb703e11 100644
--- a/run.py
+++ b/run.py
@@ -28,7 +28,7 @@ def main(parser_args):
         evaluate_bootstraps=False,  # plot_list=["PlotCompetitiveSkillScore"],
         competitors=["test_model", "test_model2"],
         competitor_path=os.path.join(os.getcwd(), "data", "comp_test"),
-        **parser_args.__dict__)
+        **parser_args.__dict__, start_script=__file__)
     workflow.run()
 
 
diff --git a/run_HPC.py b/run_HPC.py
index d6dbb4dc..dfa5045b 100644
--- a/run_HPC.py
+++ b/run_HPC.py
@@ -7,7 +7,7 @@ from mlair.workflows import DefaultWorkflowHPC
 
 def main(parser_args):
 
-    workflow = DefaultWorkflowHPC(**parser_args.__dict__)
+    workflow = DefaultWorkflowHPC(**parser_args.__dict__, start_script=__file__)
     workflow.run()
 
 
diff --git a/run_hourly.py b/run_hourly.py
index 48c72058..869f8ea1 100644
--- a/run_hourly.py
+++ b/run_hourly.py
@@ -22,7 +22,7 @@ def main(parser_args):
                                train_model=False,
                                create_new_model=False,
                                network="UBA",
-                               plot_list=["PlotStationMap"], **parser_args.__dict__)
+                               plot_list=["PlotStationMap"], **parser_args.__dict__, start_script=__file__)
     workflow.run()
 
 
diff --git a/run_hourly_kz.py b/run_hourly_kz.py
index 5536b56e..ba293916 100644
--- a/run_hourly_kz.py
+++ b/run_hourly_kz.py
@@ -19,7 +19,7 @@ def main(parser_args):
                 test_end="2011-12-31",
                 stations=["DEBW107", "DEBW013"]
                 )
-    workflow = DefaultWorkflow(**args)
+    workflow = DefaultWorkflow(**args, start_script=__file__)
     workflow.run()
 
 
diff --git a/run_mixed_sampling.py b/run_mixed_sampling.py
index 6ffb6599..819ef511 100644
--- a/run_mixed_sampling.py
+++ b/run_mixed_sampling.py
@@ -36,7 +36,7 @@ def main(parser_args):
                 test_end="2011-12-31",
                 **parser_args.__dict__,
                 )
-    workflow = DefaultWorkflow(**args)
+    workflow = DefaultWorkflow(**args, start_script=__file__)
     workflow.run()
 
 
diff --git a/run_zam347.py b/run_zam347.py
index 352f0417..49fce3e7 100644
--- a/run_zam347.py
+++ b/run_zam347.py
@@ -31,7 +31,7 @@ def load_stations():
 
 def main(parser_args):
 
-    workflow = DefaultWorkflowHPC(stations=load_stations(), **parser_args.__dict__)
+    workflow = DefaultWorkflowHPC(stations=load_stations(), **parser_args.__dict__, start_script=__file__)
     workflow.run()
 
 
-- 
GitLab
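
Side note on the pattern above: placing `start_script=__file__` after
`**parser_args.__dict__` is valid syntax, but Python raises a TypeError if
the unpacked dictionary ever contains a `start_script` key of its own. A
short illustration with hypothetical names:

    def main(**kwargs):
        print(kwargs)

    parsed = {"start_script": "other.py"}
    main(**parsed, start_script="run.py")
    # TypeError: main() got multiple values for keyword argument 'start_script'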


From e75f14ae4d81b9926c957674ffafdc41ca58a286 Mon Sep 17 00:00:00 2001
From: "v.gramlich1" <v.gramlichfz-juelich.de>
Date: Fri, 23 Jul 2021 08:36:13 +0200
Subject: [PATCH 173/175] Start debugging

---
 run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/run.py b/run.py
index f2bb336e..c9d0e3c9 100644
--- a/run.py
+++ b/run.py
@@ -25,6 +25,7 @@ def main(parser_args):
         # stations=["DEBW087","DEBW013", "DEBW107",  "DEBW076"],
         stations=["DEBW013", "DEBW087", "DEBW107", "DEBW076"],
         train_model=False, create_new_model=True, network="UBA",
+        window_lead_time=1,
         evaluate_bootstraps=False,  # plot_list=["PlotCompetitiveSkillScore"],
         competitors=["test_model", "test_model2"],
         competitor_path=os.path.join(os.getcwd(), "data", "comp_test"),
-- 
GitLab


From 41211579d6a3e774118f43ec437b7b594c8c916b Mon Sep 17 00:00:00 2001
From: "v.gramlich1" <v.gramlichfz-juelich.de>
Date: Fri, 23 Jul 2021 10:57:52 +0200
Subject: [PATCH 174/175] Enabled window_lead_time=1

---
 mlair/plotting/postprocessing_plotting.py | 2 ++
 mlair/run_modules/post_processing.py      | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 491aa52e..cd898e9e 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -81,6 +81,8 @@ class PlotMonthlySummary(AbstractPlotClass):
             data_nn = data.sel(type=self._model_name).squeeze()
             if len(data_nn.shape) > 1:
                 data_nn = data_nn.assign_coords(ahead=[f"{days}d" for days in data_nn.coords["ahead"].values])
+            else:
+                data_nn.coords["ahead"].values = str(data_nn.coords["ahead"].values) + "d"
 
             data_obs = data.sel(type="obs", ahead=1).squeeze()
             data_obs.coords["ahead"] = "obs"
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 0d7bfeb4..2c31fba9 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -533,7 +533,14 @@ class PostProcessing(RunEnvironment):
         """
         tmp_ols = self.ols_model.predict(input_data)
         target_shape = ols_prediction.values.shape
-        ols_prediction.values = np.swapaxes(tmp_ols, 2, 0) if target_shape != tmp_ols.shape else tmp_ols
+        if target_shape != tmp_ols.shape:
+            if len(target_shape) == 2:
+                new_values = np.swapaxes(tmp_ols, 1, 0)
+            else:
+                new_values = np.swapaxes(tmp_ols, 2, 0)
+        else:
+            new_values = tmp_ols
+        ols_prediction.values = new_values
         if not normalised:
             ols_prediction = transformation_func(ols_prediction, "target", inverse=True)
         return ols_prediction
-- 
GitLab
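
Background for the two fixes above (a sketch, not project code): with
`window_lead_time=1` the `ahead` axis has length 1, so `squeeze()` removes it
and downstream code sees one dimension fewer than usual. The same applies to
the OLS output, which then arrives 2-D instead of 3-D:

    import numpy as np
    import xarray as xr

    data = xr.DataArray(np.zeros((10, 1)), dims=("index", "ahead"))
    print(data.squeeze().ndim)  # 1 -> the "ahead" axis is gone (hence the new else branch)

    tmp_ols = np.zeros((1, 10))               # illustrative 2-D prediction
    print(np.swapaxes(tmp_ols, 1, 0).shape)   # (10, 1) matches a 2-D target shape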


From 63e222e5beb8870ea938ce37c10098f116daf226 Mon Sep 17 00:00:00 2001
From: "v.gramlich1" <v.gramlichfz-juelich.de>
Date: Fri, 23 Jul 2021 10:59:48 +0200
Subject: [PATCH 175/175] Enabled window_lead_time=1

---
 run.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/run.py b/run.py
index c9d0e3c9..f2bb336e 100644
--- a/run.py
+++ b/run.py
@@ -25,7 +25,6 @@ def main(parser_args):
         # stations=["DEBW087","DEBW013", "DEBW107",  "DEBW076"],
         stations=["DEBW013", "DEBW087", "DEBW107", "DEBW076"],
         train_model=False, create_new_model=True, network="UBA",
-        window_lead_time=1,
         evaluate_bootstraps=False,  # plot_list=["PlotCompetitiveSkillScore"],
         competitors=["test_model", "test_model2"],
         competitor_path=os.path.join(os.getcwd(), "data", "comp_test"),
-- 
GitLab