diff --git a/.gitlab/issue_templates/release.md b/.gitlab/issue_templates/release.md
index a95cf033eed919339c6c1734638542c3e0cdbc57..c8289cdd1be4ab02a61c6feb0db9db7cb6ca40d3 100644
--- a/.gitlab/issue_templates/release.md
+++ b/.gitlab/issue_templates/release.md
@@ -14,6 +14,7 @@ vX.Y.Z
 * [ ] Adjust `changelog.md` (see template for changelog)
 * [ ] Update version number in `mlair/__ init__.py`
 * [ ] Create new dist file: `python3 setup.py sdist bdist_wheel`
+* [ ] Add new dist file `mlair-X.Y.Z-py3-none-any.whl` to git
 * [ ] Update file link `distribution file (current version)` in `README.md`
 * [ ] Update file link in `docs/_source/installation.rst`
 * [ ] Commit + push
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b11a169c854465c5ea932f00f5da5a1688df7c18..34795b8333df846d5383fc2d8eca4b40517aab73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,25 @@
 # Changelog
 All notable changes to this project will be documented in this file.
 
-## v1.4.0 -  2021-07-27  - <release description>
+## v1.5.0 -  2021-11-11  - new uncertainty estimation
+
+### general:
+* introduces a method to estimate sample uncertainty
+* improved multiprocessing
+* last release with TensorFlow v1 support
+
+### new features:
+* test set sample uncertainty estimation during postprocessing (#333)
+* support of Kolmogorov-Zurbenko filter for data handlers with filters (#334)
+
+### technical:
+* new communication scheme for multiprocessing (#321, #322)
+* improved error reporting (#323)
+* feature importance now returns unaggregated results (#335)
+* error metrics are reported for all competitors (#332)
+* minor bugfixes and refactorings (#330, #326, #329, #325, #324, #320, #337)
+
+## v1.4.0 -  2021-07-27  - new model classes and data handlers, improved usability and transparency 
 
 ### general:
 * many technical adjustments to improve usability and transparency of MLAir
diff --git a/HPC_setup/create_runscripts_HPC.sh b/HPC_setup/create_runscripts_HPC.sh
index 5e37d820ae1241c09c1c87c141bdcf005044a3b7..730aa52ef42144826bd000d88c0fc81c9d508de0 100755
--- a/HPC_setup/create_runscripts_HPC.sh
+++ b/HPC_setup/create_runscripts_HPC.sh
@@ -85,7 +85,7 @@ source venv_${hpcsys}/bin/activate
 
 timestamp=\`date +"%Y-%m-%d_%H%M-%S"\`
 
-export PYTHONPATH=\${PWD}/venv_${hpcsys}/lib/python3.6/site-packages:\${PYTHONPATH}
+export PYTHONPATH=\${PWD}/venv_${hpcsys}/lib/python3.8/site-packages:\${PYTHONPATH}
 
 srun --cpu-bind=none python run.py --experiment_date=\$timestamp
 EOT
@@ -102,6 +102,7 @@ cat <<EOT > ${cur}/run_${hpcsys}_batch.bash
 #SBATCH --output=${hpclogging}mlt-out.%j
 #SBATCH --error=${hpclogging}mlt-err.%j
 #SBATCH --time=08:00:00
+#SBATCH --gres=gpu:4
 #SBATCH --mail-type=ALL
 #SBATCH --mail-user=${email}
 
@@ -110,7 +111,7 @@ source venv_${hpcsys}/bin/activate
 
 timestamp=\`date +"%Y-%m-%d_%H%M-%S"\`
 
-export PYTHONPATH=\${PWD}/venv_${hpcsys}/lib/python3.6/site-packages:\${PYTHONPATH}
+export PYTHONPATH=\${PWD}/venv_${hpcsys}/lib/python3.8/site-packages:\${PYTHONPATH}
 
 srun --cpu-bind=none python run_HPC.py --experiment_date=\$timestamp
 EOT
diff --git a/HPC_setup/mlt_modules_juwels.sh b/HPC_setup/mlt_modules_juwels.sh
index e72c0f63141bad4bab442e18b93d9fbb37adb287..ffacfe6fc45302dfa60b108ca2493d9a27408df1 100755
--- a/HPC_setup/mlt_modules_juwels.sh
+++ b/HPC_setup/mlt_modules_juwels.sh
@@ -11,7 +11,7 @@ module use $OTHERSTAGES
 ml Stages/2020
 ml GCCcore/.10.3.0
 
-# ml Jupyter/2021.3.1-Python-3.8.5
+ml Jupyter/2021.3.1-Python-3.8.5
 ml Python/3.8.5
 ml TensorFlow/2.5.0-Python-3.8.5
 ml SciPy-Stack/2021-Python-3.8.5
diff --git a/HPC_setup/setup_venv_hdfml.sh b/HPC_setup/setup_venv_hdfml.sh
index 7e8334dd26874514c4fcfa686c49eeb7e1cabf0d..dbe001d587f3c0333a7e06cbe41c93759a53efda 100644
--- a/HPC_setup/setup_venv_hdfml.sh
+++ b/HPC_setup/setup_venv_hdfml.sh
@@ -31,14 +31,14 @@ echo "##### FINISH INSTALLING requirements_HDFML_additionals.txt #####"
 # pip install --ignore-installed matplotlib==3.2.0
 # pip install --ignore-installed pandas==1.0.1
 # pip install --ignore-installed statsmodels==0.11.1
-pip install --ignore-installed tabulate
-pip install -U typing_extensions
+# pip install --ignore-installed tabulate
+# pip install -U typing_extensions
 # see wiki on hdfml for information on h5py:
 # https://gitlab.version.fz-juelich.de/haf/Wiki/-/wikis/HDF-ML%20System
 
 export CC=mpicc
 export HDF5_MPI="ON"
 pip install --no-binary=h5py h5py
-pip install --ignore-installed netcdf4==1.5.4
+# pip install --ignore-installed netcdf4==1.5.4
 
 python -m pip install "dask[complete]" 
diff --git a/HPC_setup/setup_venv_juwels.sh b/HPC_setup/setup_venv_juwels.sh
index ba44900ee2db3e3cde63b4d38c05e643eb154d5c..242ff376b92bac0b77b3224c9703d67c2a6f4136 100755
--- a/HPC_setup/setup_venv_juwels.sh
+++ b/HPC_setup/setup_venv_juwels.sh
@@ -31,7 +31,7 @@ echo "##### FINISH INSTALLING requirements_JUWELS_additionals.txt #####"
 
 # pip install --ignore-installed matplotlib==3.2.0
 # pip install --ignore-installed pandas==1.0.1
-pip install -U typing_extensions
+# pip install -U typing_extensions
 
 python -m pip install --ignore-installed "dask[complete]==2021.3.0"
 # Comment: Maybe we have to export PYTHONPATH a second time after activating the venv (after job allocation)
diff --git a/README.md b/README.md
index 0e1df0561d15b743a85b0981b552a1444b6cc38c..a5fce2e53d82e3cff75a4f61000c616c62cbec69 100644
--- a/README.md
+++ b/README.md
@@ -25,13 +25,15 @@ HPC systems, see [here](#special-instructions-for-installation-on-jülich-hpc-sy
 * Install all **requirements** from [`requirements.txt`](https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/requirements.txt)
   preferably in a virtual environment. You can use `pip install -r requirements.txt` to install all requirements at 
   once. Note, we recently updated the version of Cartopy and there seems to be an ongoing 
-  [issue](https://github.com/SciTools/cartopy/issues/1552) when installing numpy and Cartopy at the same time. If you
-  run into trouble, you could use `cat requirements.txt | cut -f1 -d"#" | sed '/^\s*$/d' | xargs -L 1 pip install` 
-  instead.
+  [issue](https://github.com/SciTools/cartopy/issues/1552) when installing **numpy** and **Cartopy** at the same time. 
+  If you run into trouble, you could use 
+  `cat requirements.txt | cut -f1 -d"#" | sed '/^\s*$/d' | xargs -L 1 pip install` instead, or first install numpy with 
+  `pip install numpy==<version_from_reqs>` followed by the default installation of requirements. For the latter, you can
+  also use `grep numpy requirements.txt | xargs pip install`.
 * Installation of **MLAir**:
     * Either clone MLAir from the [gitlab repository](https://gitlab.version.fz-juelich.de/toar/mlair.git) 
       and use it without installation (beside the requirements) 
-    * or download the distribution file ([current version](https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.4.0-py3-none-any.whl)) 
+    * or download the distribution file ([current version](https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.5.0-py3-none-any.whl)) 
       and install it via `pip install <dist_file>.whl`. In this case, you can simply import MLAir in any python script 
       inside your virtual environment using `import mlair`.
 * (tf) Currently, TensorFlow-1.13 is mentioned in the requirements. We already tested the TensorFlow-1.15 version and couldn't
diff --git a/dist/mlair-1.5.0-py3-none-any.whl b/dist/mlair-1.5.0-py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..34495d960b009737fb40bec6dfe3a96effd14c02
Binary files /dev/null and b/dist/mlair-1.5.0-py3-none-any.whl differ
diff --git a/docs/_source/installation.rst b/docs/_source/installation.rst
index 27543ac109439e487756cc211ecc47be946c586c..c87e64b217b4207185cfc662fdf00d2f7e891cc5 100644
--- a/docs/_source/installation.rst
+++ b/docs/_source/installation.rst
@@ -26,7 +26,7 @@ Installation of MLAir
 * Install all requirements from `requirements.txt <https://gitlab.version.fz-juelich.de/toar/machinelearningtools/-/blob/master/requirements.txt>`_
   preferably in a virtual environment
 * Either clone MLAir from the `gitlab repository <https://gitlab.version.fz-juelich.de/toar/machinelearningtools.git>`_
-* or download the distribution file (`current version <https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.4.0-py3-none-any.whl>`_)
+* or download the distribution file (`current version <https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.5.0-py3-none-any.whl>`_)
   and install it via :py:`pip install <dist_file>.whl`. In this case, you can simply
   import MLAir in any python script inside your virtual environment using :py:`import mlair`.
 * (tf) Currently, TensorFlow-1.13 is mentioned in the requirements. We already tested the TensorFlow-1.15 version and couldn't
diff --git a/docs/requirements_docs.txt b/docs/requirements_docs.txt
index a39acca8a7cf887237f595d7992960ac10233a85..8ccf3ba6515d31ebd2b35901d3c9e58734d653d8 100644
--- a/docs/requirements_docs.txt
+++ b/docs/requirements_docs.txt
@@ -3,4 +3,5 @@ sphinx-autoapi==1.3.0
 sphinx-autodoc-typehints==1.10.3
 sphinx-rtd-theme==0.4.3
 #recommonmark==0.6.0
-m2r2==0.2.5
\ No newline at end of file
+m2r2==0.2.5
+docutils<0.18
\ No newline at end of file
diff --git a/mlair/__init__.py b/mlair/__init__.py
index f760f9b0fa4b87bde1f6ee409626f4428083d895..75359e1773edea55ecc47556a83a465510fac6c8 100644
--- a/mlair/__init__.py
+++ b/mlair/__init__.py
@@ -1,6 +1,6 @@
 __version_info__ = {
     'major': 1,
-    'minor': 4,
+    'minor': 5,
     'micro': 0,
 }
 
diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 5d70d9f0d76817591b92cdb5ccddd95ad0d5d006..7fb31f5ad4519373d2ef0e6ec045fa2ad08b7213 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -54,7 +54,7 @@ DEFAULT_FEATURE_IMPORTANCE_N_BOOTS = 20
 DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE = "singleinput"
 DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD = "shuffle"
 DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
-                     "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
+                     "PlotCompetitiveSkillScore", "PlotFeatureImportanceSkillScore", "PlotConditionalQuantiles",
                      "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotPeriodogram",
                      "PlotSampleUncertaintyFromBootstrap"]
 DEFAULT_SAMPLING = "daily"
diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 628e6b44aef975c67c468d1f1c27c6588bc701b6..28b58feb3671935fff627f0197055754adaa5216 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -615,6 +615,12 @@ class DataHandlerSingleStation(AbstractDataHandler):
             non_nan_history = self.history.dropna(dim=dim)
             non_nan_label = self.label.dropna(dim=dim)
             non_nan_observation = self.observation.dropna(dim=dim)
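+            # fail early with a clear message if a station provides no valid (non-NaN) samples at all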
+            if non_nan_label.coords[dim].shape[0] == 0:
+                raise ValueError(f'self.label consists of NaNs only - station {self.station} is therefore dropped')
+            if non_nan_history.coords[dim].shape[0] == 0:
+                raise ValueError(f'self.history consists of NaNs only - station {self.station} is therefore dropped')
+            if non_nan_observation.coords[dim].shape[0] == 0:
+                raise ValueError(f'self.observation consists of NaNs only - station {self.station} is therefore dropped')
             intersect = reduce(np.intersect1d, (non_nan_history.coords[dim].values, non_nan_label.coords[dim].values,
                                                 non_nan_observation.coords[dim].values))
 
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 3649eaf65ed7efe45c5fac54f892bd1e471e5838..6b51eac9da03fbd39e54dfd5eb878e2ccf82734c 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -304,6 +304,7 @@ class DefaultDataHandler(AbstractDataHandler):
                 os.remove(_res_file)
                 transformation_dict = cls.update_transformation_dict(dh, transformation_dict)
             pool.close()
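+            # wait for the worker processes to exit; close() only prevents new tasks from being submitted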
+            pool.join()
         else:  # serial solution
             logging.info("use serial transformation approach")
             sp_keys.update({"return_strategy": "result"})
diff --git a/mlair/data_handler/input_bootstraps.py b/mlair/data_handler/input_bootstraps.py
index ab4c71f1c77ab12b0e751f6991fbf20cd55aa8ae..187f09050bb39a953ac58c2b7fca54b6a207aed1 100644
--- a/mlair/data_handler/input_bootstraps.py
+++ b/mlair/data_handler/input_bootstraps.py
@@ -28,12 +28,13 @@ class BootstrapIterator(Iterator):
 
     _position: int = None
 
-    def __init__(self, data: "Bootstraps", method):
+    def __init__(self, data: "Bootstraps", method, return_reshaped=False):
         assert isinstance(data, Bootstraps)
         self._data = data
         self._dimension = data.bootstrap_dimension
         self.boot_dim = "boots"
         self._method = method
+        self._return_reshaped = return_reshaped
         self._collection = self.create_collection(self._data.data, self._dimension)
         self._position = 0
 
@@ -46,12 +47,15 @@ class BootstrapIterator(Iterator):
         raise NotImplementedError
 
     def _reshape(self, d):
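+        # collapse the bootstrap axis into the sample axis only on request; otherwise pass the data through unchanged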
-        if isinstance(d, list):
-            return list(map(lambda x: self._reshape(x), d))
-            # return list(map(lambda x: np.rollaxis(x, -1, 0).reshape(x.shape[0] * x.shape[-1], *x.shape[1:-1]), d))
+        if self._return_reshaped:
+            if isinstance(d, list):
+                return list(map(lambda x: self._reshape(x), d))
+                # return list(map(lambda x: np.rollaxis(x, -1, 0).reshape(x.shape[0] * x.shape[-1], *x.shape[1:-1]), d))
+            else:
+                shape = d.shape
+                return np.rollaxis(d, -1, 0).reshape(shape[0] * shape[-1], *shape[1:-1])
         else:
-            shape = d.shape
-            return np.rollaxis(d, -1, 0).reshape(shape[0] * shape[-1], *shape[1:-1])
+            return d
 
     def _to_numpy(self, d):
         if isinstance(d, list):
@@ -75,8 +79,8 @@ class BootstrapIterator(Iterator):
 class BootstrapIteratorSingleInput(BootstrapIterator):
     _position: int = None
 
-    def __init__(self, *args):
-        super().__init__(*args)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
 
     def __next__(self):
         """Return next element or stop iteration."""
@@ -107,8 +111,8 @@ class BootstrapIteratorSingleInput(BootstrapIterator):
 
 class BootstrapIteratorVariable(BootstrapIterator):
 
-    def __init__(self, *args):
-        super().__init__(*args)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
 
     def __next__(self):
         """Return next element or stop iteration."""
@@ -140,8 +144,8 @@ class BootstrapIteratorVariable(BootstrapIterator):
 
 class BootstrapIteratorBranch(BootstrapIterator):
 
-    def __init__(self, *args):
-        super().__init__(*args)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
 
     def __next__(self):
         try:
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 543cff3624577ac617733b8b593c5f52f25196b3..36c93b04486fc9be013af2c4f34d2b3ee1bd84c2 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -768,6 +768,7 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
 
 
 def firwin_kzf(m, k):
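+    # coerce to int: range() and the array shape arithmetic below fail on float arguments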
+    m, k = int(m), int(k)
     coef = np.ones(m)
     for i in range(1, k):
         t = np.zeros((m, m + i * (m - 1)))
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index aa89da0ea66e263d076af9abd578ba125c260bec..19a4893d49c7702cd092858c5c885453e974cbc1 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -284,7 +284,7 @@ class SkillScores:
     def get_model_name_combinations(self):
         """Return all combinations of two models as tuple and string."""
         combinations = list(itertools.combinations(self.models, 2))
-        combination_strings = [f"{first}-{second}" for (first, second) in combinations]
+        combination_strings = [f"{first} - {second}" for (first, second) in combinations]
         return combinations, combination_strings
 
     def skill_scores(self) -> [pd.DataFrame, pd.DataFrame]:
@@ -361,7 +361,7 @@ class SkillScores:
                                                                        **kwargs)
 
     def general_skill_score(self, data: Data, forecast_name: str, reference_name: str,
-                            observation_name: str = None) -> np.ndarray:
+                            observation_name: str = None, dim: str = "index") -> np.ndarray:
         r"""
         Calculate general skill score based on mean squared error.
 
@@ -374,12 +374,12 @@ class SkillScores:
         """
         if observation_name is None:
             observation_name = self.observation_name
-        data = data.dropna("index")
+        data = data.dropna(dim)
         observation = data.sel(type=observation_name)
         forecast = data.sel(type=forecast_name)
         reference = data.sel(type=reference_name)
         mse = mean_squared_error
-        skill_score = 1 - mse(observation, forecast) / mse(observation, reference)
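+        # compute the MSE along the given dimension only, so remaining coordinates stay resolved in the skill score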
+        skill_score = 1 - mse(observation, forecast, dim=dim) / mse(observation, reference, dim=dim)
         return skill_score.values
 
     def get_count(self, data: Data, forecast_name: str, reference_name: str,
diff --git a/mlair/helpers/time_tracking.py b/mlair/helpers/time_tracking.py
index 3105ebcd04406b7d449ba312bd3af46f83e3a716..cf366db88adc524e90c2b771bef77c71ee5a9502 100644
--- a/mlair/helpers/time_tracking.py
+++ b/mlair/helpers/time_tracking.py
@@ -68,12 +68,13 @@ class TimeTracking(object):
     The only disadvantage of the latter implementation is, that the duration is logged but not returned.
     """
 
-    def __init__(self, start=True, name="undefined job", logging_level=logging.INFO):
+    def __init__(self, start=True, name="undefined job", logging_level=logging.INFO, log_on_enter=False):
         """Construct time tracking and start if enabled."""
         self.start = None
         self.end = None
         self._name = name
         self._logging = {logging.INFO: logging.info, logging.DEBUG: logging.debug}.get(logging_level, logging.info)
+        self._log_on_enter = log_on_enter
         if start:
             self._start()
 
@@ -124,6 +125,7 @@ class TimeTracking(object):
 
     def __enter__(self):
         """Context manager."""
+        self._logging(f"start {self._name}") if self._log_on_enter is True else None
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
diff --git a/mlair/model_modules/abstract_model_class.py b/mlair/model_modules/abstract_model_class.py
index e7d0437f2ff62b635146047496f09db9e7fcdd5c..95f6d8edf8189aee022c663dafa035ba89d5241a 100644
--- a/mlair/model_modules/abstract_model_class.py
+++ b/mlair/model_modules/abstract_model_class.py
@@ -39,6 +39,13 @@ class AbstractModelClass(ABC):
         self._output_shape = self.__extract_from_tuple(output_shape)
         # self.avail_gpus = len(K.tensorflow_backend._get_available_gpus())
 
+    def load_model(self, name: str, compile: bool = False) -> None:
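+        # stash and restore the History object; defensive, in case load_weights resets it on some Keras versions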
+        hist = self.model.history
+        self.model.load_weights(name)
+        self.model.history = hist
+        if compile is True:
+            self.model.compile(**self.compile_options)
+
     def __getattr__(self, name: str) -> Any:
         """
         Is called if __getattribute__ is not able to find requested attribute.
diff --git a/mlair/model_modules/keras_extensions.py b/mlair/model_modules/keras_extensions.py
index d890e7b0ff3beea812d8fc7766433a84d65a1ebe..8b99acd0f5723d3b00ec1bd0098712753da21b52 100644
--- a/mlair/model_modules/keras_extensions.py
+++ b/mlair/model_modules/keras_extensions.py
@@ -3,6 +3,7 @@
 __author__ = 'Lukas Leufen, Felix Kleinert'
 __date__ = '2020-01-31'
 
+import copy
 import logging
 import math
 import pickle
@@ -199,12 +200,18 @@ class ModelCheckpointAdvanced(ModelCheckpoint):
                         if self.verbose > 0:  # pragma: no branch
                             print('\nEpoch %05d: save to %s' % (epoch + 1, file_path))
                         with open(file_path, "wb") as f:
-                            pickle.dump(callback["callback"], f)
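+                            # Keras models are not picklable; strip the model reference from a shallow copy before dumping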
+                            c = copy.copy(callback["callback"])
+                            if hasattr(c, "model"):
+                                c.model = None
+                            pickle.dump(c, f)
                 else:
                     with open(file_path, "wb") as f:
                         if self.verbose > 0:  # pragma: no branch
                             print('\nEpoch %05d: save to %s' % (epoch + 1, file_path))
-                        pickle.dump(callback["callback"], f)
+                        c = copy.copy(callback["callback"])
+                        if hasattr(c, "model"):
+                            c.model = None
+                        pickle.dump(c, f)
 
 
 clbk_type = TypedDict("clbk_type", {"name": str, str: Callback, "path": str})
@@ -346,6 +353,8 @@ class CallbackHandler:
         for pos, callback in enumerate(self.__callbacks):
             path = callback["path"]
             clb = pickle.load(open(path, "rb"))
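+            # re-attach the model reference that was stripped before pickling (see ModelCheckpointAdvanced)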
+            if clb.model is None and hasattr(self._checkpoint, "model"):
+                clb.model = self._checkpoint.model
             self._update_callback(pos, clb)
 
     def update_checkpoint(self, history_name: str = "hist") -> None:
diff --git a/mlair/plotting/abstract_plot_class.py b/mlair/plotting/abstract_plot_class.py
index dab45156ac1bbe033ba073e01245ffc8b65ca6b3..c91dbec78c4bc990cc9c40c3afb6c506b62928d8 100644
--- a/mlair/plotting/abstract_plot_class.py
+++ b/mlair/plotting/abstract_plot_class.py
@@ -59,7 +59,7 @@ class AbstractPlotClass:
         if not os.path.exists(plot_folder):
             os.makedirs(plot_folder)
         self.plot_folder = plot_folder
-        self.plot_name = plot_name
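+        # plot names are used as file names; replace "/" so it is not interpreted as a directory separator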
+        self.plot_name = plot_name.replace("/", "_")
         self.resolution = resolution
         if rc_params is None:
             rc_params = {'axes.labelsize': 'large',
diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 6180493741c030d5dfdfcfa8972035619632c8aa..47051a500c29349197f3163861a0fe40cade525d 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -21,6 +21,8 @@ from mlair.data_handler import DataCollection
 from mlair.helpers import TimeTrackingWrapper, to_list, remove_items
 from mlair.plotting.abstract_plot_class import AbstractPlotClass
 
+matplotlib.use("Agg")
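+# note: Agg is a non-interactive backend, so figures can be rendered in display-less batch/HPC sessions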
+
 
 @TimeTrackingWrapper
 class PlotStationMap(AbstractPlotClass):  # pragma: no cover
@@ -632,7 +634,10 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
             self._plot_total(raw=True)
             self._plot_total(raw=False)
             if multiple > 1:
-                self._plot_difference(label_names)
+                self._plot_difference(label_names, plot_name_add="_last")
+                self._prepare_pgram(generator, pos, multiple, use_multiprocessing=use_multiprocessing,
+                                    use_last_input_value=False)
+                self._plot_difference(label_names, plot_name_add="_first")
 
     @staticmethod
     def _has_filter_dimension(g, pos):
@@ -649,7 +654,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
                 return check_data.coords[filter_dim].shape[0], check_data.coords[filter_dim].values.tolist()
 
     @TimeTrackingWrapper
-    def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False):
+    def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False, use_last_input_value=True):
         """
         Create periodogram data.
         """
@@ -663,7 +668,8 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
             plot_data_raw_single = dict()
             plot_data_mean_single = dict()
             self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000)
-            raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing)
+            raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing,
+                                                               use_last_input_value=use_last_input_value)
             for var in raw_data_single.keys():
                 pgram_com = []
                 pgram_mean = 0
@@ -705,6 +711,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
                 for i, p in enumerate(output):
                     res.append(p.get())
                 pool.close()
+                pool.join()
             else:  # serial solution
                 for var in d[self.variables_dim].values:
                     res.append(f_proc(var, d.loc[{self.variables_dim: var}].squeeze().dropna(self.time_dim)))
@@ -715,7 +722,7 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
                     raw_data_single[var_str] = raw_data_single[var_str] + [(f, pgram)]
         return raw_data_single
 
-    def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing):
+    def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing, use_last_input_value=True):
         """Implementation of data preprocessing using parallel generator element processing."""
         raw_data_single = dict()
         res = []
@@ -723,14 +730,16 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
             pool = multiprocessing.Pool(
                 min([psutil.cpu_count(logical=False), len(generator), 16]))  # use only physical cpus
             output = [
-                pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index))
+                pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index,
+                                                 use_last_input_value))
                 for g in generator]
             for i, p in enumerate(output):
                 res.append(p.get())
             pool.close()
+            pool.join()
         else:
             for g in generator:
-                res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index))
+                res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index, use_last_input_value))
         for res_dict in res:
             for k, v in res_dict.items():
                 if k not in raw_data_single.keys():
@@ -816,8 +825,8 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         pdf_pages.close()
         plt.close('all')
 
-    def _plot_difference(self, label_names):
-        plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter.pdf"
+    def _plot_difference(self, label_names, plot_name_add = ""):
+        plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter{plot_name_add}.pdf"
         plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name)
         logging.info(f"... plotting {plot_name}")
         pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
@@ -846,24 +855,33 @@ class PlotPeriodogram(AbstractPlotClass):  # pragma: no cover
         plt.close('all')
 
 
-def f_proc(var, d_var, f_index, time_dim="datetime"):  # pragma: no cover
+def f_proc(var, d_var, f_index, time_dim="datetime", use_last_value=True):  # pragma: no cover
     var_str = str(var)
     t = (d_var[time_dim] - d_var[time_dim][0]).astype("timedelta64[h]").values / np.timedelta64(1, "D")
     if len(d_var.shape) > 1:  # use only max value if dimensions are remaining (e.g. max(window) -> latest value)
         to_remove = remove_items(d_var.coords.dims, time_dim)
         for e in to_list(to_remove):
-            d_var = d_var.sel({e: d_var[e].max()})
+            d_var = d_var.sel({e: d_var[e].max() if use_last_value is True else d_var[e].min()})
     pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").power(f_index)
     # f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").autopower()
     return var_str, f_index, pgram
 
 
-def f_proc_2(g, m, pos, variables_dim, time_dim, f_index):  # pragma: no cover
+def f_proc_2(g, m, pos, variables_dim, time_dim, f_index, use_last_value):  # pragma: no cover
     raw_data_single = dict()
     if hasattr(g.id_class, "lazy"):
         g.id_class.load_lazy() if g.id_class.lazy is True else None
     if m == 0:
         d = g.id_class._data
+        if d is None:
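+            # raw data was not kept (e.g. for lazy data handlers): rebuild a proxy from the last history entry and the first label entry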
+            window_dim = g.id_class.window_dim
+            history = g.id_class.history
+            last_entry = history.coords[window_dim][-1]
+            d1 = history.sel({window_dim: last_entry}, drop=True)
+            label = g.id_class.label
+            first_entry = label.coords[window_dim][0]
+            d2 = label.sel({window_dim: first_entry}, drop=True)
+            d = (d1, d2)
     else:
         gd = g.id_class
         filter_sel = {"filter": gd.input_data.coords["filter"][m - 1]}
@@ -871,7 +889,7 @@ def f_proc_2(g, m, pos, variables_dim, time_dim, f_index):  # pragma: no cover
     d = d[pos] if isinstance(d, tuple) else d
     for var in d[variables_dim].values:
         d_var = d.loc[{variables_dim: var}].squeeze().dropna(time_dim)
-        var_str, f, pgram = f_proc(var, d_var, f_index)
+        var_str, f, pgram = f_proc(var, d_var, f_index, use_last_value=use_last_value)
         raw_data_single[var_str] = [(f, pgram)]
     if hasattr(g.id_class, "lazy"):
         g.id_class.clean_up() if g.id_class.lazy is True else None
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 29b609fdf8f404059918a76eea89ba67eac2e207..60526617383ba0e86459e1582cc9b74e28eb59f0 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -130,7 +130,7 @@ class PlotMonthlySummary(AbstractPlotClass):  # pragma: no cover
         logging.debug("... start plotting")
         color_palette = [matplotlib.colors.cnames["green"]] + sns.color_palette("Blues_d",
                                                                                 self._window_lead_time).as_hex()
-        ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1., palette=color_palette,
+        ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1.5, palette=color_palette,
                          flierprops={'marker': '.', 'markersize': 1}, showmeans=True,
                          meanprops={'markersize': 1, 'markeredgecolor': 'k'})
         ylabel = self._spell_out_chemical_concentrations(target_var)
@@ -172,14 +172,14 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
     warnings.filterwarnings("ignore", message="Attempted to set non-positive bottom ylim on a log-scaled axis.")
 
     def __init__(self, stations: List, data_pred_path: str, plot_folder: str = ".", plot_per_seasons=True,
-                 rolling_window: int = 3, model_name: str = "nn", obs_name: str = "obs", **kwargs):
+                 rolling_window: int = 3, forecast_indicator: str = "nn", obs_indicator: str = "obs", **kwargs):
         """Initialise."""
         super().__init__(plot_folder, "conditional_quantiles")
         self._data_pred_path = data_pred_path
         self._stations = stations
         self._rolling_window = rolling_window
-        self._model_name = model_name
-        self._obs_name = obs_name
+        self._forecast_indicator = forecast_indicator
+        self._obs_name = obs_indicator
         self._opts = self._get_opts(kwargs)
         self._seasons = ['DJF', 'MAM', 'JJA', 'SON'] if plot_per_seasons is True else ""
         self._data = self._load_data()
@@ -206,7 +206,8 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
         for station in self._stations:
             file = os.path.join(self._data_pred_path, f"forecasts_{station}_test.nc")
             data_tmp = xr.open_dataarray(file)
-            data_collector.append(data_tmp.loc[:, :, [self._model_name, self._obs_name]].assign_coords(station=station))
+            data_collector.append(data_tmp.loc[:, :, [self._forecast_indicator,
+                                                      self._obs_name]].assign_coords(station=station))
         res = xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
         return res
 
@@ -322,7 +323,7 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
         """Create seasonal plots."""
         for season in self._seasons:
             try:
-                self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._model_name,
+                self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._forecast_indicator,
                                 y_model=self._obs_name, plot_name_affix="cali-ref", season=season)
             except Exception as e:
                 logging.error(f"Could not create plot PlotConditionalQuantiles._plot_seasons: {season}, cali-ref" 
@@ -330,7 +331,7 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
                               f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
             try:
                 self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._obs_name,
-                                y_model=self._model_name, plot_name_affix="like-base", season=season)
+                                y_model=self._forecast_indicator, plot_name_affix="like-base", season=season)
             except Exception as e:
                 logging.error(f"Could not create plot PlotConditionalQuantiles._plot_seasons: {season}, like-base" 
                               f" due to the following error:"
@@ -338,8 +339,8 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
 
     def _plot_all(self):
         """Plot overall conditional quantiles on full data."""
-        self._plot_base(data=self._data, x_model=self._model_name, y_model=self._obs_name, plot_name_affix="cali-ref")
-        self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._model_name, plot_name_affix="like-base")
+        self._plot_base(data=self._data, x_model=self._forecast_indicator, y_model=self._obs_name, plot_name_affix="cali-ref")
+        self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._forecast_indicator, plot_name_affix="like-base")
 
     @TimeTrackingWrapper
     def _plot_base(self, data: xr.DataArray, x_model: str, y_model: str, plot_name_affix: str, season: str = ""):
@@ -420,14 +421,14 @@ class PlotClimatologicalSkillScore(AbstractPlotClass):  # pragma: no cover
     :param plot_folder: path to save the plot (default: current directory)
     :param score_only: if true plot only scores of CASE I to IV, otherwise plot all single terms (default True)
     :param extra_name_tag: additional tag that can be included in the plot name (default "")
-    :param model_setup: architecture type to specify plot name (default "")
+    :param model_name: architecture type to specify plot name (default "")
 
     """
 
     def __init__(self, data: Dict, plot_folder: str = ".", score_only: bool = True, extra_name_tag: str = "",
-                 model_setup: str = ""):
+                 model_name: str = ""):
         """Initialise."""
-        super().__init__(plot_folder, f"skill_score_clim_{extra_name_tag}{model_setup}")
+        super().__init__(plot_folder, f"skill_score_clim_{extra_name_tag}{model_name}")
         self._labels = None
         self._data = self._prepare_data(data, score_only)
         self._plot(score_only)
@@ -468,7 +469,7 @@ class PlotClimatologicalSkillScore(AbstractPlotClass):  # pragma: no cover
         fig, ax = plt.subplots()
         if not score_only:
             fig.set_size_inches(11.7, 8.27)
-        sns.boxplot(x="terms", y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
+        sns.boxplot(x="terms", y="data", hue="ahead", data=self._data, ax=ax, whis=1.5, palette="Blues_d",
                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
         ax.axhline(y=0, color="grey", linewidth=.5)
         ax.set(ylabel=f"{self._label_add(score_only)}skill score", xlabel="", title="summary of all stations",
@@ -584,13 +585,13 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):  # pragma: no cover
 
     def _create_pseudo_order(self, data):
         """Provide first predefined elements and append all remaining."""
-        first_elements = [f"{self._model_setup}-persi", "ols-persi", f"{self._model_setup}-ols"]
+        first_elements = [f"{self._model_setup} - persi", "ols - persi", f"{self._model_setup} - ols"]
         first_elements = list(filter(lambda x: x in data.comparison.tolist(), first_elements))
         uniq, index = np.unique(first_elements + data.comparison.unique().tolist(), return_index=True)
         return uniq[index.argsort()]
 
     def _filter_comparisons(self, data):
-        filtered_headers = list(filter(lambda x: "nn-" in x, data.comparison.unique()))
+        filtered_headers = list(filter(lambda x: f"{self._model_setup} - " in x, data.comparison.unique()))
         return data[data.comparison.isin(filtered_headers)]
 
     @staticmethod
@@ -610,14 +611,9 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):  # pragma: no cover
 
 
 @TimeTrackingWrapper
-class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
+class PlotFeatureImportanceSkillScore(AbstractPlotClass):  # pragma: no cover
     """
-    Create plot of climatological skill score after Murphy (1988) as box plot over all stations.
-
-    A forecast time step (called "ahead") is separately shown to highlight the differences for each prediction time
-    step. Either each single term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed
-    (score_only=True, default). Y-axis is adjusted following the data and not hard coded. The plot is saved under
-    plot_folder path with name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi.
+    Create plot of feature importance analysis.
 
     By passing a list `separate_vars` containing variable names, a second plot is created showing the `separate_vars`
     and the remaining variables side by side with different scaling.
@@ -630,50 +626,49 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
 
     """
 
-    def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None,
-                 sampling: str = "daily", ahead_dim: str = "ahead", bootstrap_type: str = None,
-                 bootstrap_method: str = None):
-
+    def __init__(self, data: Dict, plot_folder: str = ".", separate_vars: List = None, sampling: str = "daily",
+                 ahead_dim: str = "ahead", bootstrap_type: str = None, bootstrap_method: str = None,
+                 boot_dim: str = "boots", model_name: str = "NN", branch_names: list = None, ylim: tuple = None):
         """
         Set attributes and create plot.
 
         :param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms.
         :param plot_folder: path to save the plot (default: current directory)
-        :param model_setup: architecture type to specify plot name (default "CNN")
         :param separate_vars: variables to plot separated (default: ['o3'])
         :param sampling: type of sampling rate, should be either hourly or daily (default: "daily")
         :param ahead_dim: name of the ahead dimensions (default: "ahead")
         :param bootstrap_annotation: additional information to use in the file name (default: None)
+        :param model_name: architecture type to specify plot name (default "NN")
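+        :param boot_dim: name of the bootstrap dimension (default: "boots")
+        :param branch_names: optional display names for the input branches (default: None)
+        :param ylim: y-axis limits as a single tuple, or a list of two tuples for the separated plot (default: None)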
         """
         annotation = ["_".join([s for s in ["", bootstrap_type, bootstrap_method] if s is not None])][0]
-        super().__init__(plot_folder, f"skill_score_bootstrap_{model_setup}{annotation}")
+        super().__init__(plot_folder, f"feature_importance_{model_name}{annotation}")
         if separate_vars is None:
             separate_vars = ['o3']
         self.sampling = sampling
         self._labels = None
         self._x_name = "boot_var"
-# <<<<<<< HEAD
-#         self._data = self._prepare_data(data)
-        self._individual_vars = set(self._data[self._x_name].values)
-        # self._plot()
-        # self._save(bbox_inches='tight')
-        # self.plot_name += '_separated'
-        # self._plot(separate_vars=separate_vars)
-        # self._save(bbox_inches='tight')
-# =======
+
         self._ahead_dim = ahead_dim
+        self._boot_dim = boot_dim
         self._boot_type = self._set_bootstrap_type(bootstrap_type)
         self._boot_method = self._set_bootstrap_method(bootstrap_method)
+        self._number_of_bootstraps = 0
+        self._branches_names = branch_names
+        self._ylim = ylim
 
-        self._title = f"Bootstrap analysis ({self._boot_method}, {self._boot_type})"
         self._data = self._prepare_data(data, sampling)
+        self._set_title(model_name)
         if "branch" in self._data.columns:
             plot_name = self.plot_name
             for branch in self._data["branch"].unique():
-                self._title = f"Bootstrap analysis ({self._boot_method}, {self._boot_type}, {branch})"
+                self._set_title(model_name, branch)
                 self._plot(branch=branch)
                 self.plot_name = f"{plot_name}_{branch}"
                 self._save()
+                if len(set(separate_vars).intersection(self._data[self._x_name].unique())) > 0:
+                    self.plot_name += '_separated'
+                    self._plot(branch=branch, separate_vars=separate_vars)
+                    self._save(bbox_inches='tight')
         else:
             self._plot()
             self._save()
@@ -686,6 +681,21 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
     def _set_bootstrap_type(boot_type):
         return {"singleinput": "single input"}.get(boot_type, boot_type)
 
+    def _set_title(self, model_name, branch=None):
+        title_d = {"single input": "Single Inputs", "branch": "Input Branches", "variable": "Variables"}
+        base_title = f"{model_name}\nImportance of {title_d[self._boot_type]}"
+
+        additional = []
+        if branch is not None:
+            branch_name = self._branches_names[branch] if self._branches_names is not None else branch
+            additional.append(branch_name)
+        if self._number_of_bootstraps > 1:
+            additional.append(f"n={self._number_of_bootstraps}")
+        additional_title = ", ".join(additional)
+        if len(additional_title) > 0:
+            additional_title = f" ({additional_title})"
+        self._title = base_title + additional_title
+
     @staticmethod
     def _set_bootstrap_method(boot_method):
         return {"zero_mean": "zero mean", "shuffle": "shuffled"}.get(boot_method, boot_method)
@@ -713,14 +723,16 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
 # =======
         station_dim = "station"
         data = helpers.dict_to_xarray(data, station_dim).sortby(self._x_name)
+        data = data.transpose(station_dim, self._ahead_dim, self._boot_dim, self._x_name)
         if self._boot_type == "single input":
             number_tags = self._get_number_tag(data.coords[self._x_name].values, split_by='_')
             new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_',
                                                                    keep=1, as_unique=True)
-            values = data.values.reshape((data.shape[0], len(new_boot_coords), len(number_tags), data.shape[-1]))
-            data = xr.DataArray(values, coords={station_dim: data.coords["station"], self._x_name: new_boot_coords,
-                                                "branch": number_tags, self._ahead_dim: data.coords[self._ahead_dim]},
-                                dims=[station_dim, self._x_name, "branch", self._ahead_dim])
+            values = data.values.reshape((*data.shape[:3], len(number_tags), len(new_boot_coords)))
+            data = xr.DataArray(values, coords={station_dim: data.coords[station_dim], self._x_name: new_boot_coords,
+                                                "branch": number_tags, self._ahead_dim: data.coords[self._ahead_dim],
+                                                self._boot_dim: data.coords[self._boot_dim]},
+                                dims=[station_dim, self._ahead_dim, self._boot_dim, "branch", self._x_name])
         else:
             try:
                 new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_',
@@ -732,6 +744,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
         self._labels = [str(i) + sampling_letter for i in data.coords[self._ahead_dim].values]
         if station_dim not in data.dims:
             data = data.expand_dims(station_dim)
+        self._number_of_bootstraps = np.unique(data.coords[self._boot_dim].values).shape[0]
         return data.to_dataframe("data").reset_index(level=np.arange(len(data.dims)).tolist())
 
     @staticmethod
@@ -781,10 +794,10 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
         if separate_vars is None:
             self._plot_all_variables(branch)
         else:
-            self._plot_selected_variables(separate_vars)
+            self._plot_selected_variables(separate_vars, branch)
 
-    def _plot_selected_variables(self, separate_vars: List):
-        data = self._data
+    def _plot_selected_variables(self, separate_vars: List, branch=None):
+        data = self._data if branch is None else self._data[self._data["branch"] == str(branch)]
         self.raise_error_if_separate_vars_do_not_exist(data, separate_vars, self._x_name)
         all_variables = self._get_unique_values_from_column_of_df(data, self._x_name)
         remaining_vars = helpers.remove_items(all_variables, separate_vars)
@@ -805,41 +818,35 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
         data_first = self._select_data(df=data, variables=separate_vars, column_name=self._x_name)
         data_second = self._select_data(df=data, variables=remaining_vars, column_name=self._x_name)
 
-        fig, ax = plt.subplots(nrows=1, ncols=2,
-                               figsize=figsize,
-                               gridspec_kw={'width_ratios': [len(separate_vars),
-                                                                               len(remaining_vars)]})
-# >>>>>>> develop
+        fig, ax = plt.subplots(nrows=1, ncols=2, gridspec_kw={'width_ratios': [len(separate_vars),
+                                                                               len(remaining_vars)]},
+                               figsize=(len(remaining_vars), len(remaining_vars) / 2.))
-        if len(separate_vars) > 1:
-            first_box_width = .8
-        else:
-            first_box_width = 2.
+        first_box_width = .8
 
-# <<<<<<< HEAD
-#         sns.boxplot(x=self._x_name, y="data", hue="ahead", data=data_first, ax=ax[0], whis=1., palette="Blues_d",
-#                     showmeans=True, order=order_first, meanprops={"markersize": 1, "markeredgecolor": "k"},
-#                     flierprops={"marker": "."}, width=first_box_width
-#                     )
-#         ax[0].set(ylabel=f"skill score", xlabel="")
-#
-#         sns.boxplot(x=self._x_name, y="data", hue="ahead", data=data_second, ax=ax[1], whis=1., palette="Blues_d",
-#                     showmeans=True, order=order_second, meanprops={"markersize": 1, "markeredgecolor": "k"},
-#                     flierprops={"marker": "."},
-#                     )
-# =======
-        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_first, ax=ax[0], whis=1.,
+
+        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_first, ax=ax[0], whis=1.5,
                     palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
-                    flierprops={"marker": "."}, width=first_box_width)
+                    showfliers=False, width=first_box_width)
         ax[0].set(ylabel=f"skill score", xlabel="")
+        if self._ylim is not None:
+            _ylim = self._ylim if isinstance(self._ylim, tuple) else self._ylim[0]
+            ax[0].set(ylim=_ylim)
 
-        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_second, ax=ax[1], whis=1.,
+        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_second, ax=ax[1], whis=1.5,
                     palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
-                    flierprops={"marker": "."})
-# >>>>>>> develop
+                    showfliers=False)
+
         ax[1].set(ylabel="", xlabel="")
         if group_size > 1:
             [ax[1].axvline(x + .5, color='grey') for i, x in enumerate(ax[1].get_xticks(), start=1) if i % group_size == 0]
         ax[1].yaxis.tick_right()
+        if self._ylim is not None and isinstance(self._ylim, list):
+            _ylim = self._ylim[1]
+            ax[1].set(ylim=_ylim)
+
         handles, _ = ax[1].get_legend_handles_labels()
         for sax in ax:
             matplotlib.pyplot.sca(sax)
@@ -847,7 +854,8 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
             plt.xticks(rotation=45, ha='right')
             sax.legend_.remove()
 
-        fig.legend(handles, self._labels, loc='upper center', ncol=len(handles) + 1, )
+        # fig.legend(handles, self._labels, loc='upper center', ncol=len(handles) + 1, )
+        ax[1].legend(handles, self._labels, loc='lower center', ncol=len(handles) + 1, fontsize="medium")
 
         def align_yaxis(ax1, ax2):
             """
@@ -872,6 +880,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
 
         align_yaxis(ax[0], ax[1])
         align_yaxis(ax[0], ax[1])
+        plt.subplots_adjust(right=0.8)
         plt.title(self._title)
 
     @staticmethod
@@ -901,53 +910,30 @@ class PlotBootstrapSkillScore(AbstractPlotClass):  # pragma: no cover
         """
 
         """
-# # <<<<<<< HEAD
-#         number_of_vars = len(self._individual_vars)
-#         order, center_names = self.set_order_for_x_axis(self._individual_vars, return_center_names=True)
-#         group_size = int(number_of_vars / len(center_names))
-#
-#         if number_of_vars > 20:
-#             fig, ax = plt.subplots(figsize=(number_of_vars/2, 10))
-#         else:
-#             fig, ax = plt.subplots(figsize=(15, 10))
-#         sns.boxplot(x=self._x_name, y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
-#                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."},
-#                     order=order)
-#         ax.axhline(y=0, color="grey", linewidth=.5)
-#         if group_size > 1:
-#             [ax.axvline(x + .5, color='grey') for i, x in enumerate(ax.get_xticks(), start=1) if i % group_size == 0]
-#         plt.xticks(rotation=45, horizontalalignment="right")
-#         ax.set(ylabel=f"skill score", xlabel="", title="summary of all stations")
-# # =======
-#         fig, ax = plt.subplots()
-#         plot_data = self._data if branch is None else self._data[self._data["branch"] == str(branch)]
-#         sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1., palette="Blues_d",
-#                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
-#         ax.axhline(y=0, color="grey", linewidth=.5)
-#         plt.xticks(rotation=45)
-#         ax.set(ylabel=f"skill score", xlabel="", title=self._title)
-# >>>>>>> develop
-
-        # ToDo B: idea to solve merge conflict
-        number_of_vars = len(self._individual_vars)
-        order, center_names = self.set_order_for_x_axis(self._individual_vars, return_center_names=True)
-        group_size = int(number_of_vars / len(center_names))
-
-        if number_of_vars > 20:
-            fig, ax = plt.subplots(figsize=(number_of_vars/2, 10))
-        else:
-            fig, ax = plt.subplots(figsize=(15, 10))
-
         plot_data = self._data if branch is None else self._data[self._data["branch"] == str(branch)]
-        sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1., palette="Blues_d",
-                    showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
+        if self._boot_type == "branch":
+            n_x_vals = len(plot_data[self._x_name].unique())
+            fig, ax = plt.subplots(figsize=(0.5 + 2 / n_x_vals + n_x_vals, 4))
+            sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1.5,
+                        palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
+                        showfliers=False, width=0.8)
+        else:
+            fig, ax = plt.subplots()
+            sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1.5, palette="Blues_d",
+                        showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, showfliers=False)
         ax.axhline(y=0, color="grey", linewidth=.5)
-        if group_size > 1:
-            [ax.axvline(x + .5, color='grey') for i, x in enumerate(ax.get_xticks(), start=1) if i % group_size == 0]
-        plt.xticks(rotation=45, horizontalalignment="right")
-        ax.set(ylabel=f"skill score", xlabel="", title="summary of all stations")
 
-        # ToDo E: idea to solve merge conflict
+        if self._ylim is not None:
+            if isinstance(self._ylim, tuple):
+                _ylim = self._ylim
+            else:
+                _ylim = (min(self._ylim[0][0], self._ylim[1][0]), max(self._ylim[0][1], self._ylim[1][1]))
+            ax.set(ylim=_ylim)
+
+        if self._boot_type != "branch":
+            plt.xticks(rotation=45)
+        ax.set(ylabel=f"skill score", xlabel="", title=self._title)
         handles, _ = ax.get_legend_handles_labels()
         ax.legend(handles, self._labels)
         plt.tight_layout()
@@ -1142,7 +1128,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
 
     def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type",
                  error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots',
-                 block_length: str = None):
+                 block_length: str = None, model_name: str = "NN", model_indicator: str = "nn"):
         super().__init__(plot_folder, "sample_uncertainty_from_bootstrap")
         default_name = self.plot_name
         self.model_type_dim = model_type_dim
@@ -1150,6 +1136,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self.dim_name_boots = dim_name_boots
         self.error_unit = error_unit
         self.block_length = block_length
+        data = self.rename_model_indicator(data, model_name, model_indicator)
         self.prepare_data(data)
         self._plot(orientation="v")
 
@@ -1167,6 +1154,11 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self._data_table = None
         self._n_boots = None
 
+    def rename_model_indicator(self, data, model_name, model_indicator):
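+        # map the internal model indicator (e.g. "nn") to its display name (e.g. "NN") on the model-type coordinate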
+        data.coords[self.model_type_dim] = [{model_indicator: model_name}.get(n, n)
+                                            for n in data.coords[self.model_type_dim].values]
+        return data
+
     def prepare_data(self, data: xr.DataArray):
         self._data_table = data.to_pandas()
         if "persi" in self._data_table.columns:
@@ -1189,7 +1181,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         else:
             raise ValueError(f"orientation must be `v' or `h' but is: {orientation}")
         fig, ax = plt.subplots(figsize=figsize)
-        sns.boxplot(data=data_table, ax=ax, whis=1., color="white",
+        sns.boxplot(data=data_table, ax=ax, whis=1.5, color="white",
                     showmeans=True, meanprops={"markersize": 6, "markeredgecolor": "k"},
                     flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3},
                     boxprops={'facecolor': 'none', 'edgecolor': 'k'},
diff --git a/mlair/plotting/training_monitoring.py b/mlair/plotting/training_monitoring.py
index b2b531b99c85bb43e4e758fd23045c9f0575cb24..39dd80651226519463d7b503fb612e43983d73cf 100644
--- a/mlair/plotting/training_monitoring.py
+++ b/mlair/plotting/training_monitoring.py
@@ -45,15 +45,18 @@ class PlotModelHistory:
         self._additional_columns = self._filter_columns(history)
         self._plot(filename)
 
-    @staticmethod
-    def _get_plot_metric(history, plot_metric, main_branch):
-        if plot_metric.lower() == "mse":
-            plot_metric = "mean_squared_error"
-        elif plot_metric.lower() == "mae":
-            plot_metric = "mean_absolute_error"
+    def _get_plot_metric(self, history, plot_metric, main_branch, correct_names=True):
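+        # keep the raw metric name as fallback for a second lookup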
+        _plot_metric = plot_metric
+        if correct_names is True:
+            if plot_metric.lower() == "mse":
+                plot_metric = "mean_squared_error"
+            elif plot_metric.lower() == "mae":
+                plot_metric = "mean_absolute_error"
         available_keys = [k for k in history.keys() if
                           plot_metric in k and ("main" in k.lower() if main_branch else True)]
         available_keys.sort(key=len)
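+        # if the translated metric name is not part of the history, retry the lookup with the raw name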
+        if len(available_keys) == 0 and correct_names is True:
+            return self._get_plot_metric(history, _plot_metric, main_branch, correct_names=False)
         return available_keys[0]
 
     def _filter_columns(self, history: Dict) -> List[str]:
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 251cf9ac0dcbd524ae4ba6bd9d3910fbee2accb8..784fd7bfd054959655702faff96eb45a42f6bee3 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -232,7 +232,7 @@ class ExperimentSetup(RunEnvironment):
                  max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None,
                  overwrite_lazy_data: bool = None, uncertainty_estimate_block_length: str = None,
                  uncertainty_estimate_evaluate_competitors: bool = None, uncertainty_estimate_n_boots: int = None,
-                 do_uncertainty_estimate: bool = None, **kwargs):
+                 do_uncertainty_estimate: bool = None, model_display_name: str = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -385,6 +385,8 @@ class ExperimentSetup(RunEnvironment):
                         default=DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, scope="feature_importance")
 
         self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
+        if model_display_name is not None:
+            self._set_param("model_display_name", model_display_name)
         self._set_param("neighbors", ["DEBW030"])  # TODO: just for testing
 
         # set competitors
diff --git a/mlair/run_modules/model_setup.py b/mlair/run_modules/model_setup.py
index 0b9e8ec56592901d9feba15eb50b6b21a0c53560..98263eb732d8067fba0950c7a4882fb3ef020995 100644
--- a/mlair/run_modules/model_setup.py
+++ b/mlair/run_modules/model_setup.py
@@ -84,7 +84,7 @@ class ModelSetup(RunEnvironment):
 
         # load weights if no training shall be performed
         if not self._train_model and not self._create_new_model:
-            self.load_weights()
+            self.load_model()
 
         # create checkpoint
         self._set_callbacks()
@@ -131,13 +131,13 @@ class ModelSetup(RunEnvironment):
                                           save_best_only=True, mode='auto')
         self.data_store.set("callbacks", callbacks, self.scope)
 
-    def load_weights(self):
-        """Try to load weights from existing model or skip if not possible."""
+    def load_model(self):
+        """Try to load model from disk or skip if not possible."""
         try:
-            self.model.load_weights(self.model_name)
-            logging.info(f"reload weights from model {self.model_name} ...")
+            self.model.load_model(self.model_name)
+            logging.info(f"reload model {self.model_name} from disk ...")
         except OSError:
-            logging.info('no weights to reload...')
+            logging.info('no local model to load...')
 
     def build_model(self):
         """Build model using input and output shapes from data store."""
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 7c42b9a34c75e332bb18b8ac465486f4eaa189a7..eba1a299b710980fdc6ca0e22c883a151638e324 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -18,11 +18,11 @@ import xarray as xr
 from mlair.configuration import path_config
 from mlair.data_handler import Bootstraps, KerasIterator
 from mlair.helpers.datastore import NameNotFoundInDataStore
-from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list, tables
+from mlair.helpers import TimeTracking, TimeTrackingWrapper, statistics, extract_value, remove_items, to_list, tables
 from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
 from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
-    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, \
+    PlotCompetitiveSkillScore, PlotTimeSeries, PlotFeatureImportanceSkillScore, PlotConditionalQuantiles, \
     PlotSeparationOfScales, PlotSampleUncertaintyFromBootstrap
 from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
     PlotPeriodogram, PlotDataHistogram
@@ -68,7 +68,7 @@ class PostProcessing(RunEnvironment):
     def __init__(self):
         """Initialise and run post-processing."""
         super().__init__()
-        self.model: keras.Model = self._load_model()
+        self.model: AbstractModelClass = self._load_model()
         self.model_name = self.data_store.get("model_name", "model").rsplit("/", 1)[1].split(".", 1)[0]
         self.ols_model = None
         self.batch_size: int = self.data_store.get_default("batch_size", "model", 64)
@@ -99,6 +99,7 @@ class PostProcessing(RunEnvironment):
         self.uncertainty_estimate_boot_dim = "boots"
         self.model_type_dim = "type"
         self.index_dim = "index"
+        self.model_display_name = self.data_store.get_default("model_display_name", default=self.model.model_name)
         self._run()
 
     def _run(self):
@@ -118,17 +119,17 @@ class PostProcessing(RunEnvironment):
 
         # feature importance bootstraps
         if self.data_store.get("evaluate_feature_importance", "postprocessing"):
-            with TimeTracking(name="calculate feature importance using bootstraps"):
+            with TimeTracking(name="evaluate_feature_importance", log_on_enter=True):
                 create_new_bootstraps = self.data_store.get("create_new_bootstraps", "feature_importance")
                 bootstrap_method = self.data_store.get("bootstrap_method", "feature_importance")
                 bootstrap_type = self.data_store.get("bootstrap_type", "feature_importance")
                 self.calculate_feature_importance(create_new_bootstraps, bootstrap_type=bootstrap_type,
                                                   bootstrap_method=bootstrap_method)
-            if self.feature_importance_skill_scores is not None:
-                self.report_feature_importance_results(self.feature_importance_skill_scores)
+                if self.feature_importance_skill_scores is not None:
+                    self.report_feature_importance_results(self.feature_importance_skill_scores)
 
         # skill scores and error metrics
-        with TimeTracking(name="calculate skill scores"):
+        with TimeTracking(name="calculate_error_metrics", log_on_enter=True):
             skill_score_competitive, _, skill_score_climatological, errors = self.calculate_error_metrics()
             self.skill_scores = (skill_score_competitive, skill_score_climatological)
         self.report_error_metrics(errors)
@@ -138,12 +139,14 @@ class PostProcessing(RunEnvironment):
         # plotting
         self.plot()
 
+    @TimeTrackingWrapper
     def estimate_sample_uncertainty(self, separate_ahead=False):
         """
         Estimate sample uncertainty by using a bootstrap approach. Forecasts are split into individual blocks along
         time and randomly drawn with replacement. The spread of the resulting error across all bootstrap samples
         indicates the robustness of each analyzed model and thereby which model is likely to be superior.
         """
+        logging.info("start estimate_sample_uncertainty")
         n_boots = self.data_store.get_default("n_boots", default=1000, scope="uncertainty_estimate")
         block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate")
         evaluate_competitors = self.data_store.get_default("evaluate_competitors", default=True,
@@ -276,7 +279,8 @@ class PostProcessing(RunEnvironment):
         if _iter == 0:
             self.feature_importance_skill_scores = {}
         for boot_type in to_list(bootstrap_type):
-            self.feature_importance_skill_scores[boot_type] = {}
+            if _iter == 0:
+                self.feature_importance_skill_scores[boot_type] = {}
             for boot_method in to_list(bootstrap_method):
                 try:
                     if create_new_bootstraps:
@@ -285,7 +289,7 @@ class PostProcessing(RunEnvironment):
                     boot_skill_score = self.calculate_feature_importance_skill_scores(bootstrap_type=boot_type,
                                                                                       bootstrap_method=boot_method)
                     self.feature_importance_skill_scores[boot_type][boot_method] = boot_skill_score
-                except FileNotFoundError:
+                except (FileNotFoundError, ValueError):
                     if _iter != 0:
                         raise RuntimeError(f"calculate_feature_importance ({boot_type}, {boot_type}) was called for the "
                                            f"2nd time. This means, that something internally goes wrong. Please check "
@@ -302,26 +306,34 @@ class PostProcessing(RunEnvironment):
         These forecasts and the corresponding labels are saved station-wise in forecast_path, e.g.
         `bootstraps_{station}_{bootstrap_method}_labels.nc` for the labels.
         """
+
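+        # select the bootstrap realisation at position pos from the trailing axis, recursing into branched inputs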
+        def _reshape(d, pos):
+            if isinstance(d, list):
+                return list(map(lambda x: _reshape(x, pos), d))
+            else:
+                return d[..., pos]
+
         # forecast
         with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"):
             # extract all requirements from data store
             forecast_path = self.data_store.get("forecast_path")
             number_of_bootstraps = self.data_store.get("n_boots", "feature_importance")
-            dims = [self.index_dim, self.ahead_dim, self.model_type_dim]
+            dims = [self.uncertainty_estimate_boot_dim, self.index_dim, self.ahead_dim, self.model_type_dim]
             for station in self.test_data:
                 X, Y = None, None
                 bootstraps = Bootstraps(station, number_of_bootstraps, bootstrap_type=bootstrap_type,
                                         bootstrap_method=bootstrap_method)
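+                # the Bootstraps object may adjust the realised number of bootstraps, therefore read it back here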
+                number_of_bootstraps = bootstraps.number_of_bootstraps
                 for boot in bootstraps:
                     X, Y, (index, dimension) = boot
                     # make bootstrap predictions
-                    bootstrap_predictions = self.model.predict(X)
-                    if isinstance(bootstrap_predictions, list):  # if model is branched model
-                        bootstrap_predictions = bootstrap_predictions[-1]
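+                    # run one prediction per bootstrap realisation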
+                    bootstrap_predictions = [self.model.predict(_reshape(X, pos))
+                                             for pos in range(number_of_bootstraps)]
+                    if isinstance(bootstrap_predictions[0], list):  # if model is branched model
+                        bootstrap_predictions = list(map(lambda x: x[-1], bootstrap_predictions))
                     # save bootstrap predictions separately for each station and variable combination
-                    bootstrap_predictions = np.expand_dims(bootstrap_predictions, axis=-1)
-                    shape = bootstrap_predictions.shape
-                    coords = (range(shape[0]), range(1, shape[1] + 1))
+                    bootstrap_predictions = list(map(lambda x: np.expand_dims(x, axis=-1), bootstrap_predictions))
+                    shape = bootstrap_predictions[0].shape
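+                    # coords: bootstrap realisation, sample index, lead time starting at 1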
+                    coords = (range(number_of_bootstraps), range(shape[0]), range(1, shape[1] + 1))
                     var = f"{index}_{dimension}" if index is not None else str(dimension)
                     tmp = xr.DataArray(bootstrap_predictions, coords=(*coords, [var]), dims=dims)
                     file_name = os.path.join(forecast_path,
@@ -329,9 +341,9 @@ class PostProcessing(RunEnvironment):
                     tmp.to_netcdf(file_name)
                 else:
                     # store also true labels for each station
-                    labels = np.expand_dims(Y, axis=-1)
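+                    # labels are identical across bootstrap realisations, therefore store only the first one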
+                    labels = np.expand_dims(Y[..., 0], axis=-1)
                     file_name = os.path.join(forecast_path, f"bootstraps_{station}_{bootstrap_method}_labels.nc")
-                    labels = xr.DataArray(labels, coords=(*coords, [self.observation_indicator]), dims=dims)
+                    labels = xr.DataArray(labels, coords=(*coords[1:], [self.observation_indicator]), dims=dims[1:])
                     labels.to_netcdf(file_name)
 
     def calculate_feature_importance_skill_scores(self, bootstrap_type, bootstrap_method) -> Dict[str, xr.DataArray]:
@@ -362,16 +374,13 @@ class PostProcessing(RunEnvironment):
                 file_name = os.path.join(forecast_path, f"bootstraps_{str(station)}_{bootstrap_method}_labels.nc")
                 with xr.open_dataarray(file_name) as da:
                     labels = da.load()
-                shape = labels.shape
 
                 # get original forecasts
-                orig = self.get_orig_prediction(forecast_path, forecast_file % str(station), number_of_bootstraps)
-                orig = orig.reshape(shape)
-                coords = (range(shape[0]), range(1, shape[1] + 1), [reference_name])
-                orig = xr.DataArray(orig, coords=coords, dims=[self.index_dim, self.ahead_dim, self.model_type_dim])
+                orig = self.get_orig_prediction(forecast_path, forecast_file % str(station), reference_name=reference_name)
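+                # adopt the labels' index coordinate so that forecasts and labels stay aligned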
+                orig.coords[self.index_dim] = labels.coords[self.index_dim]
 
                 # calculate skill scores for each variable
-                skill = pd.DataFrame(columns=range(1, self.window_lead_time + 1))
+                skill = []
                 for boot_set in bootstrap_iter:
                     boot_var = f"{boot_set[0]}_{boot_set[1]}" if isinstance(boot_set, tuple) else str(boot_set)
                     file_name = os.path.join(forecast_path,
@@ -384,33 +393,39 @@ class PostProcessing(RunEnvironment):
                         data = boot_data.sel({self.ahead_dim: ahead})
                         boot_scores.append(
                             skill_scores.general_skill_score(data, forecast_name=boot_var,
-                                                             reference_name=reference_name))
-                    skill.loc[boot_var] = np.array(boot_scores)
+                                                             reference_name=reference_name, dim=self.index_dim))
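+                    # collect the (ahead x boots) skill scores in a DataArray with a singleton variable dimension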
+                    tmp = xr.DataArray(np.expand_dims(np.array(boot_scores), axis=-1),
+                                       coords={self.ahead_dim: range(1, self.window_lead_time + 1),
+                                               self.uncertainty_estimate_boot_dim: range(number_of_bootstraps),
+                                               self.boot_var_dim: [boot_var]},
+                                       dims=[self.ahead_dim, self.uncertainty_estimate_boot_dim, self.boot_var_dim])
+                    skill.append(tmp)
 
                 # collect all results in single dictionary
-                score[str(station)] = xr.DataArray(skill, dims=[self.boot_var_dim, self.ahead_dim])
+                score[str(station)] = xr.concat(skill, dim=self.boot_var_dim)
             return score
 
-    def get_orig_prediction(self, path, file_name, number_of_bootstraps, prediction_name=None):
+    def get_orig_prediction(self, path, file_name, prediction_name=None, reference_name=None):
         if prediction_name is None:
             prediction_name = self.forecast_indicator
         file = os.path.join(path, file_name)
         with xr.open_dataarray(file) as da:
-            prediction = da.load().sel(type=prediction_name).squeeze()
-        return self.repeat_data(prediction, number_of_bootstraps)
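+            # select with a list to preserve the model type dimension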
+            prediction = da.load().sel({self.model_type_dim: [prediction_name]})
+        if reference_name is not None:
+            prediction.coords[self.model_type_dim] = [reference_name]
+        return prediction.dropna(dim=self.index_dim)
 
     @staticmethod
     def repeat_data(data, number_of_repetition):
         if isinstance(data, xr.DataArray):
             data = data.data
-        vals = np.tile(data, (number_of_repetition, 1))
-        return vals[~np.isnan(vals).any(axis=1), :]
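+        # repeat the data along a new trailing axis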
+        return np.repeat(np.expand_dims(data, axis=-1), number_of_repetition, axis=-1)
 
     def _get_model_name(self):
         """Return model name without path information."""
         return self.data_store.get("model_name", "model").rsplit("/", 1)[1].split(".", 1)[0]
 
-    def _load_model(self) -> keras.models:
+    def _load_model(self) -> AbstractModelClass:
         """
         Load NN model either from data store or from local path.
 
@@ -421,8 +436,8 @@ class PostProcessing(RunEnvironment):
         except NameNotFoundInDataStore:
             logging.info("No model was saved in data store. Try to load model from experiment path.")
             model_name = self.data_store.get("model_name", "model")
-            model_class: AbstractModelClass = self.data_store.get("model", "model")
-            model = keras.models.load_model(model_name, custom_objects=model_class.custom_objects)
+            model: AbstractModelClass = self.data_store.get("model", "model")
+            model.load_model(model_name)
         return model
 
     # noinspection PyBroadException
@@ -466,25 +481,28 @@ class PostProcessing(RunEnvironment):
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
 
         try:
-            if (self.feature_importance_skill_scores is not None) and ("PlotBootstrapSkillScore" in plot_list):
+            if (self.feature_importance_skill_scores is not None) and ("PlotFeatureImportanceSkillScore" in plot_list):
                 for boot_type, boot_data in self.feature_importance_skill_scores.items():
                     for boot_method, boot_skill_score in boot_data.items():
                         try:
-                            PlotBootstrapSkillScore(boot_skill_score, plot_folder=self.plot_path,
-                                                    model_setup=self.forecast_indicator, sampling=self._sampling,
-                                                    ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var),
-                                                    bootstrap_type=boot_type, bootstrap_method=boot_method)
+                            PlotFeatureImportanceSkillScore(
+                                boot_skill_score, plot_folder=self.plot_path, model_name=self.model_display_name,
+                                sampling=self._sampling, ahead_dim=self.ahead_dim,
+                                separate_vars=to_list(self.target_var), bootstrap_type=boot_type,
+                                bootstrap_method=boot_method)
                         except Exception as e:
-                            logging.error(f"Could not create plot PlotBootstrapSkillScore ({boot_type}, {boot_method}) "
-                                          f"due to the following error:\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n"
-                                          f"{sys.exc_info()[2]}")
+                            logging.error(f"Could not create plot PlotFeatureImportanceSkillScore ({boot_type}, "
+                                          f"{boot_method}) due to the following error:\n{sys.exc_info()[0]}\n"
+                                          f"{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
 
         except Exception as e:
-            logging.error(f"Could not create plot PlotBootstrapSkillScore due to the following error: {e}")
+            logging.error(f"Could not create plot PlotFeatureImportanceSkillScore due to the following error: {e}")
 
         try:
             if "PlotConditionalQuantiles" in plot_list:
-                PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=path, plot_folder=self.plot_path)
+                PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=path, plot_folder=self.plot_path,
+                                         forecast_indicator=self.forecast_indicator,
+                                         obs_indicator=self.observation_indicator)
         except Exception as e:
             logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error:"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -500,9 +518,9 @@ class PostProcessing(RunEnvironment):
         try:
             if "PlotClimatologicalSkillScore" in plot_list:
                 PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path,
-                                             model_setup=self.forecast_indicator)
+                                             model_name=self.model_display_name)
                 PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
-                                             extra_name_tag="all_terms_", model_setup=self.forecast_indicator)
+                                             extra_name_tag="all_terms_", model_name=self.model_display_name)
         except Exception as e:
             logging.error(f"Could not create plot PlotClimatologicalSkillScore due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -510,8 +528,7 @@ class PostProcessing(RunEnvironment):
         try:
             if "PlotCompetitiveSkillScore" in plot_list:
                 PlotCompetitiveSkillScore(self.skill_scores[0], plot_folder=self.plot_path,
-                                          model_setup=self.forecast_indicator, sampling=self._sampling,
-                                          model_name_for_plots=self.model_name_for_plots)
+                                          model_setup=self.model_display_name)
         except Exception as e:
             logging.error(f"Could not create plot PlotCompetitiveSkillScore due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -581,11 +598,12 @@ class PostProcessing(RunEnvironment):
 
         try:
             if "PlotSampleUncertaintyFromBootstrap" in plot_list and self.uncertainty_estimate is not None:
-                block_length= self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate")
+                block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate")
                 PlotSampleUncertaintyFromBootstrap(
                     data=self.uncertainty_estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim,
                     dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error",
-                    error_unit=r"ppb$^2$", block_length=block_length)
+                    error_unit=r"ppb$^2$", block_length=block_length, model_name=self.model_display_name,
+                    model_indicator=self.forecast_indicator)
         except Exception as e:
             logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -898,6 +916,8 @@ class PostProcessing(RunEnvironment):
 
             # test errors
             if external_data is not None:
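+                # report errors under the model display name instead of the technical forecast indicator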
+                external_data.coords[self.model_type_dim] = [{self.forecast_indicator: self.model_display_name}.get(n, n)
+                                                              for n in external_data.coords[self.model_type_dim].values]
                 model_type_list = external_data.coords[self.model_type_dim].values.tolist()
                 for model_type in remove_items(model_type_list, self.observation_indicator):
                     if model_type not in errors.keys():
@@ -969,15 +989,17 @@ class PostProcessing(RunEnvironment):
         """Create a csv file containing all results from feature importance."""
         report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
         path_config.check_path_and_create(report_path)
-        res = [[self.model_type_dim, "method", "station", self.boot_var_dim, self.ahead_dim, "vals"]]
+        res = []
         for boot_type, d0 in results.items():
             for boot_method, d1 in d0.items():
                 for station_name, vals in d1.items():
                     for boot_var in vals.coords[self.boot_var_dim].values.tolist():
                         for ahead in vals.coords[self.ahead_dim].values.tolist():
                             res.append([boot_type, boot_method, station_name, boot_var, ahead,
-                                        float(vals.sel({self.boot_var_dim: boot_var, self.ahead_dim: ahead}))])
-        col_names = res.pop(0)
+                                        *vals.sel({self.boot_var_dim: boot_var,
+                                                   self.ahead_dim: ahead}).values.round(5).tolist()])
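+        # descriptive columns followed by one numbered column per bootstrap realisation (assumes res is non-empty)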
+        col_names = [self.model_type_dim, "method", "station", self.boot_var_dim, self.ahead_dim,
+                     *list(range(len(res[0]) - 5))]
         df = pd.DataFrame(res, columns=col_names)
         file_name = "feature_importance_skill_score_report_raw.csv"
         df.to_csv(os.path.join(report_path, file_name), sep=";")
@@ -1012,8 +1034,8 @@ class PostProcessing(RunEnvironment):
                     df.reindex(df.index.drop(["total"]).to_list() + ["total"], )
                 column_format = tables.create_column_format_for_tex(df)
                 if model_type == "skill_score":
-                    file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_')
+                    file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_').replace('/', '_')
                 else:
-                    file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_')
+                    file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_').replace('/', '_')
                 tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df)
                 tables.save_to_md(report_path, file_name % "md", df=df)
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index f0192e469c0a52a68fc5b29a5254583d5207cc32..302eeb03e8c43ab316c4938b214d28ac0b1bd196 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -270,6 +270,7 @@ class PreProcessing(RunEnvironment):
                     collection.add(dh)
                     valid_stations.append(s)
             pool.close()
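+            # wait until all worker processes have terminated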
+            pool.join()
         else:  # serial solution
             logging.info("use serial validate station approach")
             kwargs.update({"return_strategy": "result"})
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index 0696c2e7b8daa75925cf16096e183de94c21fe85..8d82afb4c002c660e6fb966945b2e383007d5b70 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -70,7 +70,7 @@ class Training(RunEnvironment):
         self.model: keras.Model = self.data_store.get("model", "model")
         self.train_set: Union[KerasIterator, None] = None
         self.val_set: Union[KerasIterator, None] = None
-        self.test_set: Union[KerasIterator, None] = None
         self.batch_size = self.data_store.get("batch_size")
         self.epochs = self.data_store.get("epochs")
         self.callbacks: CallbackHandler = self.data_store.get("callbacks", "model")
@@ -81,9 +81,9 @@ class Training(RunEnvironment):
 
     def _run(self) -> None:
         """Run training. Details in class description."""
-        self.set_generators()
         self.make_predict_function()
         if self._train_model:
+            self.set_generators()
             self.train()
             self.save_model()
             self.report_training()
@@ -118,7 +118,9 @@ class Training(RunEnvironment):
         The called sub-method will automatically distribute the data according to the batch size. The subsets can be
         accessed as class variables train_set, val_set, and test_set.
         """
-        for mode in ["train", "val", "test"]:
+        logging.info("set generators for training and validation")
+        for mode in ["train", "val"]:
             self._set_gen(mode)
 
     def train(self) -> None:
@@ -149,7 +151,7 @@ class Training(RunEnvironment):
             logging.info("Found locally stored model and checkpoints. Training is resumed from the last checkpoint.")
             self.callbacks.load_callbacks()
             self.callbacks.update_checkpoint()
-            self.model = keras.models.load_model(checkpoint.filepath)
+            self.model.load_model(checkpoint.filepath, compile=True)
             hist: History = self.callbacks.get_callback_by_name("hist")
             initial_epoch = max(hist.epoch) + 1
             _ = self.model.fit(self.train_set,
@@ -179,6 +181,7 @@ class Training(RunEnvironment):
         model_name = self.data_store.get("model_name", "model")
         logging.debug(f"save best model to {model_name}")
         self.model.save(model_name, save_format='h5')
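+        # save again without an explicit format; keras derives the format from the file suffix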
+        self.model.save(model_name)
         self.data_store.set("best_model", self.model)
 
     def load_best_model(self, name: str) -> None:
@@ -189,8 +192,8 @@ class Training(RunEnvironment):
         """
         logging.debug(f"load best model: {name}")
         try:
-            self.model.load_weights(name)
-            logging.info('reload weights...')
+            self.model.load_model(name, compile=True)
+            logging.info('reload model...')
         except OSError:
             logging.info('no weights to reload...')
 
@@ -235,9 +238,11 @@ class Training(RunEnvironment):
         if multiple_branches_used:
             filename = os.path.join(path, f"{name}_history_main_loss.pdf")
             PlotModelHistory(filename=filename, history=history, main_branch=True)
-        if len([e for e in history.model.metrics_names if "mean_squared_error" in e]) > 0:
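+        # the metric may be registered under its full name or its abbreviation, depending on the compile arguments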
+        mse_indicator = list(set(history.model.metrics_names).intersection(["mean_squared_error", "mse"]))
+        if len(mse_indicator) > 0:
             filename = os.path.join(path, f"{name}_history_main_mse.pdf")
-            PlotModelHistory(filename=filename, history=history, plot_metric="mse", main_branch=multiple_branches_used)
+            PlotModelHistory(filename=filename, history=history, plot_metric=mse_indicator[0],
+                             main_branch=multiple_branches_used)
 
         # plot learning rate
         if lr_sc:
diff --git a/requirements.txt b/requirements.txt
index 8d21c80db974033c94985821564e26cbb4aa8088..3afc17b67fddbf5a269df1e1b7e103045630a290 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,23 +1,34 @@
-## this list was generated using pipreqs on mlair/ directory
 astropy==4.1
 auto_mix_prep==0.2.0
 Cartopy==0.18.0
+dask==2021.3.0
 dill==0.3.3
+fsspec==2021.11.0
 keras==2.6.0
 keras_nightly==2.5.0.dev2021032900
+locket==0.2.1
 matplotlib==3.3.4
 mock==4.0.3
+netcdf4==1.5.8
 numpy==1.19.5
 pandas==1.1.5
+partd==1.2.0
 psutil==5.8.0
+pydot==1.4.2
 pytest==6.2.2
+pytest-cov==2.11.1
+pytest-html==3.1.1
+pytest-lazy-fixture==0.6.3
 requests==2.25.1
 scipy==1.5.2
 seaborn==0.11.1
 setuptools==47.1.0
+--no-binary shapely
+Shapely==1.8.0
 six==1.15.0
 statsmodels==0.12.2
+tabulate==0.8.9
 tensorflow==2.5.0
+toolz==0.11.2
 typing_extensions==3.7.4.3
 wget==3.2
 xarray==0.16.2
diff --git a/requirements_vm_local.txt b/requirements_vm_local.txt
deleted file mode 100644
index d57cfb8e0b75055e187816b9922f72ac510cbd7d..0000000000000000000000000000000000000000
--- a/requirements_vm_local.txt
+++ /dev/null
@@ -1,103 +0,0 @@
-absl-py==0.11.0
-appdirs==1.4.4
-astor==0.8.1
-astropy==4.1
-astunparse==1.6.3
-attrs==20.3.0
-Bottleneck==1.3.2
-cached-property==1.5.2
-cachetools==4.2.4
-Cartopy==0.18.0
-certifi==2020.12.5
-cftime==1.4.1
-chardet==4.0.0
-click==8.0.3
-cloudpickle==2.0.0
-coverage==5.4
-cycler==0.10.0
-dask==2021.10.0
-dill==0.3.3
-distributed==2021.10.0
-flatbuffers==1.12
-fsspec==0.8.5
-gast==0.4.0
-google-auth==2.3.0
-google-auth-oauthlib==0.4.6
-google-pasta==0.2.0
-greenlet==1.1.2
-grpcio==1.34.0
-h5py==3.1.0
-HeapDict==1.0.1
-idna==2.10
-importlib-metadata==3.4.0
-iniconfig==1.1.1
-Jinja2==3.0.2
-joblib==1.1.0
-keras-nightly==2.5.0.dev2021032900
-Keras-Preprocessing==1.1.2
-kiwisolver==1.3.1
-locket==0.2.1
-Markdown==3.3.3
-MarkupSafe==2.0.1
-matplotlib==3.3.4
-mock==4.0.3
-msgpack==1.0.2
-netCDF4==1.5.5.1
-numpy==1.19.5
-oauthlib==3.1.1
-opt-einsum==3.3.0
-ordered-set==4.0.2
-packaging==20.9
-pandas==1.1.5
-partd==1.1.0
-patsy==0.5.1
-Pillow==8.1.0
-pluggy==0.13.1
-protobuf==3.15.0
-psutil==5.8.0
-py==1.10.0
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-pydot==1.4.2
-pyparsing==2.4.7
-pyshp==2.1.3
-pytest==6.2.2
-pytest-cov==2.11.1
-pytest-html==3.1.1
-pytest-lazy-fixture==0.6.3
-pytest-metadata==1.11.0
-pytest-sugar==0.9.4
-python-dateutil==2.8.1
-pytz==2021.1
-PyYAML==5.4.1
-requests==2.25.1
-requests-oauthlib==1.3.0
-rsa==4.7.2
-scikit-learn==1.0.1
-scipy==1.5.2
-seaborn==0.11.1
-Shapely==1.7.1
-six==1.15.0
-sortedcontainers==2.4.0
-SQLAlchemy==1.4.26
-statsmodels==0.12.2
-tabulate==0.8.8
-tblib==1.7.0
-tensorboard==2.7.0
-tensorboard-data-server==0.6.1
-tensorboard-plugin-wit==1.8.0
-tensorflow==2.5.0
-tensorflow-estimator==2.5.0
-termcolor==1.1.0
-threadpoolctl==3.0.0
-toml==0.10.2
-toolz==0.11.1
-tornado==6.1
-typing-extensions==3.7.4.3
-urllib3==1.26.3
-Werkzeug==1.0.1
-wget==3.2
-wrapt==1.12.1
-xarray==0.16.2
-zict==2.0.0
-zipp==3.4.0
diff --git a/run.py b/run.py
index 5324e55a09b004352c4e35f23f5e2ea21a7451d6..82bb0e2814d403b5be602eaebd1bc44b6cf6d6f9 100644
--- a/run.py
+++ b/run.py
@@ -30,7 +30,6 @@ def main(parser_args):
         train_model=False, create_new_model=True, network="UBA",
         evaluate_feature_importance=False,  # plot_list=["PlotCompetitiveSkillScore"],
         competitors=["test_model", "test_model2"],
-
         competitor_path=os.path.join(os.getcwd(), "data", "comp_test"),
         **parser_args.__dict__, start_script=__file__)
     workflow.run()
diff --git a/run_mixed_sampling.py b/run_mixed_sampling.py
index 784f653fbfb2eb4c78e6e858acf67cd0ae47a593..47aa9b970c0e95ccadb60e8c090136c0fa6ceea4 100644
--- a/run_mixed_sampling.py
+++ b/run_mixed_sampling.py
@@ -4,8 +4,8 @@ __date__ = '2019-11-14'
 import argparse
 
 from mlair.workflows import DefaultWorkflow
-from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampling, DataHandlerMixedSamplingWithFilter, \
-    DataHandlerSeparationOfScales
+from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampling
+
 
 stats = {'o3': 'dma8eu', 'no': 'dma8eu', 'no2': 'dma8eu',
          'relhum': 'average_values', 'u': 'average_values', 'v': 'average_values',
@@ -20,7 +20,7 @@ data_origin = {'o3': '', 'no': '', 'no2': '',
 def main(parser_args):
     args = dict(stations=["DEBW107", "DEBW013"],
                 network="UBA",
-                evaluate_feature_importance=False, plot_list=[],
+                evaluate_feature_importance=True,  # plot_list=[],
                 data_origin=data_origin, data_handler=DataHandlerMixedSampling,
                 interpolation_limit=(3, 1), overwrite_local_data=False,
                 sampling=("hourly", "daily"),
@@ -28,8 +28,6 @@ def main(parser_args):
                 create_new_model=True, train_model=False, epochs=1,
                 window_history_size=6 * 24 + 16,
                 window_history_offset=16,
-                kz_filter_length=[100 * 24, 15 * 24],
-                kz_filter_iter=[4, 5],
                 start="2006-01-01",
                 train_start="2006-01-01",
                 end="2011-12-31",
diff --git a/test/test_configuration/test_defaults.py b/test/test_configuration/test_defaults.py
index 8644181185203186bb6c8549e8faa99e75a31a81..f6bc6d24724c2620083602d3864bcbca0a709681 100644
--- a/test/test_configuration/test_defaults.py
+++ b/test/test_configuration/test_defaults.py
@@ -72,8 +72,6 @@ class TestAllDefaults:
         assert DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE == "singleinput"
         assert DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD == "shuffle"
         assert DEFAULT_PLOT_LIST == ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore",
-                                     "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore",
+                                     "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotFeatureImportanceSkillScore",
                                      "PlotConditionalQuantiles", "PlotAvailability", "PlotAvailabilityHistogram",
                                      "PlotDataHistogram", "PlotPeriodogram", "PlotSampleUncertaintyFromBootstrap"]
-
-
diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py
index 9d633a348bd1e24cd3f3abcdb83124f6107db2e9..1b83b3823519d63d5dcbc10f0e31fc3433f98f34 100644
--- a/test/test_run_modules/test_training.py
+++ b/test/test_run_modules/test_training.py
@@ -1,8 +1,12 @@
+import copy
 import glob
 import json
+import time
+
 import logging
 import os
 import shutil
+from typing import Callable
 
 import tensorflow.keras as keras
 import mock
@@ -11,6 +15,7 @@ from tensorflow.keras.callbacks import History
 
 from mlair.data_handler import DataCollection, KerasIterator, DefaultDataHandler
 from mlair.helpers import PyTestRegex
+from mlair.model_modules.fully_connected_networks import FCN
 from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.inception_model import InceptionModelBase
 from mlair.model_modules.keras_extensions import LearningRateDecay, HistoryAdvanced, CallbackHandler, EpoTimingCallback
@@ -76,10 +81,24 @@ class TestTraining:
         obj.data_store.set("plot_path", path_plot, "general")
         obj._train_model = True
         obj._create_new_model = False
-        yield obj
-        if os.path.exists(path):
-            shutil.rmtree(path)
-        RunEnvironment().__del__()
+        try:
+            yield obj
+        finally:
+            if os.path.exists(path):
+                shutil.rmtree(path)
+            try:
+                RunEnvironment().__del__()
+            except AssertionError:
+                pass
 
     @pytest.fixture
     def learning_rate(self):
@@ -144,7 +163,7 @@ class TestTraining:
     @pytest.fixture
     def model(self, window_history_size, window_lead_time, statistics_per_var):
         channels = len(list(statistics_per_var.keys()))
-        return my_test_model(keras.layers.PReLU, window_history_size, channels, window_lead_time, 0.1, False)
+        return FCN([(window_history_size + 1, 1, channels)], [window_lead_time])
 
     @pytest.fixture
     def callbacks(self, path):
@@ -174,7 +193,8 @@ class TestTraining:
         obj.data_store.set("data_collection", data_collection, "general.train")
         obj.data_store.set("data_collection", data_collection, "general.val")
         obj.data_store.set("data_collection", data_collection, "general.test")
-        obj.model.compile(optimizer=keras.optimizers.SGD(), loss=keras.losses.mean_absolute_error)
+        obj.model.compile(**obj.model.compile_options)
+        keras.utils.get_custom_objects().update(obj.model.custom_objects)
         return obj
 
     @pytest.fixture
@@ -209,6 +229,57 @@ class TestTraining:
         if os.path.exists(path):
             shutil.rmtree(path)
 
+    @staticmethod
+    def create_training_obj(epochs, path, data_collection, batch_path, model_path,
+                            statistics_per_var, window_history_size, window_lead_time) -> Training:
+
+        channels = len(list(statistics_per_var.keys()))
+        model = FCN([(window_history_size + 1, 1, channels)], [window_lead_time])
+
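+        # create a Training instance without invoking Training.__init__ so that all attributes can be set manually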
+        obj = object.__new__(Training)
+        super(Training, obj).__init__()
+        obj.model = model
+        obj.train_set = None
+        obj.val_set = None
+        obj.test_set = None
+        obj.batch_size = 256
+        obj.epochs = epochs
+
+        clbk = CallbackHandler()
+        hist = HistoryAdvanced()
+        epo_timing = EpoTimingCallback()
+        clbk.add_callback(hist, os.path.join(path, "hist_checkpoint.pickle"), "hist")
+        lr = LearningRateDecay()
+        clbk.add_callback(lr, os.path.join(path, "lr_checkpoint.pickle"), "lr")
+        clbk.add_callback(epo_timing, os.path.join(path, "epo_timing.pickle"), "epo_timing")
+        clbk.create_model_checkpoint(filepath=os.path.join(path, "model_checkpoint"), monitor='val_loss',
+                                     save_best_only=True)
+        obj.callbacks = clbk
+        obj.lr_sc = lr
+        obj.hist = hist
+        obj.experiment_name = "TestExperiment"
+        obj.data_store.set("data_collection", data_collection, "general.train")
+        obj.data_store.set("data_collection", data_collection, "general.val")
+        obj.data_store.set("data_collection", data_collection, "general.test")
+        if not os.path.exists(path):
+            os.makedirs(path)
+        obj.data_store.set("experiment_path", path, "general")
+        os.makedirs(batch_path, exist_ok=True)
+        obj.data_store.set("batch_path", batch_path, "general")
+        os.makedirs(model_path, exist_ok=True)
+        obj.data_store.set("model_path", model_path, "general")
+        obj.data_store.set("model_name", os.path.join(model_path, "test_model.h5"), "general.model")
+        obj.data_store.set("experiment_name", "TestExperiment", "general")
+
+        path_plot = os.path.join(path, "plots")
+        os.makedirs(path_plot, exist_ok=True)
+        obj.data_store.set("plot_path", path_plot, "general")
+        obj._train_model = True
+        obj._create_new_model = False
+
+        obj.model.compile(**obj.model.compile_options)
+        return obj
+
     def test_init(self, ready_to_init):
         assert isinstance(Training(), Training)  # just test, if nothing fails
 
@@ -223,9 +294,10 @@ class TestTraining:
         assert ready_to_run._run() is None  # just test, if nothing fails
 
     def test_make_predict_function(self, init_without_run):
-        assert hasattr(init_without_run.model, "predict_function") is False
+        assert hasattr(init_without_run.model, "predict_function") is True
+        assert init_without_run.model.predict_function is None
         init_without_run.make_predict_function()
-        assert hasattr(init_without_run.model, "predict_function")
+        assert isinstance(init_without_run.model.predict_function, Callable)
 
     def test_set_gen(self, init_without_run):
         assert init_without_run.train_set is None
@@ -234,7 +306,7 @@ class TestTraining:
         assert init_without_run.train_set._collection.return_value == "mock_train_gen"
 
     def test_set_generators(self, init_without_run):
-        sets = ["train", "val", "test"]
+        sets = ["train", "val"]
         assert all([getattr(init_without_run, f"{obj}_set") is None for obj in sets])
         init_without_run.set_generators()
         assert not all([getattr(init_without_run, f"{obj}_set") is None for obj in sets])
@@ -242,10 +314,10 @@ class TestTraining:
             [getattr(init_without_run, f"{obj}_set")._collection.return_value == f"mock_{obj}_gen" for obj in sets])
 
     def test_train(self, ready_to_train, path):
-        assert not hasattr(ready_to_train.model, "history")
+        assert ready_to_train.model.history is None
         assert len(glob.glob(os.path.join(path, "plots", "TestExperiment_history_*.pdf"))) == 0
         ready_to_train.train()
-        assert list(ready_to_train.model.history.history.keys()) == ["val_loss", "loss"]
+        assert sorted(list(ready_to_train.model.history.history.keys())) == ["loss", "val_loss"]
         assert ready_to_train.model.history.epoch == [0, 1]
         assert len(glob.glob(os.path.join(path, "plots", "TestExperiment_history_*.pdf"))) == 2
 
@@ -260,8 +332,8 @@ class TestTraining:
 
     def test_load_best_model_no_weights(self, init_without_run, caplog):
         caplog.set_level(logging.DEBUG)
-        init_without_run.load_best_model("notExisting")
-        assert caplog.record_tuples[0] == ("root", 10, PyTestRegex("load best model: notExisting"))
+        init_without_run.load_best_model("notExisting.h5")
+        assert caplog.record_tuples[0] == ("root", 10, PyTestRegex("load best model: notExisting.h5"))
         assert caplog.record_tuples[1] == ("root", 20, PyTestRegex("no weights to reload..."))
 
     def test_save_callbacks_history_created(self, init_without_run, history, learning_rate, epo_timing, model_path):
@@ -290,3 +362,14 @@ class TestTraining:
         history.model.metrics_names = mock.MagicMock(return_value=["loss", "mean_squared_error"])
         init_without_run.create_monitoring_plots(history, learning_rate)
         assert len(glob.glob(os.path.join(path, "plots", "TestExperiment_history_*.pdf"))) == 2
+
+    def test_resume_training1(self, path: str, model_path, batch_path, data_collection, statistics_per_var,
+                              window_history_size, window_lead_time):
+
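+        # first run trains for 4 epochs; the second resumes from the stored checkpoint and continues up to epoch 8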
+        obj_1st = self.create_training_obj(4, path, data_collection, batch_path, model_path, statistics_per_var,
+                                           window_history_size, window_lead_time)
+        keras.utils.get_custom_objects().update(obj_1st.model.custom_objects)
+        assert obj_1st._run() is None
+        obj_2nd = self.create_training_obj(8, path, data_collection, batch_path, model_path, statistics_per_var,
+                                           window_history_size, window_lead_time)
+        assert obj_2nd._run() is None