diff --git a/.gitlab/issue_templates/release.md b/.gitlab/issue_templates/release.md index a95cf033eed919339c6c1734638542c3e0cdbc57..c8289cdd1be4ab02a61c6feb0db9db7cb6ca40d3 100644 --- a/.gitlab/issue_templates/release.md +++ b/.gitlab/issue_templates/release.md @@ -14,6 +14,7 @@ vX.Y.Z * [ ] Adjust `changelog.md` (see template for changelog) * [ ] Update version number in `mlair/__ init__.py` * [ ] Create new dist file: `python3 setup.py sdist bdist_wheel` +* [ ] Add new dist file `mlair-X.Y.Z-py3-none-any.whl` to git * [ ] Update file link `distribution file (current version)` in `README.md` * [ ] Update file link in `docs/_source/installation.rst` * [ ] Commit + push diff --git a/CHANGELOG.md b/CHANGELOG.md index b11a169c854465c5ea932f00f5da5a1688df7c18..34795b8333df846d5383fc2d8eca4b40517aab73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,25 @@ # Changelog All notable changes to this project will be documented in this file. -## v1.4.0 - 2021-07-27 - <release description> +## v1.5.0 - 2021-11-11 - new uncertainty estimation + +### general: +* introduces method to estimate sample uncertainty +* improved multiprocessing +* last release with tensorflow v1 support + +### new features: +* test set sample uncertainty estimation during postprocessing (#333) +* support of Kolmogorov Zurbenko filter for data handlers with filters (#334) + +### technical: +* new communication scheme for multiprocessing (#321, #322) +* improved error reporting (#323) +* feature importance returns now unaggregated results (#335) +* error metrics are reported for all competitors (#332) +* minor bugfixes and refacs (#330, #326, #329, #325, #324, #320, #337) + +## v1.4.0 - 2021-07-27 - new model classes and data handlers, improved usability and transparency ### general: * many technical adjustments to improve usability and transparency of MLAir diff --git a/README.md b/README.md index 6d07ecec3bd1ab76f6e29dc969e4339e5521593b..1baf4465a7ad4d55476fec1f4ed8d45a7a531386 --- 
a/README.md +++ b/README.md @@ -31,7 +31,7 @@ HPC systems, see [here](#special-instructions-for-installation-on-jülich-hpc-sy * Installation of **MLAir**: * Either clone MLAir from the [gitlab repository](https://gitlab.version.fz-juelich.de/toar/mlair.git) and use it without installation (beside the requirements) - * or download the distribution file ([current version](https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.4.0-py3-none-any.whl)) + * or download the distribution file ([current version](https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.5.0-py3-none-any.whl)) and install it via `pip install <dist_file>.whl`. In this case, you can simply import MLAir in any python script inside your virtual environment using `import mlair`. * (tf) Currently, TensorFlow-1.13 is mentioned in the requirements. We already tested the TensorFlow-1.15 version and couldn't @@ -518,3 +518,22 @@ add it to `src/join_settings.py` in the hourly data section. Replace the `TOAR_S value. To make sure, that this **sensitive** data is not uploaded to the remote server, use the following command to prevent git from tracking this file: `git update-index --assume-unchanged src/join_settings.py` + +## Known Issues + +### Problem with multiprocessing + +* cpython and python's native multiprocessing can crash when using the multiprocessing approach for preprocessing. This +is caused by an internal limitation in order of 2GB. When using long periods and therefore very big data, +multiprocessing is not able to handle these data correctly: +```shell +File "mlair/mlair/run_modules/pre_processing.py", line X, in validate_station + dh, s = p.get() +File "multiprocessing/pool.py", line 644, in get + raise self._value +multiprocessing.pool.MaybeEncodingError: Error sending result: '(DEMV012, 'DEMV012')'. 
Reason: 'error("'i' format requires -2147483648 <= number <= 2147483647",)' +``` +* to solve this issue, either update your python version to >=3.8 (warning, this version is not tested with MLAir) or +apply the patch that is applied in this commit +https://github.com/python/cpython/commit/bccacd19fa7b56dcf2fbfab15992b6b94ab6666b or as proposed in this comment +https://stackoverflow.com/questions/47776486/python-struct-error-i-format-requires-2147483648-number-2147483647 diff --git a/dist/mlair-1.5.0-py3-none-any.whl b/dist/mlair-1.5.0-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..34495d960b009737fb40bec6dfe3a96effd14c02 Binary files /dev/null and b/dist/mlair-1.5.0-py3-none-any.whl differ diff --git a/docs/_source/installation.rst b/docs/_source/installation.rst index 27543ac109439e487756cc211ecc47be946c586c..c87e64b217b4207185cfc662fdf00d2f7e891cc5 100644 --- a/docs/_source/installation.rst +++ b/docs/_source/installation.rst @@ -26,7 +26,7 @@ Installation of MLAir * Install all requirements from `requirements.txt <https://gitlab.version.fz-juelich.de/toar/machinelearningtools/-/blob/master/requirements.txt>`_ preferably in a virtual environment * Either clone MLAir from the `gitlab repository <https://gitlab.version.fz-juelich.de/toar/machinelearningtools.git>`_ -* or download the distribution file (`current version <https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.4.0-py3-none-any.whl>`_) +* or download the distribution file (`current version <https://gitlab.version.fz-juelich.de/toar/mlair/-/blob/master/dist/mlair-1.5.0-py3-none-any.whl>`_) and install it via :py:`pip install <dist_file>.whl`. In this case, you can simply import MLAir in any python script inside your virtual environment using :py:`import mlair`. * (tf) Currently, TensorFlow-1.13 is mentioned in the requirements. 
We already tested the TensorFlow-1.15 version and couldn't diff --git a/docs/requirements_docs.txt b/docs/requirements_docs.txt index a39acca8a7cf887237f595d7992960ac10233a85..8ccf3ba6515d31ebd2b35901d3c9e58734d653d8 100644 --- a/docs/requirements_docs.txt +++ b/docs/requirements_docs.txt @@ -3,4 +3,5 @@ sphinx-autoapi==1.3.0 sphinx-autodoc-typehints==1.10.3 sphinx-rtd-theme==0.4.3 #recommonmark==0.6.0 -m2r2==0.2.5 \ No newline at end of file +m2r2==0.2.5 +docutils<0.18 \ No newline at end of file diff --git a/mlair/__init__.py b/mlair/__init__.py index f760f9b0fa4b87bde1f6ee409626f4428083d895..75359e1773edea55ecc47556a83a465510fac6c8 100644 --- a/mlair/__init__.py +++ b/mlair/__init__.py @@ -1,6 +1,6 @@ __version_info__ = { 'major': 1, - 'minor': 4, + 'minor': 5, 'micro': 0, } diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index d61146b61a5ade9118675fa7b895212f310acc71..ca569720dc41d95621d0613a2170cc4d9d46c082 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -13,6 +13,7 @@ DEFAULT_START = "1997-01-01" DEFAULT_END = "2017-12-31" DEFAULT_WINDOW_HISTORY_SIZE = 13 DEFAULT_OVERWRITE_LOCAL_DATA = False +DEFAULT_OVERWRITE_LAZY_DATA = False DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"] # ju[wels} #hdfmll(ogin) DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"] # first part of node names for Juwels (jw[comp], hdfmlc(ompute). 
DEFAULT_CREATE_NEW_MODEL = True @@ -43,14 +44,19 @@ DEFAULT_TEST_END = "2017-12-31" DEFAULT_TEST_MIN_LENGTH = 90 DEFAULT_TRAIN_VAL_MIN_LENGTH = 180 DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True -DEFAULT_EVALUATE_BOOTSTRAPS = True -DEFAULT_CREATE_NEW_BOOTSTRAPS = False -DEFAULT_NUMBER_OF_BOOTSTRAPS = 20 -DEFAULT_BOOTSTRAP_TYPE = "singleinput" -DEFAULT_BOOTSTRAP_METHOD = "shuffle" +DEFAULT_DO_UNCERTAINTY_ESTIMATE = True +DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH = "1m" +DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS = True +DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS = 1000 +DEFAULT_EVALUATE_FEATURE_IMPORTANCE = True +DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS = False +DEFAULT_FEATURE_IMPORTANCE_N_BOOTS = 20 +DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE = "singleinput" +DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD = "shuffle" DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries", - "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles", - "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotPeriodogram"] + "PlotCompetitiveSkillScore", "PlotFeatureImportanceSkillScore", "PlotConditionalQuantiles", + "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotPeriodogram", + "PlotSampleUncertaintyFromBootstrap"] DEFAULT_SAMPLING = "daily" DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA", "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "", diff --git a/mlair/configuration/path_config.py b/mlair/configuration/path_config.py index e7418b984dab74b0527b8dca05a9f6c3636ac18f..6b9c799ceb190b9150be3a4cfcd336eaf45aa768 100644 --- a/mlair/configuration/path_config.py +++ b/mlair/configuration/path_config.py @@ -3,6 +3,7 @@ import getpass import logging import os import re +import shutil import socket from typing import Union @@ -112,17 +113,23 @@ def 
set_bootstrap_path(bootstrap_path: str, data_path: str) -> str: return os.path.abspath(bootstrap_path) -def check_path_and_create(path: str) -> None: +def check_path_and_create(path: str, remove_existing: bool = False) -> None: """ Check a given path and create if not existing. :param path: path to check and create + :param remove_existing: if set to true an existing folder is removed and replaced by a new one (default False). """ try: os.makedirs(path) logging.debug(f"Created path: {path}") except FileExistsError: - logging.debug(f"Path already exists: {path}") + if remove_existing is True: + logging.debug(f"Remove / clean path: {path}") + shutil.rmtree(path) + check_path_and_create(path, remove_existing=False) + else: + logging.debug(f"Path already exists: {path}") def get_host(): diff --git a/mlair/data_handler/__init__.py b/mlair/data_handler/__init__.py index 495b6e7c8604a839a084a2b78a54563c13eb06e6..d119977802038155af0d07d99c75c665622a148c 100644 --- a/mlair/data_handler/__init__.py +++ b/mlair/data_handler/__init__.py @@ -9,7 +9,7 @@ __author__ = 'Lukas Leufen, Felix Kleinert' __date__ = '2020-04-17' -from .bootstraps import BootStraps +from .input_bootstraps import Bootstraps from .iterator import KerasIterator, DataCollection from .default_data_handler import DefaultDataHandler from .abstract_data_handler import AbstractDataHandler diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index 8205ae6c28f3683b1052c292e5d063d8bca555dc..5aefb0368ec1cf544443bb5e0412dd16a97f2a7f 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -12,6 +12,7 @@ from mlair.helpers import remove_items from mlair.configuration.defaults import DEFAULT_SAMPLING, DEFAULT_INTERPOLATION_LIMIT, DEFAULT_INTERPOLATION_METHOD from mlair.helpers.filter import filter_width_kzf +import copy import inspect from typing import Callable import datetime as dt @@ -140,13 
+141,6 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]: start, end = self.update_start_end(ind) - # if ind == 0: # for inputs - # estimated_filter_width = self.estimate_filter_width() - # start = self._add_time_delta(self.start, -estimated_filter_width) - # end = self._add_time_delta(self.end, estimated_filter_width) - # else: # target - # start, end = self.start, self.end - vars = [self.variables, self.target_var] stats_per_var = helpers.select_from_dict(self.statistics_per_var, vars[ind]) @@ -264,7 +258,83 @@ class DataHandlerMixedSamplingWithClimateFirFilter(DataHandlerClimateFirFilter): data_handler = DataHandlerMixedSamplingWithClimateFirFilterSingleStation data_handler_transformation = DataHandlerMixedSamplingWithClimateFirFilterSingleStation - _requirements = data_handler.requirements() + data_handler_unfiltered = DataHandlerMixedSamplingSingleStation + _requirements = list(set(data_handler.requirements() + data_handler_unfiltered.requirements())) + DEFAULT_FILTER_ADD_UNFILTERED = False + + def __init__(self, *args, data_handler_class_unfiltered: data_handler_unfiltered = None, + filter_add_unfiltered: bool = DEFAULT_FILTER_ADD_UNFILTERED, **kwargs): + self.dh_unfiltered = data_handler_class_unfiltered + self.filter_add_unfiltered = filter_add_unfiltered + super().__init__(*args, **kwargs) + + @classmethod + def own_args(cls, *args): + """Return all arguments (including kwonlyargs).""" + super_own_args = DataHandlerClimateFirFilter.own_args(*args) + arg_spec = inspect.getfullargspec(cls) + list_of_args = arg_spec.args + arg_spec.kwonlyargs + super_own_args + return remove_items(list_of_args, ["self"] + list(args)) + + def _create_collection(self): + if self.filter_add_unfiltered is True and self.dh_unfiltered is not None: + return [self.id_class, self.dh_unfiltered] + else: + return super()._create_collection() + + @classmethod + def build(cls, station: 
str, **kwargs): + sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls.data_handler.requirements() if k in kwargs} + filter_add_unfiltered = kwargs.get("filter_add_unfiltered", False) + sp_keys = cls.build_update_kwargs(sp_keys, dh_type="filtered") + sp = cls.data_handler(station, **sp_keys) + if filter_add_unfiltered is True: + sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls.data_handler_unfiltered.requirements() if k in kwargs} + sp_keys = cls.build_update_kwargs(sp_keys, dh_type="unfiltered") + sp_unfiltered = cls.data_handler_unfiltered(station, **sp_keys) + else: + sp_unfiltered = None + dp_args = {k: copy.deepcopy(kwargs[k]) for k in cls.own_args("id_class") if k in kwargs} + return cls(sp, data_handler_class_unfiltered=sp_unfiltered, **dp_args) + + @classmethod + def build_update_kwargs(cls, kwargs_dict, dh_type="filtered"): + if "transformation" in kwargs_dict: + trafo_opts = kwargs_dict.get("transformation") + if isinstance(trafo_opts, dict): + kwargs_dict["transformation"] = trafo_opts.get(dh_type) + return kwargs_dict + + @classmethod + def transformation(cls, set_stations, tmp_path=None, **kwargs): + + sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs} + if "transformation" not in sp_keys.keys(): + return + + transformation_filtered = super().transformation(set_stations, tmp_path=tmp_path, + dh_transformation=cls.data_handler_transformation, **kwargs) + if kwargs.get("filter_add_unfiltered", False) is False: + return transformation_filtered + else: + transformation_unfiltered = super().transformation(set_stations, tmp_path=tmp_path, + dh_transformation=cls.data_handler_unfiltered, **kwargs) + return {"filtered": transformation_filtered, "unfiltered": transformation_unfiltered} + + def get_X_original(self): + if self.use_filter_branches is True: + X = [] + for data in self._collection: + if hasattr(data, "filter_dim"): + X_total = data.get_X() + filter_dim = data.filter_dim + for filter_name in data.filter_dim_order: + 
X.append(X_total.sel({filter_dim: filter_name}, drop=True)) + else: + X.append(data.get_X()) + return X + else: + return super().get_X_original() class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithKzFilterSingleStation): diff --git a/mlair/data_handler/data_handler_neighbors.py b/mlair/data_handler/data_handler_neighbors.py index 6c87946eaad5568e1ff59c3988bf8fe469442641..26fe6f8549639019169bf0b894186d354eaddc83 100644 --- a/mlair/data_handler/data_handler_neighbors.py +++ b/mlair/data_handler/data_handler_neighbors.py @@ -1,6 +1,10 @@ __author__ = 'Lukas Leufen' __date__ = '2020-07-17' +""" +WARNING: This data handler is just a prototype and has not been validated to work properly! Use it with caution! +""" + import datetime as dt import numpy as np @@ -19,7 +23,7 @@ number = Union[float, int] num_or_list = Union[number, List[number]] -class DataHandlerNeighbors(DefaultDataHandler): +class DataHandlerNeighbors(DefaultDataHandler): # pragma: no cover """Data handler including neighboring stations.""" def __init__(self, id_class, data_path, neighbors=None, min_length=0, @@ -48,7 +52,7 @@ class DataHandlerNeighbors(DefaultDataHandler): return [super(DataHandlerNeighbors, self).get_coordinates()].append(neighbors) -def run_data_prep(): +def run_data_prep(): # pragma: no cover """Comment: methods just to start write meaningful test routines.""" data = DummyDataHandler("main_class") data.get_X() @@ -63,7 +67,7 @@ def run_data_prep(): data_prep.get_data(upsampling=False) -def create_data_prep(): +def create_data_prep(): # pragma: no cover """Comment: methods just to start write meaningful test routines.""" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata") station_type = None @@ -91,7 +95,7 @@ def create_data_prep(): return data_prep -class DummyDataHandler(AbstractDataHandler): +class DummyDataHandler(AbstractDataHandler): # pragma: no cover def __init__(self, name, number_of_samples=None): """This data handler takes a 
name argument and the number of samples to generate. If not provided, a random diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index 4330efd9ee5d3ae8a64c6eb9b95a0c58e18b3c36..88a57d108e4533968eeb9a65aabf575fae085704 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -5,6 +5,8 @@ __date__ = '2020-07-20' import copy import datetime as dt +import gc + import dill import hashlib import logging @@ -61,7 +63,7 @@ class DataHandlerSingleStation(AbstractDataHandler): interpolation_method: Union[str, Tuple[str]] = DEFAULT_INTERPOLATION_METHOD, overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True, min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, - lazy_preprocessing: bool = False, **kwargs): + lazy_preprocessing: bool = False, overwrite_lazy_data=False, **kwargs): super().__init__() self.station = helpers.to_list(station) self.path = self.setup_data_path(data_path, sampling) @@ -92,6 +94,7 @@ class DataHandlerSingleStation(AbstractDataHandler): self.interpolation_method = interpolation_method self.overwrite_local_data = overwrite_local_data + self.overwrite_lazy_data = True if self.overwrite_local_data is True else overwrite_lazy_data self.store_data_locally = store_data_locally self.min_length = min_length self.start = start @@ -107,6 +110,13 @@ class DataHandlerSingleStation(AbstractDataHandler): # create samples self.setup_samples() + self.clean_up() + + def clean_up(self): + self._data = None + self.input_data = None + self.target_data = None + gc.collect() def __str__(self): return self.station[0] @@ -213,7 +223,8 @@ class DataHandlerSingleStation(AbstractDataHandler): elif method == "centre": return statistics.centre_apply(data, mean), {"mean": mean, "method": method} elif method == "min_max": - return statistics.min_max_apply(data, min, max), {"min": min, "max": 
max, "method": method, + kws = {"feature_range": feature_range} if feature_range is not None else {} + return statistics.min_max_apply(data, min, max, **kws), {"min": min, "max": max, "method": method, "feature_range": feature_range} elif method == "log": return statistics.log_apply(data, mean, std), {"mean": mean, "std": std, "method": method} @@ -253,7 +264,7 @@ class DataHandlerSingleStation(AbstractDataHandler): hash = self._get_hash() filename = os.path.join(self.lazy_path, hash + ".pickle") if not os.path.exists(filename): - dill.dump(self._create_lazy_data(), file=open(filename, "wb")) + dill.dump(self._create_lazy_data(), file=open(filename, "wb"), protocol=4) def _create_lazy_data(self): return [self._data, self.meta, self.input_data, self.target_data] @@ -262,6 +273,8 @@ class DataHandlerSingleStation(AbstractDataHandler): hash = self._get_hash() filename = os.path.join(self.lazy_path, hash + ".pickle") try: + if self.overwrite_lazy_data is True: + raise FileNotFoundError with open(filename, "rb") as pickle_file: lazy_data = dill.load(pickle_file) self._extract_lazy(lazy_data) @@ -404,8 +417,7 @@ class DataHandlerSingleStation(AbstractDataHandler): """ chem_vars = ["benzene", "ch4", "co", "ethane", "no", "no2", "nox", "o3", "ox", "pm1", "pm10", "pm2p5", "propane", "so2", "toluene"] - # used_chem_vars = list(set(chem_vars) & set(self.statistics_per_var.keys())) - used_chem_vars = list(set(chem_vars) & set(data.variables.values)) + used_chem_vars = list(set(chem_vars) & set(data.coords[self.target_dim].values)) if len(used_chem_vars) > 0: data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=minimum) return data @@ -451,11 +463,8 @@ class DataHandlerSingleStation(AbstractDataHandler): :return: this array """ ind = pd.DataFrame({'val': index_value}, index=index_value) - # res = xr.Dataset.from_dataframe(ind).to_array().rename({'index': index_name}).squeeze(dim=squeez/e_dim, drop=True) res = 
xr.Dataset.from_dataframe(ind).to_array(squeeze_dim).rename({'index': index_name}).squeeze( - dim=squeeze_dim, - drop=True - ) + dim=squeeze_dim, drop=True) res.name = index_name return res @@ -584,6 +593,12 @@ class DataHandlerSingleStation(AbstractDataHandler): non_nan_history = self.history.dropna(dim=dim) non_nan_label = self.label.dropna(dim=dim) non_nan_observation = self.observation.dropna(dim=dim) + if non_nan_label.coords[dim].shape[0] == 0: + raise ValueError(f'self.label consist of NaNs only - station {self.station} is therefore dropped') + if non_nan_history.coords[dim].shape[0] == 0: + raise ValueError(f'self.history consist of NaNs only - station {self.station} is therefore dropped') + if non_nan_observation.coords[dim].shape[0] == 0: + raise ValueError(f'self.observation consist of NaNs only - station {self.station} is therefore dropped') intersect = reduce(np.intersect1d, (non_nan_history.coords[dim].values, non_nan_label.coords[dim].values, non_nan_observation.coords[dim].values)) @@ -738,8 +753,6 @@ class DataHandlerSingleStation(AbstractDataHandler): if __name__ == "__main__": - # dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) - # print(dp) statistics_per_var = {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'} sp = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122', statistics_per_var=statistics_per_var, station_type='background', diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py index e76f396aea80b2db76e01ea5baacf71d024b0d23..4707fd580562a68fd6b2dc0843551905e70d7e50 100644 --- a/mlair/data_handler/data_handler_with_filter.py +++ b/mlair/data_handler/data_handler_with_filter.py @@ -4,6 +4,7 @@ __author__ = 'Lukas Leufen' __date__ = '2020-08-26' import inspect +import copy import numpy as np import pandas as pd import xarray as xr @@ -12,7 +13,7 @@ from functools import 
partial import logging from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation from mlair.data_handler import DefaultDataHandler -from mlair.helpers import remove_items, to_list, TimeTrackingWrapper +from mlair.helpers import remove_items, to_list, TimeTrackingWrapper, statistics from mlair.helpers.filter import KolmogorovZurbenkoFilterMovingWindow as KZFilter from mlair.helpers.filter import FIRFilter, ClimateFIRFilter, omega_null_kzf @@ -126,32 +127,16 @@ class DataHandlerFilter(DefaultDataHandler): list_of_args = arg_spec.args + arg_spec.kwonlyargs + super_own_args return remove_items(list_of_args, ["self"] + list(args)) - def get_X_original(self): - if self.use_filter_branches is True: - X = [] - for data in self._collection: - X_total = data.get_X() - filter_dim = data.filter_dim - for filter_name in data.filter_dim_order: - X.append(X_total.sel({filter_dim: filter_name}, drop=True)) - return X - else: - return super().get_X_original() - class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): """Data handler for a single station to be used by a superior data handler. 
Inputs are FIR filtered.""" _requirements = remove_items(DataHandlerFilterSingleStation.requirements(), "station") - _hash = DataHandlerFilterSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type", - "_add_unfiltered"] + _hash = DataHandlerFilterSingleStation._hash + ["filter_cutoff_period", "filter_order", "filter_window_type"] DEFAULT_WINDOW_TYPE = ("kaiser", 5) - DEFAULT_ADD_UNFILTERED = False - def __init__(self, *args, filter_cutoff_period, filter_order, filter_window_type=DEFAULT_WINDOW_TYPE, - filter_add_unfiltered=DEFAULT_ADD_UNFILTERED, **kwargs): - # self._check_sampling(**kwargs) + def __init__(self, *args, filter_cutoff_period, filter_order, filter_window_type=DEFAULT_WINDOW_TYPE, **kwargs): # self.original_data = None # ToDo: implement here something to store unfiltered data self.fs = self._get_fs(**kwargs) if filter_window_type == "kzf": @@ -161,7 +146,7 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): assert len(self.filter_cutoff_period) == (len(filter_order) - len(removed_index)) self.filter_order = self._prepare_filter_order(filter_order, removed_index, self.fs) self.filter_window_type = filter_window_type - self._add_unfiltered = filter_add_unfiltered + self.unfiltered_name = "unfiltered" super().__init__(*args, **kwargs) @staticmethod @@ -223,8 +208,6 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): self.filter_window_type, self.target_dim) self.fir_coeff = fir.filter_coefficients() fir_data = fir.filtered_data() - if self._add_unfiltered is True: - fir_data.append(self.input_data) self.input_data = xr.concat(fir_data, pd.Index(self.create_filter_index(), name=self.filter_dim)) # this is just a code snippet to check the results of the kz filter # import matplotlib @@ -249,8 +232,6 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): else: index.append(f"band{band_num}") band_num += 1 - if self._add_unfiltered: - index.append("unfiltered") 
self.filter_dim_order = index return pd.Index(index, name=self.filter_dim) @@ -261,13 +242,89 @@ class DataHandlerFirFilterSingleStation(DataHandlerFilterSingleStation): _data, _meta, _input_data, _target_data, self.fir_coeff, self.filter_dim_order = lazy_data super(__class__, self)._extract_lazy((_data, _meta, _input_data, _target_data)) + def transform(self, data_in, dim: Union[str, int] = 0, inverse: bool = False, opts=None, + transformation_dim=None): + """ + Transform data according to given transformation settings. + + This function transforms a xarray.dataarray (along dim) or pandas.DataFrame (along axis) either with mean=0 + and std=1 (`method=standardise`) or centers the data with mean=0 and no change in data scale + (`method=centre`). Furthermore, this sets an internal instance attribute for later inverse transformation. This + method will raise an AssertionError if an internal transform method was already set ('inverse=False') or if the + internal transform method, internal mean and internal standard deviation weren't set ('inverse=True'). + + :param string/int dim: This param is not used for inverse transformation. + | for xarray.DataArray as string: name of dimension which should be standardised + | for pandas.DataFrame as int: axis of dimension which should be standardised + :param inverse: Switch between transformation and inverse transformation. + + :return: xarray.DataArrays or pandas.DataFrames: + #. mean: Mean of data + #. std: Standard deviation of data + #. 
data: Standardised data + """ + + if transformation_dim is None: + transformation_dim = self.DEFAULT_TARGET_DIM + + def f(data, method="standardise", feature_range=None): + if method == "standardise": + return statistics.standardise(data, dim) + elif method == "centre": + return statistics.centre(data, dim) + elif method == "min_max": + kwargs = {"feature_range": feature_range} if feature_range is not None else {} + return statistics.min_max(data, dim, **kwargs) + elif method == "log": + return statistics.log(data, dim) + else: + raise NotImplementedError + + def f_apply(data, method, **kwargs): + for k, v in kwargs.items(): + if not (isinstance(v, xr.DataArray) or v is None): + _, opts = statistics.min_max(data, dim) + helper = xr.ones_like(opts['min']) + kwargs[k] = helper * v + mean = kwargs.pop('mean', None) + std = kwargs.pop('std', None) + min = kwargs.pop('min', None) + max = kwargs.pop('max', None) + feature_range = kwargs.pop('feature_range', None) + + if method == "standardise": + return statistics.standardise_apply(data, mean, std), {"mean": mean, "std": std, "method": method} + elif method == "centre": + return statistics.centre_apply(data, mean), {"mean": mean, "method": method} + elif method == "min_max": + return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method, + "feature_range": feature_range} + elif method == "log": + return statistics.log_apply(data, mean, std), {"mean": mean, "std": std, "method": method} + else: + raise NotImplementedError + + opts = opts or {} + opts_updated = {} + if not inverse: + transformed_values = [] + for var in data_in.variables.values: + data_var = data_in.sel(**{transformation_dim: [var]}) + var_opts = opts.get(var, {}) + _apply = (var_opts.get("mean", None) is not None) or (var_opts.get("min") is not None) + values, new_var_opts = locals()["f_apply" if _apply else "f"](data_var, **var_opts) + opts_updated[var] = copy.deepcopy(new_var_opts) + transformed_values.append(values) + 
return xr.concat(transformed_values, dim=transformation_dim), opts_updated + else: + return self.inverse_transform(data_in, opts, transformation_dim) + class DataHandlerFirFilter(DataHandlerFilter): """Data handler using FIR filtered data.""" data_handler = DataHandlerFirFilterSingleStation data_handler_transformation = DataHandlerFirFilterSingleStation - _requirements = data_handler.requirements() class DataHandlerKzFilterSingleStation(DataHandlerFilterSingleStation): @@ -393,13 +450,8 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation climate_filter.filtered_data] # create input data with filter index - input_data = xr.concat(climate_filter_data, pd.Index(self.create_filter_index(), name=self.filter_dim)) - - # add unfiltered raw data - if self._add_unfiltered is True: - data_raw = self.shift(self.input_data, self.time_dim, -self.window_history_size) - data_raw = data_raw.expand_dims({self.filter_dim: ["unfiltered"]}, -1) - input_data = xr.concat([input_data, data_raw], self.filter_dim) + input_data = xr.concat(climate_filter_data, pd.Index(self.create_filter_index(add_unfiltered_index=False), + name=self.filter_dim)) self.input_data = input_data @@ -410,7 +462,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation # self.input_data.sel(filter="low", variables="temp", Stations="DEBW107").plot() # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter") - def create_filter_index(self) -> pd.Index: + def create_filter_index(self, add_unfiltered_index=True) -> pd.Index: """ Round cut off periods in days and append 'res' for residuum index. 
@@ -421,8 +473,6 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation f = lambda x: int(np.round(x)) if x >= 10 else np.round(x, 1) index = list(map(f, index.tolist())) index = list(map(lambda x: str(x) + "d", index)) + ["res"] - if self._add_unfiltered: - index.append("unfiltered") self.filter_dim_order = index return pd.Index(index, name=self.filter_dim) @@ -491,11 +541,3 @@ class DataHandlerClimateFirFilter(DataHandlerFilter): _requirements = data_handler.requirements() _store_attributes = data_handler.store_attributes() - # def get_X_original(self): - # X = [] - # for data in self._collection: - # X_total = data.get_X() - # filter_dim = data.filter_dim - # for filter in data.filter_dim_order: - # X.append(X_total.sel({filter_dim: filter}, drop=True)) - # return X diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py index a17de95407a74d1504877fdce03a82d1c943e868..6fa7952d4bc0a278f17f767073969822924b6d5f 100644 --- a/mlair/data_handler/default_data_handler.py +++ b/mlair/data_handler/default_data_handler.py @@ -8,6 +8,7 @@ import gc import logging import os import pickle +import random import dill import shutil from functools import reduce @@ -52,6 +53,7 @@ class DefaultDataHandler(AbstractDataHandler): self._Y = None self._X_extreme = None self._Y_extreme = None + self._data_intersection = None self._use_multiprocessing = use_multiprocessing self._max_number_multiprocessing = max_number_multiprocessing _name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self)) @@ -92,7 +94,7 @@ class DefaultDataHandler(AbstractDataHandler): data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme} data = self._force_dask_computation(data) with open(self._save_file, "wb") as f: - dill.dump(data, f) + dill.dump(data, f, protocol=4) logging.debug(f"save pickle data to {self._save_file}") self._reset_data() @@ -132,7 +134,7 @@ 
class DefaultDataHandler(AbstractDataHandler): return X, Y def __repr__(self): - return ";".join(list(map(lambda x: str(x), self._collection))) + return str(self._collection[0]) def get_X_original(self): X = [] @@ -171,10 +173,15 @@ class DefaultDataHandler(AbstractDataHandler): else: X = list(map(lambda x: x.sel({dim: intersect}), X_original)) Y = Y_original.sel({dim: intersect}) + self._data_intersection = intersect self._X, self._Y = X, Y def get_observation(self): - return self.id_class.observation.copy().squeeze() + dim = self.time_dim + if self._data_intersection is not None: + return self.id_class.observation.sel({dim: self._data_intersection}).copy().squeeze() + else: + return self.id_class.observation.copy().squeeze() def apply_transformation(self, data, base="target", dim=0, inverse=False): return self.id_class.apply_transformation(data, dim=dim, base=base, inverse=inverse) @@ -214,30 +221,29 @@ class DefaultDataHandler(AbstractDataHandler): raise TypeError(f"Elements of list extreme_values have to be {number.__args__}, but at least element " f"{i} is type {type(i)}") + extremes_X, extremes_Y = None, None for extr_val in sorted(extreme_values): # check if some extreme values are already extracted - if (self._X_extreme is None) or (self._Y_extreme is None): - X = self._X - Y = self._Y + if (extremes_X is None) or (extremes_Y is None): + X, Y = self._X, self._Y + extremes_X, extremes_Y = X, Y else: # one extr value iteration is done already: self.extremes_label is NOT None... 
- X = self._X_extreme - Y = self._Y_extreme + X, Y = self._X_extreme, self._Y_extreme # extract extremes based on occurrence in labels other_dims = remove_items(list(Y.dims), dim) if extremes_on_right_tail_only: - extreme_idx = (Y > extr_val).any(dim=other_dims) + extreme_idx = (extremes_Y > extr_val).any(dim=other_dims) else: - extreme_idx = xr.concat([(Y < -extr_val).any(dim=other_dims[0]), - (Y > extr_val).any(dim=other_dims[0])], - dim=other_dims[1]).any(dim=other_dims[1]) + extreme_idx = xr.concat([(extremes_Y < -extr_val).any(dim=other_dims[0]), + (extremes_Y > extr_val).any(dim=other_dims[0])], + dim=other_dims[0]).any(dim=other_dims[0]) - extremes_X = list(map(lambda x: x.sel(**{dim: extreme_idx}), X)) + sel = extreme_idx[extreme_idx].coords[dim].values + extremes_X = list(map(lambda x: x.sel(**{dim: sel}), extremes_X)) self._add_timedelta(extremes_X, dim, timedelta) - # extremes_X = list(map(lambda x: x.coords[dim].values + np.timedelta64(*timedelta), extremes_X)) - - extremes_Y = Y.sel(**{dim: extreme_idx}) - extremes_Y.coords[dim].values += np.timedelta64(*timedelta) + extremes_Y = extremes_Y.sel(**{dim: extreme_idx}) + self._add_timedelta([extremes_Y], dim, timedelta) self._Y_extreme = xr.concat([Y, extremes_Y], dim=dim) self._X_extreme = list(map(lambda x1, x2: xr.concat([x1, x2], dim=dim), X, extremes_X)) @@ -245,10 +251,10 @@ class DefaultDataHandler(AbstractDataHandler): @staticmethod def _add_timedelta(data, dim, timedelta): for d in data: - d.coords[dim].values += np.timedelta64(*timedelta) + d.coords[dim] = d.coords[dim].values + np.timedelta64(*timedelta) @classmethod - def transformation(cls, set_stations, **kwargs): + def transformation(cls, set_stations, tmp_path=None, dh_transformation=None, **kwargs): """ ### supported transformation methods @@ -278,53 +284,45 @@ class DefaultDataHandler(AbstractDataHandler): If min and max are not None, the default data handler expects this parameters to match the data and applies this values to the data. 
Make sure that all dimensions and/or coordinates are in agreement. """ + if dh_transformation is None: + dh_transformation = cls.data_handler_transformation - sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs} + sp_keys = {k: copy.deepcopy(kwargs[k]) for k in dh_transformation.requirements() if k in kwargs} if "transformation" not in sp_keys.keys(): return transformation_dict = ({}, {}) - def _inner(): - """Inner method that is performed in both serial and parallel approach.""" - if dh is not None: - for i, transformation in enumerate(dh._transformation): - for var in transformation.keys(): - if var not in transformation_dict[i].keys(): - transformation_dict[i][var] = {} - opts = transformation[var] - if not transformation_dict[i][var].get("method", opts["method"]) == opts["method"]: - # data handlers with filters are allowed to change transformation method to standardise - assert hasattr(dh, "filter_dim") and opts["method"] == "standardise" - transformation_dict[i][var]["method"] = opts["method"] - for k in ["mean", "std", "min", "max"]: - old = transformation_dict[i][var].get(k, None) - new = opts.get(k) - transformation_dict[i][var][k] = new if old is None else old.combine_first(new) - if "feature_range" in opts.keys(): - transformation_dict[i][var]["feature_range"] = opts.get("feature_range", None) - max_process = kwargs.get("max_number_multiprocessing", 16) n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process]) # use only physical cpus if n_process > 1 and kwargs.get("use_multiprocessing", True) is True: # parallel solution logging.info("use parallel transformation approach") - pool = multiprocessing.Pool( - min([psutil.cpu_count(logical=False), len(set_stations), 16])) # use only physical cpus + pool = multiprocessing.Pool(n_process) # use only physical cpus logging.info(f"running {getattr(pool, '_processes')} processes in parallel") + sp_keys.update({"tmp_path": tmp_path, "return_strategy": 
"reference"}) output = [ - pool.apply_async(f_proc, args=(cls.data_handler_transformation, station), kwds=sp_keys) + pool.apply_async(f_proc, args=(dh_transformation, station), kwds=sp_keys) for station in set_stations] for p in output: - dh, s = p.get() - _inner() + _res_file, s = p.get() + with open(_res_file, "rb") as f: + dh = dill.load(f) + os.remove(_res_file) + transformation_dict = cls.update_transformation_dict(dh, transformation_dict) pool.close() else: # serial solution logging.info("use serial transformation approach") + sp_keys.update({"return_strategy": "result"}) for station in set_stations: - dh, s = f_proc(cls.data_handler_transformation, station, **sp_keys) - _inner() + dh, s = f_proc(dh_transformation, station, **sp_keys) + transformation_dict = cls.update_transformation_dict(dh, transformation_dict) # aggregate all information iter_dim = sp_keys.get("iter_dim", cls.DEFAULT_ITER_DIM) + transformation_dict = cls.aggregate_transformation(transformation_dict, iter_dim) + return transformation_dict + + @classmethod + def aggregate_transformation(cls, transformation_dict, iter_dim): pop_list = [] for i, transformation in enumerate(transformation_dict): for k in transformation.keys(): @@ -345,19 +343,47 @@ class DefaultDataHandler(AbstractDataHandler): transformation_dict[i].pop(k) return transformation_dict + @classmethod + def update_transformation_dict(cls, dh, transformation_dict): + """Inner method that is performed in both serial and parallel approach.""" + if dh is not None: + for i, transformation in enumerate(dh._transformation): + for var in transformation.keys(): + if var not in transformation_dict[i].keys(): + transformation_dict[i][var] = {} + opts = transformation[var] + if not transformation_dict[i][var].get("method", opts["method"]) == opts["method"]: + # data handlers with filters are allowed to change transformation method to standardise + assert hasattr(dh, "filter_dim") and opts["method"] == "standardise" + 
transformation_dict[i][var]["method"] = opts["method"] + for k in ["mean", "std", "min", "max"]: + old = transformation_dict[i][var].get(k, None) + new = opts.get(k) + transformation_dict[i][var][k] = new if old is None else old.combine_first(new) + if "feature_range" in opts.keys(): + transformation_dict[i][var]["feature_range"] = opts.get("feature_range", None) + return transformation_dict + def get_coordinates(self): return self.id_class.get_coordinates() -def f_proc(data_handler, station, **sp_keys): +def f_proc(data_handler, station, return_strategy="", tmp_path=None, **sp_keys): """ Try to create a data handler for given arguments. If build fails, this station does not fulfil all requirements and therefore f_proc will return None as indication. On a successful build, f_proc returns the built data handler and the station that was used. This function must be implemented globally to work together with multiprocessing. """ + assert return_strategy in ["result", "reference"] try: res = data_handler(station, **sp_keys) except (AttributeError, EmptyQueryResult, KeyError, ValueError) as e: logging.info(f"remove station {station} because it raised an error: {e}") res = None - return res, station + if return_strategy == "result": + return res, station + else: + _tmp_file = os.path.join(tmp_path, f"{station}_{'%032x' % random.getrandbits(128)}.pickle") + with open(_tmp_file, "wb") as f: + dill.dump(res, f, protocol=4) + return _tmp_file, station diff --git a/mlair/data_handler/bootstraps.py b/mlair/data_handler/input_bootstraps.py similarity index 91% rename from mlair/data_handler/bootstraps.py rename to mlair/data_handler/input_bootstraps.py index e03881484bfc9b8275ede8a4432072c74643994a..187f09050bb39a953ac58c2b7fca54b6a207aed1 100644 --- a/mlair/data_handler/bootstraps.py +++ b/mlair/data_handler/input_bootstraps.py @@ -28,12 +28,13 @@ class BootstrapIterator(Iterator): _position: int = None - def __init__(self, data: "BootStraps", method): - assert isinstance(data, 
BootStraps) + def __init__(self, data: "Bootstraps", method, return_reshaped=False): + assert isinstance(data, Bootstraps) self._data = data self._dimension = data.bootstrap_dimension self.boot_dim = "boots" self._method = method + self._return_reshaped = return_reshaped self._collection = self.create_collection(self._data.data, self._dimension) self._position = 0 @@ -46,12 +47,15 @@ class BootstrapIterator(Iterator): raise NotImplementedError def _reshape(self, d): - if isinstance(d, list): - return list(map(lambda x: self._reshape(x), d)) - # return list(map(lambda x: np.rollaxis(x, -1, 0).reshape(x.shape[0] * x.shape[-1], *x.shape[1:-1]), d)) + if self._return_reshaped: + if isinstance(d, list): + return list(map(lambda x: self._reshape(x), d)) + # return list(map(lambda x: np.rollaxis(x, -1, 0).reshape(x.shape[0] * x.shape[-1], *x.shape[1:-1]), d)) + else: + shape = d.shape + return np.rollaxis(d, -1, 0).reshape(shape[0] * shape[-1], *shape[1:-1]) else: - shape = d.shape - return np.rollaxis(d, -1, 0).reshape(shape[0] * shape[-1], *shape[1:-1]) + return d def _to_numpy(self, d): if isinstance(d, list): @@ -75,8 +79,8 @@ class BootstrapIterator(Iterator): class BootstrapIteratorSingleInput(BootstrapIterator): _position: int = None - def __init__(self, *args): - super().__init__(*args) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) def __next__(self): """Return next element or stop iteration.""" @@ -107,8 +111,8 @@ class BootstrapIteratorSingleInput(BootstrapIterator): class BootstrapIteratorVariable(BootstrapIterator): - def __init__(self, *args): - super().__init__(*args) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) def __next__(self): """Return next element or stop iteration.""" @@ -140,8 +144,8 @@ class BootstrapIteratorVariable(BootstrapIterator): class BootstrapIteratorBranch(BootstrapIterator): - def __init__(self, *args): - super().__init__(*args) + def __init__(self, *args, **kwargs): + 
super().__init__(*args, **kwargs) def __next__(self): try: @@ -184,7 +188,7 @@ class MeanBootstraps: return np.ones_like(data) * self._mean -class BootStraps(Iterable): +class Bootstraps(Iterable): """ Main class to perform bootstrap operations. diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py index a63cef975888162f335e4528c2f99bdfc7a892d5..36c93b04486fc9be013af2c4f34d2b3ee1bd84c2 100644 --- a/mlair/helpers/filter.py +++ b/mlair/helpers/filter.py @@ -173,8 +173,10 @@ class ClimateFIRFilter: # visualize if self.plot_path is not None: - self.PlotClimateFirFilter(self.plot_path, self.plot_data, sampling, plot_name) - # self._plot(sampling, new_dim=new_dim) + try: + self.PlotClimateFirFilter(self.plot_path, self.plot_data, sampling, plot_name) + except Exception as e: + logging.info(f"Could not plot climate fir filter due to following reason:\n{e}") @staticmethod def _minimum_length(order, minimum_length, pos, window): @@ -382,7 +384,7 @@ class ClimateFIRFilter: _end = pd.to_datetime(data.coords[time_dim].max().values).year filt_coll = [] for _year in range(_start, _end + 1): - logging.info(f"{data.coords['Stations'].values[0]} ({var}): year={_year}") + logging.debug(f"{data.coords['Stations'].values[0]} ({var}): year={_year}") time_slice = self._create_time_range_extend(_year, sampling, extend_length_history) d = data.sel({var_dim: [var], time_dim: time_slice}) @@ -504,137 +506,6 @@ class ClimateFIRFilter: res.name = index_name return res - def _plot(self, sampling, new_dim="window"): - h = None - td_type = {"1d": "D", "1H": "h"}.get(sampling) - if self.plot_path is None: - return - plot_folder = os.path.join(os.path.abspath(self.plot_path), "climFIR") - if not os.path.exists(plot_folder): - os.makedirs(plot_folder) - - # set plot parameter - rc_params = {'axes.labelsize': 'large', - 'xtick.labelsize': 'large', - 'ytick.labelsize': 'large', - 'legend.fontsize': 'medium', - 'axes.titlesize': 'large', - } - plt.rcParams.update(rc_params) - - plot_dict = {} 
- for i, o in enumerate(range(len(self.plot_data))): - plot_data = self.plot_data[i] - for p_d in plot_data: - var = p_d.get("var") - t0 = p_d.get("t0") - filter_input = p_d.get("filter_input") - filter_input_nc = p_d.get("filter_input_nc") - valid_range = p_d.get("valid_range") - time_range = p_d.get("time_range") - new_dim = p_d.get("new_dim") - h = p_d.get("h") - plot_dict_var = plot_dict.get(var, {}) - plot_dict_t0 = plot_dict_var.get(t0, {}) - plot_dict_order = {"filter_input": filter_input, - "filter_input_nc": filter_input_nc, - "valid_range": valid_range, - "time_range": time_range, - "order": o, "h": h} - plot_dict_t0[i] = plot_dict_order - plot_dict_var[t0] = plot_dict_t0 - plot_dict[var] = plot_dict_var - - for var, viz_date_dict in plot_dict.items(): - for it0, t0 in enumerate(viz_date_dict.keys()): - viz_data = viz_date_dict[t0] - residuum_true = None - for ifilter in sorted(viz_data.keys()): - data = viz_data[ifilter] - filter_input = data["filter_input"] - filter_input_nc = data["filter_input_nc"] if residuum_true is None else residuum_true.sel( - {new_dim: filter_input.coords[new_dim]}) - valid_range = data["valid_range"] - time_axis = data["time_range"] - # time_axis = pd.date_range(t_minus, t_plus, freq=sampling) - filter_order = data["order"] - h = data["h"] - t_minus = t0 + np.timedelta64(-int(1.5 * valid_range.start), td_type) - t_plus = t0 + np.timedelta64(int(0.5 * valid_range.start), td_type) - fig, ax = plt.subplots() - ax.axvspan(t0 + np.timedelta64(-valid_range.start, td_type), - t0 + np.timedelta64(valid_range.stop - 1, td_type), color="whitesmoke", - label="valid area") - ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)") - - # original data - ax.plot(time_axis, filter_input_nc.values.flatten(), color="darkgrey", linestyle="dashed", - label="original") - - # clim apriori - if ifilter == 0: - d_tmp = filter_input.sel( - {new_dim: slice(0, filter_input.coords[new_dim].values.max())}).values.flatten() - else: - d_tmp 
= filter_input.values.flatten() - ax.plot(time_axis[len(time_axis) - len(d_tmp):], d_tmp, color="darkgrey", linestyle="solid", - label="estimated future") - - # clim filter response - filt = xr.apply_ufunc(fir_filter_convolve, filter_input, - input_core_dims=[[new_dim]], - output_core_dims=[[new_dim]], - vectorize=True, - kwargs={"h": h}, - output_dtypes=[filter_input.dtype]) - ax.plot(time_axis, filt.values.flatten(), color="black", linestyle="solid", - label="clim filter response", linewidth=2) - residuum_estimated = filter_input - filt - - # ideal filter response - filt = xr.apply_ufunc(fir_filter_convolve, filter_input_nc, - input_core_dims=[[new_dim]], - output_core_dims=[[new_dim]], - vectorize=True, - kwargs={"h": h}, - output_dtypes=[filter_input.dtype]) - ax.plot(time_axis, filt.values.flatten(), color="black", linestyle="dashed", - label="ideal filter response", linewidth=2) - residuum_true = filter_input_nc - filt - - # set title, legend, and save plot - ax_start = max(t_minus, time_axis[0]) - ax_end = min(t_plus, time_axis[-1]) - ax.set_xlim((ax_start, ax_end)) - plt.title(f"Input of ClimFilter ({str(var)})") - plt.legend() - fig.autofmt_xdate() - plt.tight_layout() - plot_name = os.path.join(plot_folder, - f"climFIR_{self.plot_name}_{str(var)}_{it0}_{ifilter}.pdf") - plt.savefig(plot_name, dpi=300) - plt.close('all') - - # plot residuum - fig, ax = plt.subplots() - ax.axvspan(t0 + np.timedelta64(-valid_range.start, td_type), - t0 + np.timedelta64(valid_range.stop - 1, td_type), color="whitesmoke", - label="valid area") - ax.axvline(t0, color="lightgrey", lw=6, label="time of interest ($t_0$)") - ax.plot(time_axis, residuum_true.values.flatten(), color="black", linestyle="dashed", - label="ideal filter residuum", linewidth=2) - ax.plot(time_axis, residuum_estimated.values.flatten(), color="black", linestyle="solid", - label="clim filter residuum", linewidth=2) - ax.set_xlim((ax_start, ax_end)) - plt.title(f"Residuum of ClimFilter ({str(var)})") - 
plt.legend(loc="upper left") - fig.autofmt_xdate() - plt.tight_layout() - plot_name = os.path.join(plot_folder, - f"climFIR_{self.plot_name}_{str(var)}_{it0}_{ifilter}_residuum.pdf") - plt.savefig(plot_name, dpi=300) - plt.close('all') - @property def filter_coefficients(self): return self._h @@ -897,6 +768,7 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass): def firwin_kzf(m, k): + m, k = int(m), int(k) coef = np.ones(m) for i in range(1, k): t = np.zeros((m, m + i * (m - 1))) diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py index 5ddaa3ee3fe505eeb7c8082274d9cd888cec720f..1f5a86cde01752b74be82476e2e0fd8cad514a9e 100644 --- a/mlair/helpers/helpers.py +++ b/mlair/helpers/helpers.py @@ -4,6 +4,7 @@ __date__ = '2019-10-21' import inspect import math +import sys import numpy as np import xarray as xr @@ -80,8 +81,10 @@ def remove_items(obj: Union[List, Dict, Tuple], items: Any): def remove_from_list(list_obj, item_list): """Remove implementation for lists.""" - if len(items) > 1: + if len(item_list) > 1: return [e for e in list_obj if e not in item_list] + elif len(item_list) == 0: + return list_obj else: list_obj = list_obj.copy() try: @@ -179,3 +182,34 @@ def convert2xrda(arr: Union[xr.DataArray, xr.Dataset, np.ndarray, int, float], kwargs.update({'dims': dims, 'coords': coords}) return xr.DataArray(arr, **kwargs) + + +# def convert_size(size_bytes): +# if size_bytes == 0: +# return "0B" +# size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") +# i = int(math.floor(math.log(size_bytes, 1024))) +# p = math.pow(1024, i) +# s = round(size_bytes / p, 2) +# return "%s %s" % (s, size_name[i]) +# +# +# def get_size(obj, seen=None): +# """Recursively finds size of objects""" +# size = sys.getsizeof(obj) +# if seen is None: +# seen = set() +# obj_id = id(obj) +# if obj_id in seen: +# return 0 +# # Important mark as seen *before* entering recursion to gracefully handle +# # self-referential objects +# seen.add(obj_id) +# if 
isinstance(obj, dict): +# size += sum([get_size(v, seen) for v in obj.values()]) +# size += sum([get_size(k, seen) for k in obj.keys()]) +# elif hasattr(obj, '__dict__'): +# size += get_size(obj.__dict__, seen) +# elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)): +# size += sum([get_size(i, seen) for i in obj]) +# return size diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py index a1e713a8c135800d02ff7c27894485a5da7fae37..af7975f3a042163a885f590c6624076fe91f03aa 100644 --- a/mlair/helpers/statistics.py +++ b/mlair/helpers/statistics.py @@ -152,7 +152,10 @@ def min_max_apply(data: Data, _min: Data, _max: Data, feature_range: Data = (0, :param feature_range: scale data to any interval given in feature range. Default is scaling on interval [0, 1]. :return: min/max scaled data """ - return (data - _min) / (_max - _min) * (max(feature_range) - min(feature_range)) + min(feature_range) + if not isinstance(feature_range, xr.DataArray): + return (data - _min) / (_max - _min) * (max(feature_range) - min(feature_range)) + min(feature_range) + else: + return (data - _min) / (_max - _min) * (feature_range.max() - feature_range.min()) + feature_range.min() def log(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data)]]: @@ -284,7 +287,7 @@ class SkillScores: combination_strings = [f"{first}-{second}" for (first, second) in combinations] return combinations, combination_strings - def skill_scores(self) -> pd.DataFrame: + def skill_scores(self) -> [pd.DataFrame, pd.DataFrame]: """ Calculate skill scores for all combinations of model names. 
@@ -293,6 +296,7 @@ class SkillScores: ahead_names = list(self.external_data[self.ahead_dim].data) combinations, combination_strings = self.get_model_name_combinations() skill_score = pd.DataFrame(index=combination_strings) + count = pd.DataFrame(index=combination_strings) for iahead in ahead_names: data = self.external_data.sel({self.ahead_dim: iahead}) skill_score[iahead] = [self.general_skill_score(data, @@ -300,7 +304,12 @@ class SkillScores: reference_name=second, observation_name=self.observation_name) for (first, second) in combinations] - return skill_score + count[iahead] = [self.get_count(data, + forecast_name=first, + reference_name=second, + observation_name=self.observation_name) + for (first, second) in combinations] + return skill_score, count def climatological_skill_scores(self, internal_data: Data, forecast_name: str) -> xr.DataArray: """ @@ -314,7 +323,10 @@ class SkillScores: :return: all CASES as well as all terms """ - ahead_names = list(self.external_data[self.ahead_dim].data) + if self.external_data is not None: + ahead_names = list(self.external_data[self.ahead_dim].data) + else: + ahead_names = list(internal_data[self.ahead_dim].data) all_terms = ['AI', 'AII', 'AIII', 'AIV', 'BI', 'BII', 'BIV', 'CI', 'CIV', 'CASE I', 'CASE II', 'CASE III', 'CASE IV'] @@ -349,7 +361,7 @@ class SkillScores: **kwargs) def general_skill_score(self, data: Data, forecast_name: str, reference_name: str, - observation_name: str = None) -> np.ndarray: + observation_name: str = None, dim: str = "index") -> np.ndarray: r""" Calculate general skill score based on mean squared error. 
@@ -362,14 +374,29 @@ class SkillScores: """ if observation_name is None: observation_name = self.observation_name - data = data.dropna("index") + data = data.dropna(dim) observation = data.sel(type=observation_name) forecast = data.sel(type=forecast_name) reference = data.sel(type=reference_name) mse = mean_squared_error - skill_score = 1 - mse(observation, forecast) / mse(observation, reference) + skill_score = 1 - mse(observation, forecast, dim=dim) / mse(observation, reference, dim=dim) return skill_score.values + def get_count(self, data: Data, forecast_name: str, reference_name: str, + observation_name: str = None) -> np.ndarray: + r""" + Calculate general skill score based on mean squared error. + + :param data: internal data containing data for observation, forecast and reference + :param observation_name: name of observation + :param forecast_name: name of forecast + :param reference_name: name of reference + + :return: skill score of forecast + """ + data = data.dropna("index") + return data.count("index").max().values + def skill_score_pre_calculations(self, data: Data, observation_name: str, forecast_name: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray, @@ -475,3 +502,58 @@ class SkillScores: return monthly_mean + +def create_single_bootstrap_realization(data: xr.DataArray, dim_name_time: str) -> xr.DataArray: + """ + Return a bootstraped realization of data + :param data: data from which to draw ONE bootstrap realization + :param dim_name_time: name of time dimension + :return: bootstrapped realization of data + """ + + num_of_blocks = data.coords[dim_name_time].shape[0] + boot_idx = np.random.choice(num_of_blocks, size=num_of_blocks, replace=True) + return data.isel({dim_name_time: boot_idx}) + + +def calculate_average(data: xr.DataArray, **kwargs) -> xr.DataArray: + """ + Calculate mean of data + :param data: data for which to calculate mean + :return: mean of data + """ + return data.mean(**kwargs) + + +def create_n_bootstrap_realizations(data: 
xr.DataArray, dim_name_time: str, dim_name_model: str, n_boots: int = 1000, + dim_name_boots: str = 'boots') -> xr.DataArray: + """ + Create n bootstrap realizations and calculate averages across realizations + + :param data: original data from which to create bootstrap realizations + :param dim_name_time: name of time dimension + :param dim_name_model: name of model dimension + :param n_boots: number of bootstap realizations + :param dim_name_boots: name of bootstap dimension + :return: + """ + res_dims = [dim_name_boots] + dims = list(data.dims) + coords = {dim_name_boots: range(n_boots), dim_name_model: data.coords[dim_name_model] } + if len(dims) > 1: + res_dims = res_dims + dims[1:] + res = xr.DataArray(np.nan, dims=res_dims, coords=coords) + for boot in range(n_boots): + res[boot] = (calculate_average( + create_single_bootstrap_realization(data, dim_name_time=dim_name_time), + dim=dim_name_time, skipna=True)) + return res + + + + + + + + + diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py index 513f64f2c174d94cb7230b141387c9a850d678cb..6a837993fcf849a860e029d441de910d55888a1b 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -8,6 +8,7 @@ import os import logging import multiprocessing import psutil +import sys import numpy as np import pandas as pd @@ -20,6 +21,8 @@ from mlair.data_handler import DataCollection from mlair.helpers import TimeTrackingWrapper, to_list, remove_items from mlair.plotting.abstract_plot_class import AbstractPlotClass +matplotlib.use("Agg") + @TimeTrackingWrapper class PlotStationMap(AbstractPlotClass): # pragma: no cover @@ -459,7 +462,7 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover """ def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".", plot_name="histogram", - variables_dim="variables", time_dim="datetime", window_dim="window"): + variables_dim="variables", time_dim="datetime", 
window_dim="window", upsampling=False): super().__init__(plot_folder, plot_name) self.variables_dim = variables_dim self.time_dim = time_dim @@ -468,6 +471,8 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover self.bins = {} self.interval_width = {} self.bin_edges = {} + if upsampling is True: + self._handle_upsampling(generators) # input plots for branch_pos in range(number_of_branches): @@ -483,6 +488,11 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover self._plot(add_name="target", subset=subset) self._plot_combined(add_name="target") + @staticmethod + def _handle_upsampling(generators): + if "train" in generators: + generators.update({"train_upsampled": generators["train"]}) + @staticmethod def _get_inputs_targets(gens, dim): k = list(gens.keys())[0] @@ -495,11 +505,15 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover def _calculate_hist(self, generators, variables, input_data=True, branch_pos=0): n_bins = 100 for set_type, generator in generators.items(): + upsampling = "upsampled" in set_type tmp_bins = {} tmp_edges = {} end = {} start = {} - f = lambda x: x.get_X(as_numpy=False)[branch_pos] if input_data is True else x.get_Y(as_numpy=False) + if input_data is True: + f = lambda x: x.get_X(as_numpy=False, upsampling=upsampling)[branch_pos] + else: + f = lambda x: x.get_Y(as_numpy=False, upsampling=upsampling) for gen in generator: w = min(abs(f(gen).coords[self.window_dim].values)) data = f(gen).sel({self.window_dim: w}) @@ -536,6 +550,7 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover bin_edges = self.bin_edges[subset] interval_width = self.interval_width[subset] colors = self.get_dataset_colors() + colors.update({"train_upsampled": colors.get("train_val", "#000000")}) for var in bins.keys(): fig, ax = plt.subplots() hist_var = bins[var] @@ -555,6 +570,7 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) variables = 
self.bins[list(self.bins.keys())[0]].keys() colors = self.get_dataset_colors() + colors.update({"train_upsampled": colors.get("train_val", "#000000")}) for var in variables: fig, ax = plt.subplots() for subset in self.bins.keys(): @@ -618,23 +634,27 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover self._plot_total(raw=True) self._plot_total(raw=False) if multiple > 1: - self._plot_difference(label_names) + self._plot_difference(label_names, plot_name_add="_last") + self._prepare_pgram(generator, pos, multiple, use_multiprocessing=use_multiprocessing, + use_last_input_value=False) + self._plot_difference(label_names, plot_name_add="_first") @staticmethod def _has_filter_dimension(g, pos): - # check if coords raw data differs from input / target data - check_data = g.id_class - if "filter" not in [check_data.input_data, check_data.target_data][pos].coords.dims: + """Inspect if filtered data is provided and return number and labels of filtered components.""" + check_class = g.id_class + check_data = [check_class.get_X(as_numpy=False), check_class.get_Y(as_numpy=False)][pos] + if not hasattr(check_class, "filter_dim"): # data handler has no filtered data return 1, [] else: - if len(set(check_data._data[0].coords.dims).symmetric_difference(check_data.input_data.coords.dims)) > 0: - return g.id_class.input_data.coords["filter"].shape[0], g.id_class.input_data.coords[ - "filter"].values.tolist() - else: + filter_dim = check_class.filter_dim + if filter_dim not in check_data.coords.dims: # current data has no filter (e.g. target data) return 1, [] + else: + return check_data.coords[filter_dim].shape[0], check_data.coords[filter_dim].values.tolist() @TimeTrackingWrapper - def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False): + def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False, use_last_input_value=True): """ Create periodogram data. 
""" @@ -648,8 +668,8 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover plot_data_raw_single = dict() plot_data_mean_single = dict() self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000) - raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing) - # raw_data_single = self._prepare_pgram_parallel_var(generator, m, pos, use_multiprocessing) + raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing, + use_last_input_value=use_last_input_value) for var in raw_data_single.keys(): pgram_com = [] pgram_mean = 0 @@ -701,7 +721,7 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover raw_data_single[var_str] = raw_data_single[var_str] + [(f, pgram)] return raw_data_single - def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing): + def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing, use_last_input_value=True): """Implementation of data preprocessing using parallel generator element processing.""" raw_data_single = dict() res = [] @@ -709,14 +729,15 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover pool = multiprocessing.Pool( min([psutil.cpu_count(logical=False), len(generator), 16])) # use only physical cpus output = [ - pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index)) + pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index, + use_last_input_value)) for g in generator] for i, p in enumerate(output): res.append(p.get()) pool.close() else: for g in generator: - res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index)) + res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index, use_last_input_value)) for res_dict in res: for k, v in res_dict.items(): if k not in raw_data_single.keys(): @@ -802,8 +823,8 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover 
pdf_pages.close() plt.close('all') - def _plot_difference(self, label_names): - plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter.pdf" + def _plot_difference(self, label_names, plot_name_add = ""): + plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter{plot_name_add}.pdf" plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name) logging.info(f"... plotting {plot_name}") pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) @@ -832,21 +853,22 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover plt.close('all') -def f_proc(var, d_var, f_index, time_dim="datetime"): # pragma: no cover +def f_proc(var, d_var, f_index, time_dim="datetime", use_last_value=True): # pragma: no cover var_str = str(var) t = (d_var[time_dim] - d_var[time_dim][0]).astype("timedelta64[h]").values / np.timedelta64(1, "D") if len(d_var.shape) > 1: # use only max value if dimensions are remaining (e.g. max(window) -> latest value) to_remove = remove_items(d_var.coords.dims, time_dim) for e in to_list(to_remove): - d_var = d_var.sel({e: d_var[e].max()}) + d_var = d_var.sel({e: d_var[e].max() if use_last_value is True else d_var[e].min()}) pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").power(f_index) # f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").autopower() return var_str, f_index, pgram - -def f_proc_2(g, m, pos, variables_dim, time_dim, f_index): # pragma: no cover +def f_proc_2(g, m, pos, variables_dim, time_dim, f_index, use_last_value): # pragma: no cover raw_data_single = dict() + if hasattr(g.id_class, "lazy"): + g.id_class.load_lazy() if g.id_class.lazy is True else None if m == 0: d = g.id_class._data else: @@ -856,8 +878,10 @@ def f_proc_2(g, m, pos, variables_dim, time_dim, f_index): # pragma: no cover d = d[pos] if isinstance(d, tuple) else d for var in d[variables_dim].values: d_var = d.loc[{variables_dim: var}].squeeze().dropna(time_dim) - 
var_str, f, pgram = f_proc(var, d_var, f_index) + var_str, f, pgram = f_proc(var, d_var, f_index, use_last_value=use_last_value) raw_data_single[var_str] = [(f, pgram)] + if hasattr(g.id_class, "lazy"): + g.id_class.clean_up() if g.id_class.lazy is True else None return raw_data_single @@ -888,6 +912,8 @@ class PlotClimateFirFilter(AbstractPlotClass): from mlair.helpers.filter import fir_filter_convolve + logging.info(f"start PlotClimateFirFilter for ({name})") + # adjust default plot parameters rc_params = { 'axes.labelsize': 'large', @@ -951,59 +977,63 @@ class PlotClimateFirFilter(AbstractPlotClass): for it0, t0 in enumerate(viz_date_dict.keys()): viz_data = viz_date_dict[t0] residuum_true = None - for ifilter in sorted(viz_data.keys()): - data = viz_data[ifilter] - filter_input = data["filter_input"] - filter_input_nc = data["filter_input_nc"] if residuum_true is None else residuum_true.sel( - {new_dim: filter_input.coords[new_dim]}) - valid_range = data["valid_range"] - time_axis = data["time_range"] - filter_order = data["order"] - h = data["h"] - fig, ax = plt.subplots() - - # plot backgrounds - self._plot_valid_area(ax, t0, valid_range, td_type) - self._plot_t0(ax, t0) - - # original data - self._plot_original_data(ax, time_axis, filter_input_nc) - - # clim apriori - self._plot_apriori(ax, time_axis, filter_input, new_dim, ifilter) - - # clim filter response - residuum_estimated = self._plot_clim_filter(ax, time_axis, filter_input, new_dim, h, + try: + for ifilter in sorted(viz_data.keys()): + data = viz_data[ifilter] + filter_input = data["filter_input"] + filter_input_nc = data["filter_input_nc"] if residuum_true is None else residuum_true.sel( + {new_dim: filter_input.coords[new_dim]}) + valid_range = data["valid_range"] + time_axis = data["time_range"] + filter_order = data["order"] + h = data["h"] + fig, ax = plt.subplots() + + # plot backgrounds + self._plot_valid_area(ax, t0, valid_range, td_type) + self._plot_t0(ax, t0) + + # original data + 
self._plot_original_data(ax, time_axis, filter_input_nc) + + # clim apriori + self._plot_apriori(ax, time_axis, filter_input, new_dim, ifilter) + + # clim filter response + residuum_estimated = self._plot_clim_filter(ax, time_axis, filter_input, new_dim, h, + output_dtypes=filter_input.dtype) + + # ideal filter response + residuum_true = self._plot_ideal_filter(ax, time_axis, filter_input_nc, new_dim, h, output_dtypes=filter_input.dtype) - # ideal filter response - residuum_true = self._plot_ideal_filter(ax, time_axis, filter_input_nc, new_dim, h, - output_dtypes=filter_input.dtype) - - # set title, legend, and save plot - xlims = self._set_xlim(ax, t0, filter_order, valid_range, td_type, time_axis) - - plt.title(f"Input of ClimFilter ({str(var)})") - plt.legend() - fig.autofmt_xdate() - plt.tight_layout() - self.plot_name = f"climFIR_{self._name}_{str(var)}_{it0}_{ifilter}" - self._save() - - # plot residuum - fig, ax = plt.subplots() - self._plot_valid_area(ax, t0, valid_range, td_type) - self._plot_t0(ax, t0) - self._plot_series(ax, time_axis, residuum_true.values.flatten(), style="ideal") - self._plot_series(ax, time_axis, residuum_estimated.values.flatten(), style="clim") - ax.set_xlim(xlims) - plt.title(f"Residuum of ClimFilter ({str(var)})") - plt.legend(loc="upper left") - fig.autofmt_xdate() - plt.tight_layout() - - self.plot_name = f"climFIR_{self._name}_{str(var)}_{it0}_{ifilter}_residuum" - self._save() + # set title, legend, and save plot + xlims = self._set_xlim(ax, t0, filter_order, valid_range, td_type, time_axis) + + plt.title(f"Input of ClimFilter ({str(var)})") + plt.legend() + fig.autofmt_xdate() + plt.tight_layout() + self.plot_name = f"climFIR_{self._name}_{str(var)}_{it0}_{ifilter}" + self._save() + + # plot residuum + fig, ax = plt.subplots() + self._plot_valid_area(ax, t0, valid_range, td_type) + self._plot_t0(ax, t0) + self._plot_series(ax, time_axis, residuum_true.values.flatten(), style="ideal") + self._plot_series(ax, time_axis, 
residuum_estimated.values.flatten(), style="clim") + ax.set_xlim(xlims) + plt.title(f"Residuum of ClimFilter ({str(var)})") + plt.legend(loc="upper left") + fig.autofmt_xdate() + plt.tight_layout() + + self.plot_name = f"climFIR_{self._name}_{str(var)}_{it0}_{ifilter}_residuum" + self._save() + except Exception as e: + logging.info(f"Could not create plot because of:\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") + pass def _set_xlim(self, ax, t0, order, valid_range, td_type, time_axis): """ diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py index ecba8a4e0a3369fbb170a7427ef81365d531bc3b..43f1864f7354c1f711bb886f4f97eda56439ab89 100644 --- a/mlair/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -15,6 +15,7 @@ import pandas as pd import seaborn as sns import xarray as xr from matplotlib.backends.backend_pdf import PdfPages +from matplotlib.offsetbox import AnchoredText from mlair import helpers from mlair.data_handler.iterator import DataCollection @@ -30,7 +31,7 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING) @TimeTrackingWrapper -class PlotMonthlySummary(AbstractPlotClass): +class PlotMonthlySummary(AbstractPlotClass): # pragma: no cover """ Show a monthly summary over all stations for each lead time ("ahead") as box and whiskers plot. @@ -128,7 +129,7 @@ class PlotMonthlySummary(AbstractPlotClass): logging.debug("... 
start plotting") color_palette = [matplotlib.colors.cnames["green"]] + sns.color_palette("Blues_d", self._window_lead_time).as_hex() - ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1., palette=color_palette, + ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1.5, palette=color_palette, flierprops={'marker': '.', 'markersize': 1}, showmeans=True, meanprops={'markersize': 1, 'markeredgecolor': 'k'}) ylabel = self._spell_out_chemical_concentrations(target_var) @@ -137,7 +138,7 @@ class PlotMonthlySummary(AbstractPlotClass): @TimeTrackingWrapper -class PlotConditionalQuantiles(AbstractPlotClass): +class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover """ Create cond.quantile plots as originally proposed by Murphy, Brown and Chen (1989) [But in log scale]. @@ -381,7 +382,7 @@ class PlotConditionalQuantiles(AbstractPlotClass): @TimeTrackingWrapper -class PlotClimatologicalSkillScore(AbstractPlotClass): +class PlotClimatologicalSkillScore(AbstractPlotClass): # pragma: no cover """ Create plot of climatological skill score after Murphy (1988) as box plot over all stations. 
@@ -448,7 +449,7 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): fig, ax = plt.subplots() if not score_only: fig.set_size_inches(11.7, 8.27) - sns.boxplot(x="terms", y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d", + sns.boxplot(x="terms", y="data", hue="ahead", data=self._data, ax=ax, whis=1.5, palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."}) ax.axhline(y=0, color="grey", linewidth=.5) ax.set(ylabel=f"{self._label_add(score_only)}skill score", xlabel="", title="summary of all stations", @@ -473,7 +474,7 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): @TimeTrackingWrapper -class PlotCompetitiveSkillScore(AbstractPlotClass): +class PlotCompetitiveSkillScore(AbstractPlotClass): # pragma: no cover """ Create competitive skill score plot. @@ -491,12 +492,12 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): """ - def __init__(self, data: pd.DataFrame, plot_folder=".", model_setup="NN"): + def __init__(self, data: Dict[str, pd.DataFrame], plot_folder=".", model_setup="NN"): """Initialise.""" super().__init__(plot_folder, f"skill_score_competitive_{model_setup}") self._model_setup = model_setup self._labels = None - self._data = self._prepare_data(data) + self._data = self._prepare_data(helpers.remove_items(data, "total")) default_plot_name = self.plot_name # draw full detail plot self.plot_name = default_plot_name + "_full_detail" @@ -538,7 +539,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): fig, ax = plt.subplots(figsize=(size, size * 0.8)) data = self._filter_comparisons(self._data) if single_model_comparison is True else self._data order = self._create_pseudo_order(data) - sns.boxplot(x="comparison", y="data", hue="ahead", data=data, whis=1., ax=ax, palette="Blues_d", + sns.boxplot(x="comparison", y="data", hue="ahead", data=data, whis=1.5, ax=ax, palette="Blues_d", showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, 
flierprops={"marker": "."}, order=order) ax.axhline(y=0, color="grey", linewidth=.5) @@ -553,7 +554,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): fig, ax = plt.subplots() data = self._filter_comparisons(self._data) if single_model_comparison is True else self._data order = self._create_pseudo_order(data) - sns.boxplot(y="comparison", x="data", hue="ahead", data=data, whis=1., ax=ax, palette="Blues_d", + sns.boxplot(y="comparison", x="data", hue="ahead", data=data, whis=1.5, ax=ax, palette="Blues_d", showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, order=order) ax.axvline(x=0, color="grey", linewidth=.5) @@ -590,14 +591,9 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): @TimeTrackingWrapper -class PlotBootstrapSkillScore(AbstractPlotClass): +class PlotFeatureImportanceSkillScore(AbstractPlotClass): # pragma: no cover """ - Create plot of climatological skill score after Murphy (1988) as box plot over all stations. - - A forecast time step (called "ahead") is separately shown to highlight the differences for each prediction time - step. Either each single term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed - (score_only=True, default). Y-axis is adjusted following the data and not hard coded. The plot is saved under - plot_folder path with name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi. + Create plot of feature importance analysis. By passing a list `separate_vars` containing variable names, a second plot is created showing the `separate_vars` and the remaining variables side by side with different scaling. 
@@ -612,7 +608,8 @@ class PlotBootstrapSkillScore(AbstractPlotClass): def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None, sampling: str = "daily", ahead_dim: str = "ahead", bootstrap_type: str = None, - bootstrap_method: str = None): + bootstrap_method: str = None, boot_dim: str = "boots", model_name: str = "NN", + branch_names: list = None, ylim: tuple = None): """ Set attributes and create plot. @@ -625,24 +622,32 @@ class PlotBootstrapSkillScore(AbstractPlotClass): :param bootstrap_annotation: additional information to use in the file name (default: None) """ annotation = ["_".join([s for s in ["", bootstrap_type, bootstrap_method] if s is not None])][0] - super().__init__(plot_folder, f"skill_score_bootstrap_{model_setup}{annotation}") + super().__init__(plot_folder, f"feature_importance_{model_setup}{annotation}") if separate_vars is None: separate_vars = ['o3'] self._labels = None self._x_name = "boot_var" self._ahead_dim = ahead_dim + self._boot_dim = boot_dim self._boot_type = self._set_bootstrap_type(bootstrap_type) self._boot_method = self._set_bootstrap_method(bootstrap_method) + self._number_of_bootstraps = 0 + self._branches_names = branch_names + self._ylim = ylim - self._title = f"Bootstrap analysis ({self._boot_method}, {self._boot_type})" self._data = self._prepare_data(data, sampling) + self._set_title(model_name) if "branch" in self._data.columns: plot_name = self.plot_name for branch in self._data["branch"].unique(): - self._title = f"Bootstrap analysis ({self._boot_method}, {self._boot_type}, {branch})" + self._set_title(model_name, branch) self._plot(branch=branch) self.plot_name = f"{plot_name}_{branch}" self._save() + if len(set(separate_vars).intersection(self._data[self._x_name].unique())) > 0: + self.plot_name += '_separated' + self._plot(branch=branch, separate_vars=separate_vars) + self._save(bbox_inches='tight') else: self._plot() self._save() @@ -655,6 +660,21 @@ class 
PlotBootstrapSkillScore(AbstractPlotClass): def _set_bootstrap_type(boot_type): return {"singleinput": "single input"}.get(boot_type, boot_type) + def _set_title(self, model_name, branch=None): + title_d = {"single input": "Single Inputs", "branch": "Input Branches", "variable": "Variables"} + base_title = f"{model_name}\nImportance of {title_d[self._boot_type]}" + + additional = [] + if branch is not None: + branch_name = self._branches_names[branch] if self._branches_names is not None else branch + additional.append(branch_name) + if self._number_of_bootstraps > 1: + additional.append(f"n={self._number_of_bootstraps}") + additional_title = ", ".join(additional) + if len(additional_title) > 0: + additional_title = f" ({additional_title})" + self._title = base_title + additional_title + @staticmethod def _set_bootstrap_method(boot_method): return {"zero_mean": "zero mean", "shuffle": "shuffled"}.get(boot_method, boot_method) @@ -671,14 +691,16 @@ class PlotBootstrapSkillScore(AbstractPlotClass): """ station_dim = "station" data = helpers.dict_to_xarray(data, station_dim).sortby(self._x_name) + data = data.transpose(station_dim, self._ahead_dim, self._boot_dim, self._x_name) if self._boot_type == "single input": number_tags = self._get_number_tag(data.coords[self._x_name].values, split_by='_') new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_', keep=1, as_unique=True) - values = data.values.reshape((data.shape[0], len(new_boot_coords), len(number_tags), data.shape[-1])) - data = xr.DataArray(values, coords={station_dim: data.coords["station"], self._x_name: new_boot_coords, - "branch": number_tags, self._ahead_dim: data.coords[self._ahead_dim]}, - dims=[station_dim, self._x_name, "branch", self._ahead_dim]) + values = data.values.reshape((*data.shape[:3], len(number_tags), len(new_boot_coords))) + data = xr.DataArray(values, coords={station_dim: data.coords[station_dim], self._x_name: new_boot_coords, + "branch": 
number_tags, self._ahead_dim: data.coords[self._ahead_dim], + self._boot_dim: data.coords[self._boot_dim]}, + dims=[station_dim, self._ahead_dim, self._boot_dim, "branch", self._x_name]) else: try: new_boot_coords = self._return_vars_without_number_tag(data.coords[self._x_name].values, split_by='_', @@ -690,6 +712,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass): self._labels = [str(i) + sampling_letter for i in data.coords[self._ahead_dim].values] if station_dim not in data.dims: data = data.expand_dims(station_dim) + self._number_of_bootstraps = np.unique(data.coords[self._boot_dim].values).shape[0] return data.to_dataframe("data").reset_index(level=np.arange(len(data.dims)).tolist()) @staticmethod @@ -738,10 +761,10 @@ class PlotBootstrapSkillScore(AbstractPlotClass): if separate_vars is None: self._plot_all_variables(branch) else: - self._plot_selected_variables(separate_vars) + self._plot_selected_variables(separate_vars, branch) - def _plot_selected_variables(self, separate_vars: List): - data = self._data + def _plot_selected_variables(self, separate_vars: List, branch=None): + data = self._data if branch is None else self._data[self._data["branch"] == str(branch)] self.raise_error_if_separate_vars_do_not_exist(data, separate_vars, self._x_name) all_variables = self._get_unique_values_from_column_of_df(data, self._x_name) remaining_vars = helpers.remove_items(all_variables, separate_vars) @@ -749,22 +772,30 @@ class PlotBootstrapSkillScore(AbstractPlotClass): data_second = self._select_data(df=data, variables=remaining_vars, column_name=self._x_name) fig, ax = plt.subplots(nrows=1, ncols=2, gridspec_kw={'width_ratios': [len(separate_vars), - len(remaining_vars)]}) + len(remaining_vars)]}, + figsize=(len(remaining_vars),len(remaining_vars)/2.)) if len(separate_vars) > 1: first_box_width = .8 else: - first_box_width = 2. 
+ first_box_width = .8 - sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_first, ax=ax[0], whis=1., + sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_first, ax=ax[0], whis=1.5, palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, - flierprops={"marker": "."}, width=first_box_width) + showfliers=False, width=first_box_width) ax[0].set(ylabel=f"skill score", xlabel="") + if self._ylim is not None: + _ylim = self._ylim if isinstance(self._ylim, tuple) else self._ylim[0] + ax[0].set(ylim=_ylim) - sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_second, ax=ax[1], whis=1., + sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=data_second, ax=ax[1], whis=1.5, palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, - flierprops={"marker": "."}) + showfliers=False) ax[1].set(ylabel="", xlabel="") ax[1].yaxis.tick_right() + if self._ylim is not None and isinstance(self._ylim, list): + _ylim = self._ylim[1] + ax[1].set(ylim=_ylim) + handles, _ = ax[1].get_legend_handles_labels() for sax in ax: matplotlib.pyplot.sca(sax) @@ -772,7 +803,8 @@ class PlotBootstrapSkillScore(AbstractPlotClass): plt.xticks(rotation=45, ha='right') sax.legend_.remove() - fig.legend(handles, self._labels, loc='upper center', ncol=len(handles) + 1, ) + # fig.legend(handles, self._labels, loc='upper center', ncol=len(handles) + 1, ) + ax[1].legend(handles, self._labels, loc='lower center', ncol=len(handles) + 1, fontsize="medium") def align_yaxis(ax1, ax2): """ @@ -797,6 +829,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass): align_yaxis(ax[0], ax[1]) align_yaxis(ax[0], ax[1]) + plt.subplots_adjust(right=0.8) plt.title(self._title) @staticmethod @@ -826,12 +859,29 @@ class PlotBootstrapSkillScore(AbstractPlotClass): """ """ - fig, ax = plt.subplots() plot_data = self._data if branch is None else self._data[self._data["branch"] == str(branch)] - 
sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1., palette="Blues_d", - showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."}) + if self._boot_type == "branch": + fig, ax = plt.subplots(figsize=(0.5 + 2 / len(plot_data[self._x_name].unique()) + len(plot_data[self._x_name].unique()),4)) + sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1., + palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, + showfliers=False, width=0.8) + else: + fig, ax = plt.subplots() + sns.boxplot(x=self._x_name, y="data", hue=self._ahead_dim, data=plot_data, ax=ax, whis=1.5, palette="Blues_d", + showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, showfliers=False) ax.axhline(y=0, color="grey", linewidth=.5) - plt.xticks(rotation=45) + + if self._ylim is not None: + if isinstance(self._ylim, tuple): + _ylim = self._ylim + else: + _ylim = (min(self._ylim[0][0], self._ylim[1][0]), max(self._ylim[0][1], self._ylim[1][1])) + ax.set(ylim=_ylim) + + if self._boot_type == "branch": + plt.xticks() + else: + plt.xticks(rotation=45) ax.set(ylabel=f"skill score", xlabel="", title=self._title) handles, _ = ax.get_legend_handles_labels() ax.legend(handles, self._labels) @@ -839,7 +889,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass): @TimeTrackingWrapper -class PlotTimeSeries: +class PlotTimeSeries: # pragma: no cover """ Create time series plot. 
@@ -847,13 +897,14 @@ class PlotTimeSeries: """ def __init__(self, stations: List, data_path: str, name: str, window_lead_time: int = None, plot_folder: str = ".", - sampling="daily", model_name="nn", obs_name="obs"): + sampling="daily", model_name="nn", obs_name="obs", ahead_dim="ahead"): """Initialise.""" self._data_path = data_path self._data_name = name self._stations = stations self._model_name = model_name self._obs_name = obs_name + self._ahead_dim = ahead_dim self._window_lead_time = self._get_window_lead_time(window_lead_time) self._sampling = self._get_sampling(sampling) self._plot(plot_folder) @@ -876,7 +927,7 @@ class PlotTimeSeries: :param window_lead_time: lead time from arguments to validate :return: validated lead time, comes either from given argument or from data itself """ - ahead_steps = len(self._load_data(self._stations[0]).ahead) + ahead_steps = len(self._load_data(self._stations[0]).coords[self._ahead_dim]) if window_lead_time is None: window_lead_time = ahead_steps return min(ahead_steps, window_lead_time) @@ -898,10 +949,13 @@ class PlotTimeSeries: data_year = data.sel(index=f"{start + i_year}") for i_half_of_year in range(factor): pos = factor * i_year + i_half_of_year - plot_data = self._create_plot_data(data_year, factor, i_half_of_year) - self._plot_obs(axes[pos], plot_data) - self._plot_ahead(axes[pos], plot_data) - if np.isnan(plot_data.values).all(): + try: + plot_data = self._create_plot_data(data_year, factor, i_half_of_year) + self._plot_obs(axes[pos], plot_data) + self._plot_ahead(axes[pos], plot_data) + if np.isnan(plot_data.values).all(): + nan_list.append(pos) + except Exception: nan_list.append(pos) self._clean_up_axes(nan_list, axes, fig) self._save_page(station, pdf_pages) @@ -938,9 +992,8 @@ class PlotTimeSeries: def _plot_ahead(self, ax, data): color = sns.color_palette("Blues_d", self._window_lead_time).as_hex() - for ahead in data.coords["ahead"].values: - plot_data = data.sel(type=self._model_name, 
ahead=ahead).drop(["type", "ahead"]).squeeze().shift( - index=ahead) + for ahead in data.coords[self._ahead_dim].values: + plot_data = data.sel({"type": self._model_name, self._ahead_dim: ahead}).drop(["type", self._ahead_dim]).squeeze().shift(index=ahead) label = f"{ahead}{self._sampling}" ax.plot(plot_data, color=color[ahead - 1], label=label) @@ -969,7 +1022,7 @@ class PlotTimeSeries: @TimeTrackingWrapper -class PlotSeparationOfScales(AbstractPlotClass): +class PlotSeparationOfScales(AbstractPlotClass): # pragma: no cover def __init__(self, collection: DataCollection, plot_folder: str = ".", time_dim="datetime", window_dim="window", filter_dim="filter", target_dim="variables"): @@ -995,6 +1048,80 @@ class PlotSeparationOfScales(AbstractPlotClass): self._save() +@TimeTrackingWrapper +class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover + + def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type", + error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots', + block_length: str = None): + super().__init__(plot_folder, "sample_uncertainty_from_bootstrap") + default_name = self.plot_name + self.model_type_dim = model_type_dim + self.error_measure = error_measure + self.dim_name_boots = dim_name_boots + self.error_unit = error_unit + self.block_length = block_length + self.prepare_data(data) + self._plot(orientation="v") + + self.plot_name = default_name + "_horizontal" + self._plot(orientation="h") + + self._apply_root() + + self.plot_name = default_name + "_sqrt" + self._plot(orientation="v") + + self.plot_name = default_name + "_horizontal_sqrt" + self._plot(orientation="h") + + self._data_table = None + self._n_boots = None + + def prepare_data(self, data: xr.DataArray): + self._data_table = data.to_pandas() + if "persi" in self._data_table.columns: + self._data_table["persi"] = self._data_table.pop("persi") + self._n_boots = self._data_table.shape[0] + + def _apply_root(self): + 
self._data_table = np.sqrt(self._data_table) + self.error_measure = f"root {self.error_measure}" + self.error_unit = self.error_unit.replace("$^2$", "") + + def _plot(self, orientation: str = "v"): + data_table = self._data_table + n_boots = self._n_boots + size = len(np.unique(data_table.columns)) + if orientation == "v": + figsize, width = (size, 5), 0.4 + elif orientation == "h": + figsize, width = (6, (1+.5*size)), 0.65 + else: + raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") + fig, ax = plt.subplots(figsize=figsize) + sns.boxplot(data=data_table, ax=ax, whis=1.5, color="white", + showmeans=True, meanprops={"markersize": 6, "markeredgecolor": "k"}, + flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, + boxprops={'facecolor': 'none', 'edgecolor': 'k'}, + width=width, orient=orientation) + if orientation == "v": + ax.set_ylabel(f"{self.error_measure} (in {self.error_unit})") + ax.set_xticklabels(ax.get_xticklabels(), rotation=45) + elif orientation == "h": + ax.set_xlabel(f"{self.error_measure} (in {self.error_unit})") + else: + raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") + text = f"n={n_boots}" if self.block_length is None else f"{self.block_length}, n={n_boots}" + text_box = AnchoredText(text, frameon=True, loc=1, pad=0.5) + plt.setp(text_box.patch, edgecolor='k', facecolor='w') + ax.add_artist(text_box) + plt.setp(ax.lines, color='k') + plt.tight_layout() + self._save() + plt.close("all") + + if __name__ == "__main__": stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'] path = "../../testrun_network/forecasts" diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index 209859c1ff38efe2667c918aa5b79c96f2524be0..63be6eb4c6e8b5f8d3149df023e07d23805f077f 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -18,10 +18,12 @@ from mlair.configuration.defaults import 
DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_WINDOW_DIM, DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_INTERPOLATION_LIMIT, \ DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ - DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ - DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \ + DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_FEATURE_IMPORTANCE, DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS, \ + DEFAULT_FEATURE_IMPORTANCE_N_BOOTS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \ DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG, DEFAULT_MAX_NUMBER_MULTIPROCESSING, \ - DEFAULT_BOOTSTRAP_TYPE, DEFAULT_BOOTSTRAP_METHOD + DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD, DEFAULT_OVERWRITE_LAZY_DATA, \ + DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH, DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS, \ + DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS, DEFAULT_DO_UNCERTAINTY_ESTIMATE from mlair.data_handler import DefaultDataHandler from mlair.run_modules.run_environment import RunEnvironment from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel @@ -212,13 +214,17 @@ class ExperimentSetup(RunEnvironment): sampling: str = None, create_new_model=None, bootstrap_path=None, permute_data_on_training=None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, extreme_values: list = None, - extremes_on_right_tail_only: bool = None, evaluate_bootstraps=None, plot_list=None, - number_of_bootstraps=None, create_new_bootstraps=None, bootstrap_method=None, bootstrap_type=None, + extremes_on_right_tail_only: bool = None, 
evaluate_feature_importance: bool = None, plot_list=None, + feature_importance_n_boots: int = None, feature_importance_create_new_bootstraps: bool = None, + feature_importance_bootstrap_method=None, feature_importance_bootstrap_type=None, data_path: str = None, batch_path: str = None, login_nodes=None, hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None, data_origin: Dict = None, competitors: list = None, competitor_path: str = None, use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, - max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None, **kwargs): + max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None, + overwrite_lazy_data: bool = None, uncertainty_estimate_block_length: str = None, + uncertainty_estimate_evaluate_competitors: bool = None, uncertainty_estimate_n_boots: int = None, + do_uncertainty_estimate: bool = None, **kwargs): # create run framework super().__init__() @@ -287,6 +293,10 @@ class ExperimentSetup(RunEnvironment): self._set_param("logging_path", None, os.path.join(experiment_path, "logging")) path_config.check_path_and_create(self.data_store.get("logging_path")) + # set tmp path + self._set_param("tmp_path", None, os.path.join(experiment_path, "tmp")) + path_config.check_path_and_create(self.data_store.get("tmp_path"), remove_existing=True) + # setup for data self._set_param("stations", stations, default=DEFAULT_STATIONS, apply=helpers.to_list) self._set_param("statistics_per_var", statistics_per_var, default=DEFAULT_VAR_ALL_DICT) @@ -297,6 +307,8 @@ class ExperimentSetup(RunEnvironment): self._set_param("window_history_size", window_history_size, default=DEFAULT_WINDOW_HISTORY_SIZE) self._set_param("overwrite_local_data", overwrite_local_data, default=DEFAULT_OVERWRITE_LOCAL_DATA, scope="preprocessing") + self._set_param("overwrite_lazy_data", overwrite_lazy_data, default=DEFAULT_OVERWRITE_LAZY_DATA, + scope="preprocessing") 
self._set_param("transformation", transformation, default={}) self._set_param("transformation", None, scope="preprocessing") self._set_param("data_handler", data_handler, default=DefaultDataHandler) @@ -342,15 +354,28 @@ class ExperimentSetup(RunEnvironment): default=DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS) # set post-processing instructions - self._set_param("evaluate_bootstraps", evaluate_bootstraps, default=DEFAULT_EVALUATE_BOOTSTRAPS, - scope="general.postprocessing") - create_new_bootstraps = max([self.data_store.get("train_model", "general"), - create_new_bootstraps or DEFAULT_CREATE_NEW_BOOTSTRAPS]) - self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing") - self._set_param("number_of_bootstraps", number_of_bootstraps, default=DEFAULT_NUMBER_OF_BOOTSTRAPS, - scope="general.postprocessing") - self._set_param("bootstrap_method", bootstrap_method, default=DEFAULT_BOOTSTRAP_METHOD) - self._set_param("bootstrap_type", bootstrap_type, default=DEFAULT_BOOTSTRAP_TYPE) + self._set_param("do_uncertainty_estimate", do_uncertainty_estimate, + default=DEFAULT_DO_UNCERTAINTY_ESTIMATE, scope="general.postprocessing") + self._set_param("block_length", uncertainty_estimate_block_length, + default=DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH, scope="uncertainty_estimate") + self._set_param("evaluate_competitors", uncertainty_estimate_evaluate_competitors, + default=DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS, scope="uncertainty_estimate") + self._set_param("n_boots", uncertainty_estimate_n_boots, + default=DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS, scope="uncertainty_estimate") + + self._set_param("evaluate_feature_importance", evaluate_feature_importance, + default=DEFAULT_EVALUATE_FEATURE_IMPORTANCE, scope="general.postprocessing") + feature_importance_create_new_bootstraps = max([self.data_store.get("train_model", "general"), + feature_importance_create_new_bootstraps or + DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS]) + 
self._set_param("create_new_bootstraps", feature_importance_create_new_bootstraps, scope="feature_importance") + self._set_param("n_boots", feature_importance_n_boots, default=DEFAULT_FEATURE_IMPORTANCE_N_BOOTS, + scope="feature_importance") + self._set_param("bootstrap_method", feature_importance_bootstrap_method, + default=DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD, scope="feature_importance") + self._set_param("bootstrap_type", feature_importance_bootstrap_type, + default=DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, scope="feature_importance") + self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing") self._set_param("neighbors", ["DEBW030"]) # TODO: just for testing @@ -377,8 +402,10 @@ class ExperimentSetup(RunEnvironment): if len(self.data_store.search_name(k)) == 0: self._set_param(k, v) else: + s = ", ".join([f"{k}({s})={self.data_store.get(k, scope=s)}" + for s in self.data_store.search_name(k)]) raise KeyError(f"Given argument {k} with value {v} cannot be set for this experiment due to a " - f"conflict with an existing entry with same naming: {k}={self.data_store.get(k)}") + f"conflict with an existing entry with same naming: {s}") def _set_param(self, param: str, value: Any, default: Any = None, scope: str = "general", apply: Callable = None) -> Any: diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index f3909fde29b466af1bf64124ab1d57873ae70d18..e3aa2154559622fdd699d430bc4d386499f5114d 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -6,6 +6,8 @@ __date__ = '2019-12-11' import inspect import logging import os +import sys +import traceback from typing import Dict, Tuple, Union, List, Callable import keras @@ -14,13 +16,14 @@ import pandas as pd import xarray as xr from mlair.configuration import path_config -from mlair.data_handler import BootStraps, KerasIterator +from mlair.data_handler import Bootstraps, KerasIterator from 
mlair.helpers.datastore import NameNotFoundInDataStore from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list, tables from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel from mlair.model_modules import AbstractModelClass from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \ - PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales + PlotCompetitiveSkillScore, PlotTimeSeries, PlotFeatureImportanceSkillScore, PlotConditionalQuantiles, \ + PlotSeparationOfScales, PlotSampleUncertaintyFromBootstrap from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \ PlotPeriodogram, PlotDataHistogram from mlair.run_modules.run_environment import RunEnvironment @@ -46,7 +49,7 @@ class PostProcessing(RunEnvironment): * `target_var` [.] * `sampling` [.] * `output_shape` [model] - * `evaluate_bootstraps` [postprocessing] and if enabled: + * `evaluate_feature_importance` [postprocessing] and if enabled: * `create_new_bootstraps` [postprocessing] * `bootstrap_path` [postprocessing] @@ -81,11 +84,17 @@ class PostProcessing(RunEnvironment): self._sampling = self.data_store.get("sampling") self.window_lead_time = extract_value(self.data_store.get("output_shape", "model")) self.skill_scores = None - self.bootstrap_skill_scores = None + self.feature_importance_skill_scores = None + self.uncertainty_estimate = None self.competitor_path = self.data_store.get("competitor_path") self.competitors = to_list(self.data_store.get_default("competitors", default=[])) self.forecast_indicator = "nn" + self.observation_indicator = "obs" self.ahead_dim = "ahead" + self.boot_var_dim = "boot_var" + self.uncertainty_estimate_boot_dim = "boots" + self.model_type_dim = "type" + self.index_dim = "index" self._run() def _run(self): @@ -99,25 +108,132 @@ class PostProcessing(RunEnvironment): # 
calculate error metrics on test data self.calculate_test_score() - # bootstraps - if self.data_store.get("evaluate_bootstraps", "postprocessing"): - with TimeTracking(name="calculate bootstraps"): - create_new_bootstraps = self.data_store.get("create_new_bootstraps", "postprocessing") - bootstrap_method = self.data_store.get("bootstrap_method", "postprocessing") - bootstrap_type = self.data_store.get("bootstrap_type", "postprocessing") - self.bootstrap_postprocessing(create_new_bootstraps, bootstrap_type=bootstrap_type, - bootstrap_method=bootstrap_method) + # sample uncertainty + if self.data_store.get("do_uncertainty_estimate", "postprocessing"): + self.estimate_sample_uncertainty() + + # feature importance bootstraps + if self.data_store.get("evaluate_feature_importance", "postprocessing"): + with TimeTracking(name="calculate feature importance using bootstraps"): + create_new_bootstraps = self.data_store.get("create_new_bootstraps", "feature_importance") + bootstrap_method = self.data_store.get("bootstrap_method", "feature_importance") + bootstrap_type = self.data_store.get("bootstrap_type", "feature_importance") + self.calculate_feature_importance(create_new_bootstraps, bootstrap_type=bootstrap_type, + bootstrap_method=bootstrap_method) + if self.feature_importance_skill_scores is not None: + self.report_feature_importance_results(self.feature_importance_skill_scores) # skill scores and error metrics with TimeTracking(name="calculate skill scores"): - skill_score_competitive, skill_score_climatological, errors = self.calculate_error_metrics() + skill_score_competitive, _, skill_score_climatological, errors = self.calculate_error_metrics() self.skill_scores = (skill_score_competitive, skill_score_climatological) self.report_error_metrics(errors) - self.report_error_metrics(skill_score_climatological) + self.report_error_metrics({self.forecast_indicator: skill_score_climatological}) + self.report_error_metrics({"skill_score": skill_score_competitive}) # plotting 
self.plot() + def estimate_sample_uncertainty(self, separate_ahead=False): + """ + Estimate sample uncertainty by using a bootstrap approach. Forecasts are split into individual blocks along time + and randomly drawn with replacement. The resulting behaviour of the error indicates the robustness of each + analyzed model to quantify which model might be superior compared to others. + """ + n_boots = self.data_store.get_default("n_boots", default=1000, scope="uncertainty_estimate") + block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate") + evaluate_competitors = self.data_store.get_default("evaluate_competitors", default=True, + scope="uncertainty_estimate") + block_mse = self.calculate_block_mse(evaluate_competitors=evaluate_competitors, separate_ahead=separate_ahead, + block_length=block_length) + self.uncertainty_estimate = statistics.create_n_bootstrap_realizations( + block_mse, dim_name_time=self.index_dim, dim_name_model=self.model_type_dim, + dim_name_boots=self.uncertainty_estimate_boot_dim, n_boots=n_boots) + self.report_sample_uncertainty() + + def report_sample_uncertainty(self, percentiles: list = None): + """ + Store raw results of uncertainty estimate and calculate aggregate statistics and store as raw data but also as + markdown and latex. 
+ """ + report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report") + path_config.check_path_and_create(report_path) + + # store raw results as nc + file_name = os.path.join(report_path, "uncertainty_estimate_raw_results.nc") + self.uncertainty_estimate.to_netcdf(path=file_name) + + # store statistics + if percentiles is None: + percentiles = [.05, .1, .25, .5, .75, .9, .95] + df_descr = self.uncertainty_estimate.to_pandas().describe(percentiles=percentiles).astype("float32") + column_format = tables.create_column_format_for_tex(df_descr) + file_name = os.path.join(report_path, "uncertainty_estimate_statistics.%s") + tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df_descr) + tables.save_to_md(report_path, file_name % "md", df=df_descr) + df_descr.to_csv(file_name % "csv", sep=";") + + def calculate_block_mse(self, evaluate_competitors=True, separate_ahead=False, block_length="1m"): + """ + Transform data into blocks along time axis. Block length can be any frequency like '1m' or '7d'. Data are only + split along time axis, which means that a single block can have very diverse quantities regarding the number of + station or actual data contained. This is intended to analyze not only the robustness against the time but also + against the number of observations and diversity of stations. 
+ """ + path = self.data_store.get("forecast_path") + all_stations = self.data_store.get("stations") + start = self.data_store.get("start", "test") + end = self.data_store.get("end", "test") + index_dim = self.index_dim + coll_dim = "station" + collector = [] + for station in all_stations: + # test data + external_data = self._get_external_data(station, path) + if external_data is not None: + pass + # competitors + if evaluate_competitors is True: + competitor = self.load_competitors(station) + combined = self._combine_forecasts(external_data, competitor, dim=self.model_type_dim) + else: + combined = external_data + + if combined is None: + continue + else: + combined = self.create_full_time_dim(combined, index_dim, self._sampling, start, end) + # get squared errors + errors = self.create_error_array(combined) + # calc mse for each block (single station) + mse = errors.resample(indexer={index_dim: block_length}).mean(skipna=True) + collector.append(mse.assign_coords({coll_dim: station})) + # calc mse for each block (average over all stations) + mse_blocks = xr.concat(collector, dim=coll_dim).mean(dim=coll_dim, skipna=True) + # average also on ahead steps + if separate_ahead is False: + mse_blocks = mse_blocks.mean(dim=self.ahead_dim, skipna=True) + return mse_blocks + + def create_error_array(self, data): + """Calculate squared error of all given time series in relation to observation.""" + errors = data.drop_sel({self.model_type_dim: self.observation_indicator}) + errors1 = errors - data.sel({self.model_type_dim: self.observation_indicator}) + errors2 = errors1 ** 2 + return errors2 + + @staticmethod + def create_full_time_dim(data, dim, sampling, start, end): + """Ensure time dimension to be equidistant. 
Sometimes dates are missing if values have been dropped.""" + start_data = data.coords[dim].values[0] + freq = {"daily": "1D", "hourly": "1H"}.get(sampling) + datetime_index = pd.DataFrame(index=pd.date_range(start, end, freq=freq)) + t = data.sel({dim: start_data}, drop=True) + res = xr.DataArray(coords=[datetime_index.index, *[t.coords[c] for c in t.coords]], dims=[dim, *t.coords]) + res = res.transpose(*data.dims) + res.loc[data.coords] = data + return res + def load_competitors(self, station_name: str) -> xr.DataArray: """ Load all requested and available competitors for a given station. Forecasts must be available in the competitor @@ -137,10 +253,10 @@ class PostProcessing(RunEnvironment): except (FileNotFoundError, KeyError): logging.debug(f"No competitor found for combination '{station_name}' and '{competitor_name}'.") continue - return xr.concat(competing_predictions, "type") if len(competing_predictions) > 0 else None + return xr.concat(competing_predictions, self.model_type_dim) if len(competing_predictions) > 0 else None - def bootstrap_postprocessing(self, create_new_bootstraps: bool, _iter: int = 0, bootstrap_type="singleinput", - bootstrap_method="shuffle") -> None: + def calculate_feature_importance(self, create_new_bootstraps: bool, _iter: int = 0, bootstrap_type="singleinput", + bootstrap_method="shuffle") -> None: """ Calculate skill scores of bootstrapped data. @@ -153,52 +269,64 @@ class PostProcessing(RunEnvironment): :param _iter: internal counter to reduce unnecessary recursive calls (maximum number is 2, otherwise something went wrong). 
""" - self.bootstrap_skill_scores = {} + if _iter == 0: + self.feature_importance_skill_scores = {} for boot_type in to_list(bootstrap_type): - self.bootstrap_skill_scores[boot_type] = {} + if _iter == 0: + self.feature_importance_skill_scores[boot_type] = {} for boot_method in to_list(bootstrap_method): try: if create_new_bootstraps: - self.create_bootstrap_forecast(bootstrap_type=boot_type, bootstrap_method=boot_method) - boot_skill_score = self.calculate_bootstrap_skill_scores(bootstrap_type=boot_type, - bootstrap_method=boot_method) - self.bootstrap_skill_scores[boot_type][boot_method] = boot_skill_score - except FileNotFoundError: + self.create_feature_importance_bootstrap_forecast(bootstrap_type=boot_type, + bootstrap_method=boot_method) + boot_skill_score = self.calculate_feature_importance_skill_scores(bootstrap_type=boot_type, + bootstrap_method=boot_method) + self.feature_importance_skill_scores[boot_type][boot_method] = boot_skill_score + except (FileNotFoundError, ValueError): if _iter != 0: - raise RuntimeError(f"bootstrap_postprocessing ({boot_type}, {boot_type}) was called for the 2nd" - f" time. This means, that something internally goes wrong. Please check for " - f"possible errors") - logging.info(f"Could not load all files for bootstrapping ({boot_type}, {boot_type}), restart " - f"bootstrap postprocessing with create_new_bootstraps=True.") - self.bootstrap_postprocessing(True, _iter=1, bootstrap_type=boot_type, bootstrap_method=boot_method) - - def create_bootstrap_forecast(self, bootstrap_type, bootstrap_method) -> None: + raise RuntimeError(f"calculate_feature_importance ({boot_type}, {boot_type}) was called for the " + f"2nd time. This means, that something internally goes wrong. 
Please check " + f"for possible errors") + logging.info(f"Could not load all files for feature importance ({boot_type}, {boot_type}), restart " + f"calculate_feature_importance with create_new_bootstraps=True.") + self.calculate_feature_importance(True, _iter=1, bootstrap_type=boot_type, + bootstrap_method=boot_method) + + def create_feature_importance_bootstrap_forecast(self, bootstrap_type, bootstrap_method) -> None: """ Create bootstrapped predictions for all stations and variables. These forecasts are saved in bootstrap_path with the names `bootstraps_{var}_{station}.nc` and `bootstraps_labels_{station}.nc`. """ + + def _reshape(d, pos): + if isinstance(d, list): + return list(map(lambda x: _reshape(x, pos), d)) + else: + return d[..., pos] + # forecast with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"): # extract all requirements from data store forecast_path = self.data_store.get("forecast_path") - number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing") - dims = ["index", self.ahead_dim, "type"] + number_of_bootstraps = self.data_store.get("n_boots", "feature_importance") + dims = [self.uncertainty_estimate_boot_dim, self.index_dim, self.ahead_dim, self.model_type_dim] for station in self.test_data: X, Y = None, None - bootstraps = BootStraps(station, number_of_bootstraps, bootstrap_type=bootstrap_type, + bootstraps = Bootstraps(station, number_of_bootstraps, bootstrap_type=bootstrap_type, bootstrap_method=bootstrap_method) + number_of_bootstraps = bootstraps.number_of_bootstraps for boot in bootstraps: X, Y, (index, dimension) = boot # make bootstrap predictions - bootstrap_predictions = self.model.predict(X) - if isinstance(bootstrap_predictions, list): # if model is branched model - bootstrap_predictions = bootstrap_predictions[-1] + bootstrap_predictions = [self.model.predict(_reshape(X, pos)) for pos in range(number_of_bootstraps)] + if isinstance(bootstrap_predictions[0], list): # 
if model is branched model + bootstrap_predictions = list(map(lambda x: x[-1], bootstrap_predictions)) # save bootstrap predictions separately for each station and variable combination - bootstrap_predictions = np.expand_dims(bootstrap_predictions, axis=-1) - shape = bootstrap_predictions.shape - coords = (range(shape[0]), range(1, shape[1] + 1)) + bootstrap_predictions = list(map(lambda x: np.expand_dims(x, axis=-1), bootstrap_predictions)) + shape = bootstrap_predictions[0].shape + coords = (range(number_of_bootstraps), range(shape[0]), range(1, shape[1] + 1)) var = f"{index}_{dimension}" if index is not None else str(dimension) tmp = xr.DataArray(bootstrap_predictions, coords=(*coords, [var]), dims=dims) file_name = os.path.join(forecast_path, @@ -206,12 +334,12 @@ class PostProcessing(RunEnvironment): tmp.to_netcdf(file_name) else: # store also true labels for each station - labels = np.expand_dims(Y, axis=-1) + labels = np.expand_dims(Y[..., 0], axis=-1) file_name = os.path.join(forecast_path, f"bootstraps_{station}_{bootstrap_method}_labels.nc") - labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=dims) + labels = xr.DataArray(labels, coords=(*coords[1:], [self.observation_indicator]), dims=dims[1:]) labels.to_netcdf(file_name) - def calculate_bootstrap_skill_scores(self, bootstrap_type, bootstrap_method) -> Dict[str, xr.DataArray]: + def calculate_feature_importance_skill_scores(self, bootstrap_type, bootstrap_method) -> Dict[str, xr.DataArray]: """ Calculate skill score of bootstrapped variables. 
@@ -224,10 +352,11 @@ class PostProcessing(RunEnvironment): with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"): # extract all requirements from data store forecast_path = self.data_store.get("forecast_path") - number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing") + number_of_bootstraps = self.data_store.get("n_boots", "feature_importance") forecast_file = f"forecasts_norm_%s_test.nc" + reference_name = "orig" - bootstraps = BootStraps(self.test_data[0], number_of_bootstraps, bootstrap_type=bootstrap_type, + bootstraps = Bootstraps(self.test_data[0], number_of_bootstraps, bootstrap_type=bootstrap_type, bootstrap_method=bootstrap_method) number_of_bootstraps = bootstraps.number_of_bootstraps bootstrap_iter = bootstraps.bootstraps() @@ -238,16 +367,13 @@ class PostProcessing(RunEnvironment): file_name = os.path.join(forecast_path, f"bootstraps_{str(station)}_{bootstrap_method}_labels.nc") with xr.open_dataarray(file_name) as da: labels = da.load() - shape = labels.shape # get original forecasts - orig = self.get_orig_prediction(forecast_path, forecast_file % str(station), number_of_bootstraps) - orig = orig.reshape(shape) - coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"]) - orig = xr.DataArray(orig, coords=coords, dims=["index", self.ahead_dim, "type"]) + orig = self.get_orig_prediction(forecast_path, forecast_file % str(station), reference_name=reference_name) + orig.coords[self.index_dim] = labels.coords[self.index_dim] # calculate skill scores for each variable - skill = pd.DataFrame(columns=range(1, self.window_lead_time + 1)) + skill = [] for boot_set in bootstrap_iter: boot_var = f"{boot_set[0]}_{boot_set[1]}" if isinstance(boot_set, tuple) else str(boot_set) file_name = os.path.join(forecast_path, @@ -259,27 +385,34 @@ class PostProcessing(RunEnvironment): for ahead in range(1, self.window_lead_time + 1): data = boot_data.sel({self.ahead_dim: ahead}) boot_scores.append( - 
skill_scores.general_skill_score(data, forecast_name=boot_var, reference_name="orig")) - skill.loc[boot_var] = np.array(boot_scores) + skill_scores.general_skill_score(data, forecast_name=boot_var, + reference_name=reference_name, dim=self.index_dim)) + tmp = xr.DataArray(np.expand_dims(np.array(boot_scores), axis=-1), + coords={self.ahead_dim: range(1, self.window_lead_time + 1), + self.uncertainty_estimate_boot_dim: range(number_of_bootstraps), + self.boot_var_dim: [boot_var]}, + dims=[self.ahead_dim, self.uncertainty_estimate_boot_dim, self.boot_var_dim]) + skill.append(tmp) # collect all results in single dictionary - score[str(station)] = xr.DataArray(skill, dims=["boot_var", self.ahead_dim]) + score[str(station)] = xr.concat(skill, dim=self.boot_var_dim) return score - def get_orig_prediction(self, path, file_name, number_of_bootstraps, prediction_name=None): + def get_orig_prediction(self, path, file_name, prediction_name=None, reference_name=None): if prediction_name is None: prediction_name = self.forecast_indicator file = os.path.join(path, file_name) with xr.open_dataarray(file) as da: - prediction = da.load().sel(type=prediction_name).squeeze() - return self.repeat_data(prediction, number_of_bootstraps) + prediction = da.load().sel({self.model_type_dim: [prediction_name]}) + if reference_name is not None: + prediction.coords[self.model_type_dim] = [reference_name] + return prediction.dropna(dim=self.index_dim) @staticmethod def repeat_data(data, number_of_repetition): if isinstance(data, xr.DataArray): data = data.data - vals = np.tile(data, (number_of_repetition, 1)) - return vals[~np.isnan(vals).any(axis=1), :] + return np.repeat(np.expand_dims(data, axis=-1), number_of_repetition, axis=-1) def _get_model_name(self): """Return model name without path information.""" @@ -336,35 +469,40 @@ class PostProcessing(RunEnvironment): PlotSeparationOfScales(self.test_data, plot_folder=self.plot_path, time_dim=time_dim, window_dim=window_dim, 
target_dim=target_dim, **{"filter_dim": filter_dim}) except Exception as e: - logging.error(f"Could not create plot PlotSeparationOfScales due to the following error: {e}") + logging.error(f"Could not create plot PlotSeparationOfScales due to the following error:" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: - if (self.bootstrap_skill_scores is not None) and ("PlotBootstrapSkillScore" in plot_list): - for boot_type, boot_data in self.bootstrap_skill_scores.items(): + if (self.feature_importance_skill_scores is not None) and ("PlotFeatureImportanceSkillScore" in plot_list): + for boot_type, boot_data in self.feature_importance_skill_scores.items(): for boot_method, boot_skill_score in boot_data.items(): try: - PlotBootstrapSkillScore(boot_skill_score, plot_folder=self.plot_path, - model_setup=self.forecast_indicator, sampling=self._sampling, - ahead_dim=self.ahead_dim, separate_vars=to_list(self.target_var), - bootstrap_type=boot_type, bootstrap_method=boot_method) + PlotFeatureImportanceSkillScore( + boot_skill_score, plot_folder=self.plot_path, model_setup=self.forecast_indicator, + sampling=self._sampling, ahead_dim=self.ahead_dim, + separate_vars=to_list(self.target_var), bootstrap_type=boot_type, + bootstrap_method=boot_method) except Exception as e: - logging.error(f"Could not create plot PlotBootstrapSkillScore ({boot_type}, {boot_method}) " - f"due to the following error: {e}") + logging.error(f"Could not create plot PlotFeatureImportanceSkillScore ({boot_type}, " + f"{boot_method}) due to the following error:\n{sys.exc_info()[0]}\n" + f"{sys.exc_info()[1]}\n{sys.exc_info()[2]}") except Exception as e: - logging.error(f"Could not create plot PlotBootstrapSkillScore due to the following error: {e}") + logging.error(f"Could not create plot PlotFeatureImportanceSkillScore due to the following error: {e}") try: if "PlotConditionalQuantiles" in plot_list: PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=path, 
plot_folder=self.plot_path) except Exception as e: - logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error: {e}") + logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error:" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotMonthlySummary" in plot_list: PlotMonthlySummary(self.test_data.keys(), path, r"forecasts_%s_test.nc", self.target_var, plot_folder=self.plot_path) except Exception as e: - logging.error(f"Could not create plot PlotMonthlySummary due to the following error: {e}") + logging.error(f"Could not create plot PlotMonthlySummary due to the following error:" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotClimatologicalSkillScore" in plot_list: @@ -373,21 +511,24 @@ class PostProcessing(RunEnvironment): PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, score_only=False, extra_name_tag="all_terms_", model_setup=self.forecast_indicator) except Exception as e: - logging.error(f"Could not create plot PlotClimatologicalSkillScore due to the following error: {e}") + logging.error(f"Could not create plot PlotClimatologicalSkillScore due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotCompetitiveSkillScore" in plot_list: PlotCompetitiveSkillScore(self.skill_scores[0], plot_folder=self.plot_path, model_setup=self.forecast_indicator) except Exception as e: - logging.error(f"Could not create plot PlotCompetitiveSkillScore due to the following error: {e}") + logging.error(f"Could not create plot PlotCompetitiveSkillScore due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotTimeSeries" in plot_list: PlotTimeSeries(self.test_data.keys(), path, r"forecasts_%s_test.nc", plot_folder=self.plot_path, - sampling=self._sampling) + sampling=self._sampling, ahead_dim=self.ahead_dim) 
except Exception as e: - logging.error(f"Could not create plot PlotTimeSeries due to the following error: {e}") + logging.error(f"Could not create plot PlotTimeSeries due to the following error:\n{sys.exc_info()[0]}\n" + f"{sys.exc_info()[1]}\n{sys.exc_info()[2]}\n{traceback.format_exc()}") try: if "PlotStationMap" in plot_list: @@ -404,7 +545,8 @@ class PostProcessing(RunEnvironment): (self.test_data, {"marker": 9, "ms": 9})] PlotStationMap(generators=gens, plot_folder=self.plot_path, plot_name="station_map_var") except Exception as e: - logging.error(f"Could not create plot PlotStationMap due to the following error: {e}") + logging.error(f"Could not create plot PlotStationMap due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotAvailability" in plot_list: @@ -412,7 +554,8 @@ class PostProcessing(RunEnvironment): PlotAvailability(avail_data, plot_folder=self.plot_path, time_dimension=time_dim, window_dimension=window_dim) except Exception as e: - logging.error(f"Could not create plot PlotAvailability due to the following error: {e}") + logging.error(f"Could not create plot PlotAvailability due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotAvailabilityHistogram" in plot_list: @@ -420,7 +563,8 @@ class PostProcessing(RunEnvironment): PlotAvailabilityHistogram(avail_data, plot_folder=self.plot_path, station_dim=iter_dim, history_dim=window_dim) except Exception as e: - logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}") + logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotPeriodogram" in plot_list: @@ -428,14 +572,29 @@ class PostProcessing(RunEnvironment): variables_dim=target_dim, sampling=self._sampling, use_multiprocessing=use_multiprocessing) except Exception as 
e: - logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}") + logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: if "PlotDataHistogram" in plot_list: + upsampling = self.data_store.get_default("upsampling", scope="train", default=False) gens = {"train": self.train_data, "val": self.val_data, "test": self.test_data} - PlotDataHistogram(gens, plot_folder=self.plot_path, time_dim=time_dim, variables_dim=target_dim) + PlotDataHistogram(gens, plot_folder=self.plot_path, time_dim=time_dim, variables_dim=target_dim, + upsampling=upsampling) + except Exception as e: + logging.error(f"Could not create plot PlotDataHistogram due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") + + try: + if "PlotSampleUncertaintyFromBootstrap" in plot_list and self.uncertainty_estimate is not None: + block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate") + PlotSampleUncertaintyFromBootstrap( + data=self.uncertainty_estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim, + dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error", + error_unit=r"ppb$^2$", block_length=block_length) except Exception as e: - logging.error(f"Could not create plot PlotDataHistogram due to the following error: {e}") + logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") def calculate_test_score(self): """Evaluate test score of model and save locally.""" @@ -496,10 +655,11 @@ class PostProcessing(RunEnvironment): full_index = self.create_fullindex(observation_data.indexes[time_dimension], self._get_frequency()) prediction_dict = {self.forecast_indicator: nn_prediction, "persi": persistence_prediction, - "obs": 
observation, + self.observation_indicator: observation, "ols": ols_prediction} all_predictions = self.create_forecast_arrays(full_index, list(target_data.indexes[window_dim]), time_dimension, ahead_dim=self.ahead_dim, + index_dim=self.index_dim, type_dim=self.model_type_dim, **prediction_dict) # save all forecasts locally @@ -529,7 +689,7 @@ class PostProcessing(RunEnvironment): with xr.open_dataarray(file) as da: data = da.load() forecast = data.sel(type=[self.forecast_indicator]) - forecast.coords["type"] = [competitor_name] + forecast.coords[self.model_type_dim] = [competitor_name] return forecast def _create_observation(self, data, _, transformation_func: Callable, normalised: bool) -> xr.DataArray: @@ -659,7 +819,7 @@ class PostProcessing(RunEnvironment): @staticmethod def create_forecast_arrays(index: pd.DataFrame, ahead_names: List[Union[str, int]], time_dimension, - ahead_dim="ahead", **kwargs): + ahead_dim="ahead", index_dim="index", type_dim="type", **kwargs): """ Combine different forecast types into single xarray. @@ -672,7 +832,7 @@ class PostProcessing(RunEnvironment): """ keys = list(kwargs.keys()) res = xr.DataArray(np.full((len(index.index), len(ahead_names), len(keys)), np.nan), - coords=[index.index, ahead_names, keys], dims=['index', ahead_dim, 'type']) + coords=[index.index, ahead_names, keys], dims=[index_dim, ahead_dim, type_dim]) for k, v in kwargs.items(): intersection = set(res.index.values) & set(v.indexes[time_dimension].values) match_index = np.array(list(intersection)) @@ -711,18 +871,19 @@ class PostProcessing(RunEnvironment): except (IndexError, KeyError, FileNotFoundError): return None - @staticmethod - def _combine_forecasts(forecast, competitor, dim="type"): + def _combine_forecasts(self, forecast, competitor, dim=None): """ Combine forecast and competitor if both are xarray. If competitor is None, this returns forecasts and vise versa. 
""" + if dim is None: + dim = self.model_type_dim try: return xr.concat([forecast, competitor], dim=dim) except (TypeError, AttributeError): return forecast if competitor is None else competitor - def calculate_error_metrics(self) -> Tuple[Dict, Dict, Dict]: + def calculate_error_metrics(self) -> Tuple[Dict, Dict, Dict, Dict]: """ Calculate error metrics and skill scores of NN forecast. @@ -735,6 +896,7 @@ class PostProcessing(RunEnvironment): path = self.data_store.get("forecast_path") all_stations = self.data_store.get("stations") skill_score_competitive = {} + skill_score_competitive_count = {} skill_score_climatological = {} errors = {} for station in all_stations: @@ -742,24 +904,61 @@ class PostProcessing(RunEnvironment): # test errors if external_data is not None: - errors[station] = statistics.calculate_error_metrics(*map(lambda x: external_data.sel(type=x), - [self.forecast_indicator, "obs"]), - dim="index") - # skill score + model_type_list = external_data.coords[self.model_type_dim].values.tolist() + for model_type in remove_items(model_type_list, self.observation_indicator): + if model_type not in errors.keys(): + errors[model_type] = {} + errors[model_type][station] = statistics.calculate_error_metrics( + *map(lambda x: external_data.sel(**{self.model_type_dim: x}), + [model_type, self.observation_indicator]), dim=self.index_dim) + + # load competitors competitor = self.load_competitors(station) - combined = self._combine_forecasts(external_data, competitor, dim="type") - model_list = remove_items(list(combined.type.values), "obs") if combined is not None else None + combined = self._combine_forecasts(external_data, competitor, dim=self.model_type_dim) + if combined is not None: + model_list = remove_items(combined.coords[self.model_type_dim].values.tolist(), + self.observation_indicator) + else: + model_list = None + + # test errors of competitors + for model_type in remove_items(model_list or [], list(errors.keys())): + if self.observation_indicator 
not in combined.coords[self.model_type_dim]: + continue + if model_type not in errors.keys(): + errors[model_type] = {} + errors[model_type][station] = statistics.calculate_error_metrics( + *map(lambda x: combined.sel(**{self.model_type_dim: x}), + [model_type, self.observation_indicator]), dim=self.index_dim) + + # skill score skill_score = statistics.SkillScores(combined, models=model_list, ahead_dim=self.ahead_dim) if external_data is not None: - skill_score_competitive[station] = skill_score.skill_scores() + skill_score_competitive[station], skill_score_competitive_count[station] = skill_score.skill_scores() internal_data = self._get_internal_data(station, path) if internal_data is not None: skill_score_climatological[station] = skill_score.climatological_skill_scores( internal_data, forecast_name=self.forecast_indicator) - errors.update({"total": self.calculate_average_errors(errors)}) - return skill_score_competitive, skill_score_climatological, errors + for model_type in errors.keys(): + errors[model_type].update({"total": self.calculate_average_errors(errors[model_type])}) + skill_score_competitive.update({"total": self.calculate_average_skill_scores(skill_score_competitive, + skill_score_competitive_count)}) + return skill_score_competitive, skill_score_competitive_count, skill_score_climatological, errors + + @staticmethod + def calculate_average_skill_scores(scores, counts): + avg_skill_score = 0 + n_total = None + for vals in counts.values(): + n_total = vals if n_total is None else n_total.add(vals, fill_value=0) + for station, station_scores in scores.items(): + n = counts.get(station) + avg_skill_score = station_scores.mul(n.div(n_total, fill_value=0), fill_value=0).add(avg_skill_score, + fill_value=0) + return avg_skill_score + @staticmethod def calculate_average_errors(errors): @@ -772,28 +971,57 @@ class PostProcessing(RunEnvironment): avg_error[error_metric] = new_val return avg_error + def report_feature_importance_results(self, results): + 
"""Create a csv file containing all results from feature importance.""" + report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report") + path_config.check_path_and_create(report_path) + res = [] + for boot_type, d0 in results.items(): + for boot_method, d1 in d0.items(): + for station_name, vals in d1.items(): + for boot_var in vals.coords[self.boot_var_dim].values.tolist(): + for ahead in vals.coords[self.ahead_dim].values.tolist(): + res.append([boot_type, boot_method, station_name, boot_var, ahead, + *vals.sel({self.boot_var_dim: boot_var, + self.ahead_dim: ahead}).values.round(5).tolist()]) + col_names = [self.model_type_dim, "method", "station", self.boot_var_dim, self.ahead_dim, + *list(range(len(res[0]) - 5))] + df = pd.DataFrame(res, columns=col_names) + file_name = "feature_importance_skill_score_report_raw.csv" + df.to_csv(os.path.join(report_path, file_name), sep=";") + def report_error_metrics(self, errors): report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report") path_config.check_path_and_create(report_path) - metric_collection = {} - for station, station_errors in errors.items(): - if isinstance(station_errors, xr.DataArray): - dim = station_errors.dims[0] - sel_index = [sel for sel in station_errors.coords[dim] if "CASE" in str(sel)] - station_errors = {str(i.values): station_errors.sel(**{dim: i}) for i in sel_index} - for metric, vals in station_errors.items(): - if metric == "n": - continue - pd_vals = pd.DataFrame.from_dict({station: vals}).T - pd_vals.columns = [f"{metric}(t+{x})" for x in vals.coords["ahead"].values] - mc = metric_collection.get(metric, pd.DataFrame()) - mc = mc.append(pd_vals) - metric_collection[metric] = mc - for metric, error_df in metric_collection.items(): - df = error_df.sort_index() - if "total" in df.index: - df.reindex(df.index.drop(["total"]).to_list() + ["total"], ) - column_format = tables.create_column_format_for_tex(df) - file_name = 
f"error_report_{metric}.%s".replace(' ', '_') - tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df) - tables.save_to_md(report_path, file_name % "md", df=df) + for model_type in errors.keys(): + metric_collection = {} + for station, station_errors in errors[model_type].items(): + if isinstance(station_errors, xr.DataArray): + dim = station_errors.dims[0] + sel_index = [sel for sel in station_errors.coords[dim] if "CASE" in str(sel)] + station_errors = {str(i.values): station_errors.sel(**{dim: i}) for i in sel_index} + elif isinstance(station_errors, pd.DataFrame): + sel_index = station_errors.index.tolist() + ahead = station_errors.columns.values + station_errors = {k: xr.DataArray(station_errors[station_errors.index == k].values.flatten(), + dims=["ahead"], coords={"ahead": ahead}).astype(float) + for k in sel_index} + for metric, vals in station_errors.items(): + if metric == "n": + metric = "count" + pd_vals = pd.DataFrame.from_dict({station: vals}).T + pd_vals.columns = [f"{metric}(t+{x})" for x in vals.coords["ahead"].values] + mc = metric_collection.get(metric, pd.DataFrame()) + mc = mc.append(pd_vals) + metric_collection[metric] = mc + for metric, error_df in metric_collection.items(): + df = error_df.sort_index() + if "total" in df.index: + df.reindex(df.index.drop(["total"]).to_list() + ["total"], ) + column_format = tables.create_column_format_for_tex(df) + if model_type == "skill_score": + file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_') + else: + file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_') + tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df) + tables.save_to_md(report_path, file_name % "md", df=df) diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index 08bff85c9c1fe06111ddb47e7a3952404e05c0ac..873919fa93af3e4a43c3b16c382d9746ec26a573 100644 --- a/mlair/run_modules/pre_processing.py +++ 
b/mlair/run_modules/pre_processing.py @@ -10,6 +10,8 @@ from typing import Tuple import multiprocessing import requests import psutil +import random +import dill import pandas as pd @@ -242,6 +244,7 @@ class PreProcessing(RunEnvironment): valid_stations = [] kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope=set_name) use_multiprocessing = self.data_store.get("use_multiprocessing") + tmp_path = self.data_store.get("tmp_path") max_process = self.data_store.get("max_number_multiprocessing") n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process]) # use only physical cpus @@ -249,18 +252,23 @@ class PreProcessing(RunEnvironment): logging.info("use parallel validate station approach") pool = multiprocessing.Pool(n_process) logging.info(f"running {getattr(pool, '_processes')} processes in parallel") + kwargs.update({"tmp_path": tmp_path, "return_strategy": "reference"}) output = [ pool.apply_async(f_proc, args=(data_handler, station, set_name, store_processed_data), kwds=kwargs) for station in set_stations] for i, p in enumerate(output): - dh, s = p.get() + _res_file, s = p.get() logging.info(f"...finished: {s} ({int((i + 1.) / len(output) * 100)}%)") + with open(_res_file, "rb") as f: + dh = dill.load(f) + os.remove(_res_file) if dh is not None: collection.add(dh) valid_stations.append(s) pool.close() else: # serial solution logging.info("use serial validate station approach") + kwargs.update({"return_strategy": "result"}) for station in set_stations: dh, s = f_proc(data_handler, station, set_name, store_processed_data, **kwargs) if dh is not None: @@ -268,7 +276,7 @@ class PreProcessing(RunEnvironment): valid_stations.append(s) logging.info(f"run for {t_outer} to check {len(set_stations)} station(s). 
Found {len(collection)}/" - f"{len(set_stations)} valid stations.") + f"{len(set_stations)} valid stations ({set_name}).") if set_name == "train": self.store_data_handler_attributes(data_handler, collection) return collection, valid_stations @@ -288,7 +296,8 @@ class PreProcessing(RunEnvironment): def transformation(self, data_handler: AbstractDataHandler, stations): if hasattr(data_handler, "transformation"): kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope="train") - transformation_dict = data_handler.transformation(stations, **kwargs) + tmp_path = self.data_store.get_default("tmp_path", default=None) + transformation_dict = data_handler.transformation(stations, tmp_path=tmp_path, **kwargs) if transformation_dict is not None: self.data_store.set("transformation", transformation_dict) @@ -312,12 +321,13 @@ class PreProcessing(RunEnvironment): logging.info("No preparation required because no competitor was provided to the workflow.") -def f_proc(data_handler, station, name_affix, store, **kwargs): +def f_proc(data_handler, station, name_affix, store, return_strategy="", tmp_path=None, **kwargs): """ Try to create a data handler for given arguments. If build fails, this station does not fulfil all requirements and - therefore f_proc will return None as indication. On a successfull build, f_proc returns the built data handler and + therefore f_proc will return None as indication. On a successful build, f_proc returns the built data handler and the station that was used. This function must be implemented globally to work together with multiprocessing. 
""" + assert return_strategy in ["result", "reference"] try: res = data_handler.build(station, name_affix=name_affix, store_processed_data=store, **kwargs) except (AttributeError, EmptyQueryResult, KeyError, requests.ConnectionError, ValueError, IndexError) as e: @@ -326,7 +336,15 @@ def f_proc(data_handler, station, name_affix, store, **kwargs): f"remove station {station} because it raised an error: {e} -> {' | '.join(f_inspect_error(formatted_lines))}") logging.debug(f"detailed information for removal of station {station}: {traceback.format_exc()}") res = None - return res, station + if return_strategy == "result": + return res, station + else: + if tmp_path is None: + tmp_path = os.getcwd() + _tmp_file = os.path.join(tmp_path, f"{station}_{'%032x' % random.getrandbits(128)}.pickle") + with open(_tmp_file, "wb") as f: + dill.dump(res, f, protocol=4) + return _tmp_file, station def f_inspect_error(formatted): diff --git a/mlair/workflows/default_workflow.py b/mlair/workflows/default_workflow.py index 5894555a6af52299efcd8d88d76c0d3791a1599e..961979cb774e928bda96d4cd1a3a7b0f8565e968 100644 --- a/mlair/workflows/default_workflow.py +++ b/mlair/workflows/default_workflow.py @@ -31,7 +31,6 @@ class DefaultWorkflow(Workflow): permute_data_on_training=None, extreme_values=None, extremes_on_right_tail_only=None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, - evaluate_bootstraps=None, number_of_bootstraps=None, create_new_bootstraps=None, plot_list=None, model=None, batch_size=None, diff --git a/run.py b/run.py index eb703e11cf9a3028dda0368ac3f7b7ca1578bd2a..11cc01257fdf4535845a2cfedb065dd27942ef66 100644 --- a/run.py +++ b/run.py @@ -25,7 +25,7 @@ def main(parser_args): # stations=["DEBW087","DEBW013", "DEBW107", "DEBW076"], stations=["DEBW013", "DEBW087", "DEBW107", "DEBW076"], train_model=False, create_new_model=True, network="UBA", - evaluate_bootstraps=False, # plot_list=["PlotCompetitiveSkillScore"], + 
evaluate_feature_importance=False, # plot_list=["PlotCompetitiveSkillScore"], competitors=["test_model", "test_model2"], competitor_path=os.path.join(os.getcwd(), "data", "comp_test"), **parser_args.__dict__, start_script=__file__) diff --git a/run_climate_filter.py b/run_climate_filter.py new file mode 100755 index 0000000000000000000000000000000000000000..7f3fcbaaba94506faeadd884073620496155f8ea --- /dev/null +++ b/run_climate_filter.py @@ -0,0 +1,91 @@ +__author__ = "Lukas Leufen" +__date__ = '2019-11-14' + +import argparse + +from mlair.workflows import DefaultWorkflow +from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSamplingWithClimateFirFilter +from mlair.model_modules.fully_connected_networks import BranchedInputFCN as NN +from mlair.configuration.defaults import DEFAULT_PLOT_LIST, DEFAULT_TRAIN_END + + +STATS = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', + 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values', + 'pblheight': 'maximum'} + +DATA_ORIGIN = {"no": "", "no2": "", "o3": "", + "cloudcover": "REA", "pblheight": "REA", "relhum": "REA", + "temp": "REA", "u": "REA", "v": "REA"} + + +def main(parser_args): + # plots = remove_items(DEFAULT_PLOT_LIST, "PlotConditionalQuantiles") + args = dict( + sampling=("hourly", "daily"), # T1A, T1D + stations=["DEBW107", "DEBW013"], # [:5], # T1B, TODO: remove indexing for meaningful experiments + network="UBA", # T1B + variables=["o3", "no", "no2", "cloudcover", "pblheight", "relhum", "temp", "u", "v"], # T1B + statistics_per_var=STATS, # T1A, T1D, T1F + + data_origin=DATA_ORIGIN, + # data_handler=DataHandlerMixedSampling, + # window_history_offset=16, + # window_history_size=6 * 24 + 16, # T1D + data_handler=DataHandlerMixedSamplingWithClimateFirFilter, + filter_cutoff_period=[21], + filter_order=[42], + filter_window_type=("kaiser", 5), + filter_add_unfiltered=True, + apriori_sel_opts=slice(DEFAULT_TRAIN_END), + 
apriori_type="residuum_stats", + apriori_diurnal=True, + use_filter_branches=True, + + window_history_size=2 * 24 + 16, + window_history_offset=16, # T1F + window_lead_time=4, # T1D + target_var="o3", # T1D + interpolation_limit=(24, 2), # T1F + transformation={ + "o3": {"method": "log"}, + "no": {"method": "log"}, + "no2": {"method": "log"}, + "cloudcover": {"method": "min_max", "feature_range": [-1, 1]}, + "pblheight": {"method": "log"}, + "relhum": {"method": "min_max", "feature_range": [-1, 1]}, + "temp": {"method": "standardise"}, + "u": {"method": "standardise"}, + "v": {"method": "standardise"}, }, # T1F, also apply same target transformation + + start="2006-01-01", + train_start="2006-01-01", + end="2011-12-31", + test_end="2011-12-31", + + train_model=False, create_new_model=True, + epochs=1, + model=NN, + evaluate_feature_importance=False, + feature_importance_bootstrap_type=["singleinput", "branch", "variable"], + feature_importance_bootstrap_method=["shuffle", "zero_mean"], + plot_list=DEFAULT_PLOT_LIST, + # competitors=["IntelliO3-ts-v1", "MFCN_64_32_16_climFIR", "FCN_1449_512_32_4"], # "FCN_1449_16_8_4",], + lazy_preprocessing=True, + use_multiprocessing=True, + use_multiprocessing_on_debug=False, + overwrite_local_data=False, + overwrite_lazy_data=False, + max_number_multiprocessing=2, + **parser_args.__dict__ + ) + print(parser_args.__dict__) + workflow = DefaultWorkflow(**args, start_script=__file__) + workflow.run() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None, + help="set experiment date as string") + args = parser.parse_args(["--experiment_date", "testrun"]) + main(args) diff --git a/run_mixed_sampling.py b/run_mixed_sampling.py index 819ef51129854b4539632ef91a55e33a2607eb55..784f653fbfb2eb4c78e6e858acf67cd0ae47a593 100644 --- a/run_mixed_sampling.py +++ b/run_mixed_sampling.py @@ -20,7 +20,7 @@ data_origin = {'o3': '', 'no': '', 
'no2': '', def main(parser_args): args = dict(stations=["DEBW107", "DEBW013"], network="UBA", - evaluate_bootstraps=False, plot_list=[], + evaluate_feature_importance=False, plot_list=[], data_origin=data_origin, data_handler=DataHandlerMixedSampling, interpolation_limit=(3, 1), overwrite_local_data=False, sampling=("hourly", "daily"), diff --git a/test/test_configuration/test_defaults.py b/test/test_configuration/test_defaults.py index b6bdd9556f73ff711003b01c3a2b65a1c20c66d3..f6bc6d24724c2620083602d3864bcbca0a709681 100644 --- a/test/test_configuration/test_defaults.py +++ b/test/test_configuration/test_defaults.py @@ -62,10 +62,16 @@ class TestAllDefaults: assert DEFAULT_HPC_LOGIN_LIST == ["ju", "hdfmll"] def test_postprocessing_parameters(self): - assert DEFAULT_EVALUATE_BOOTSTRAPS is True - assert DEFAULT_CREATE_NEW_BOOTSTRAPS is False - assert DEFAULT_NUMBER_OF_BOOTSTRAPS == 20 + assert DEFAULT_DO_UNCERTAINTY_ESTIMATE is True + assert DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH == "1m" + assert DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS is True + assert DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS == 1000 + assert DEFAULT_EVALUATE_FEATURE_IMPORTANCE is True + assert DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS is False + assert DEFAULT_FEATURE_IMPORTANCE_N_BOOTS == 20 + assert DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE == "singleinput" + assert DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD == "shuffle" assert DEFAULT_PLOT_LIST == ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", - "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", + "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotFeatureImportanceSkillScore", "PlotConditionalQuantiles", "PlotAvailability", "PlotAvailabilityHistogram", - "PlotDataHistogram", "PlotPeriodogram"] + "PlotDataHistogram", "PlotPeriodogram", "PlotSampleUncertaintyFromBootstrap"] diff --git a/test/test_data_handler/old_t_bootstraps.py b/test/test_data_handler/old_t_bootstraps.py index 
21c18c6c2d6f6a6a38a41250f00d3d14a29ed457..e21af9f614d4cfaaadf946cdab72cc69cd5b19a7 100644 --- a/test/test_data_handler/old_t_bootstraps.py +++ b/test/test_data_handler/old_t_bootstraps.py @@ -7,7 +7,7 @@ import numpy as np import pytest import xarray as xr -from mlair.data_handler.bootstraps import BootStraps +from mlair.data_handler.input_bootstraps import Bootstraps from src.data_handler import DataPrepJoin @@ -171,22 +171,22 @@ class TestBootStraps: @pytest.fixture def bootstrap(self, orig_generator, data_path): - return BootStraps(orig_generator, data_path, 20) + return Bootstraps(orig_generator, data_path, 20) @pytest.fixture @mock.patch("mlair.data_handling.bootstraps.CreateShuffledData", return_value=None) def bootstrap_no_shuffling(self, mock_create_shuffle_data, orig_generator, data_path): shutil.rmtree(data_path) - return BootStraps(orig_generator, data_path, 20) + return Bootstraps(orig_generator, data_path, 20) def test_init_no_shuffling(self, bootstrap_no_shuffling, data_path): - assert isinstance(bootstrap_no_shuffling, BootStraps) + assert isinstance(bootstrap_no_shuffling, Bootstraps) assert bootstrap_no_shuffling.number_of_bootstraps == 20 assert bootstrap_no_shuffling.bootstrap_path == data_path def test_init_with_shuffling(self, orig_generator, data_path, caplog): caplog.set_level(logging.INFO) - BootStraps(orig_generator, data_path, 20) + Bootstraps(orig_generator, data_path, 20) assert caplog.record_tuples[0] == ('root', logging.INFO, "create / check shuffled bootstrap data") def test_stations(self, bootstrap_no_shuffling, orig_generator): @@ -213,9 +213,9 @@ class TestBootStraps: @mock.patch("mlair.data_handling.data_generator.DataGenerator._load_pickle_data", side_effect=FileNotFoundError) def test_get_generator_different_generator(self, mock_load_pickle, data_path, orig_generator): - BootStraps(orig_generator, data_path, 20) # to create + Bootstraps(orig_generator, data_path, 20) # to create orig_generator.window_history_size = 4 - bootstrap 
= BootStraps(orig_generator, data_path, 20) + bootstrap = Bootstraps(orig_generator, data_path, 20) station = bootstrap.stations[0] var = bootstrap.variables[0] var_others = bootstrap.variables[1:] diff --git a/test/test_helpers/test_statistics.py b/test/test_helpers/test_statistics.py index 2a77f0806b886bee1a3961ff0a972e8f0ee62873..f5148cdc293939d5711afb57c2fa009c47b6c86d 100644 --- a/test/test_helpers/test_statistics.py +++ b/test/test_helpers/test_statistics.py @@ -4,7 +4,8 @@ import pytest import xarray as xr from mlair.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, \ - centre_apply, apply_inverse_transformation, min_max, min_max_inverse, min_max_apply, log, log_inverse, log_apply + centre_apply, apply_inverse_transformation, min_max, min_max_inverse, min_max_apply, log, log_inverse, log_apply, \ + create_single_bootstrap_realization, calculate_average, create_n_bootstrap_realizations lazy = pytest.lazy_fixture @@ -221,3 +222,36 @@ class TestLog: data_ref, opts = log(data_orig, dim) data_test = log_apply(data_orig, opts["mean"], opts["std"]) assert np.testing.assert_array_almost_equal(data_ref, data_test) is None + + +class TestCreateBootstrapRealizations: + + @pytest.fixture + def data(self): + return xr.DataArray(np.array(range(20)).reshape(2 , -1).T, + dims={'time': range(10), 'model': ['m1', 'm2']}, + coords={'time': range(10), 'model': ['m1', 'm2']}) + + def test_create_single_bootstrap_realization(self, data): + np.random.seed(42) + proc_data = create_single_bootstrap_realization(data, "time") + assert isinstance(proc_data, xr.DataArray) + assert (proc_data.coords['time'].values == np.array([6, 3, 7, 4, 6, 9, 2, 6, 7, 4])).all() + # check if all time index values of proc_data are from data + assert np.in1d(proc_data.indexes['time'].values, data.indexes['time'].values).all() + + def test_calculate_average(self, data): + assert isinstance(data, xr.DataArray) + assert calculate_average(data) == 
data.mean() + assert (calculate_average(data, axis=0) == data.mean(axis=0)).all() + + def test_create_n_bootstrap_realizations(self, data): + boot_data = create_n_bootstrap_realizations(data, dim_name_time='time', dim_name_model='model', + n_boots=1000, dim_name_boots='boots') + assert isinstance(boot_data, xr.DataArray) + assert boot_data.shape == (1000, 2) + + boot_data = create_n_bootstrap_realizations(data.sel(model='m1').squeeze(), dim_name_time='time', + dim_name_model='model', n_boots=1000, dim_name_boots='boots') + assert isinstance(boot_data, xr.DataArray) + assert boot_data.shape == (1000,)