diff --git a/German_background_stations.json b/German_background_stations.json
new file mode 100755
index 0000000000000000000000000000000000000000..2997eefbaa9a72f4e94b940b6d0ebb7f6a34370d
--- /dev/null
+++ b/German_background_stations.json
@@ -0,0 +1 @@
+["DENW094", "DEBW029", "DENI052", "DENI063", "DEBY109", "DEUB022", "DESN001", "DEUB013", "DETH016", "DEBY002", "DEBY005", "DEBY099", "DEUB038", "DEBE051", "DEBE056", "DEBE062", "DEBE032", "DEBE034", "DEBE010", "DEHE046", "DEST031", "DEBY122", "DERP022", "DEBY079", "DEBW102", "DEBW076", "DEBW045", "DESH016", "DESN004", "DEHE032", "DEBB050", "DEBW042", "DEBW046", "DENW067", "DESL019", "DEST014", "DENW062", "DEHE033", "DENW081", "DESH008", "DEBB055", "DENI011", "DEHB001", "DEHB004", "DEHB002", "DEHB003", "DEHB005", "DEST039", "DEUB003", "DEBW072", "DEST002", "DEBB001", "DEHE039", "DEBW035", "DESN005", "DEBW047", "DENW004", "DESN011", "DESN076", "DEBB064", "DEBB006", "DEHE001", "DESN012", "DEST030", "DESL003", "DEST104", "DENW050", "DENW008", "DETH026", "DESN085", "DESN014", "DESN092", "DENW071", "DEBW004", "DENI028", "DETH013", "DENI059", "DEBB007", "DEBW049", "DENI043", "DETH020", "DEBY017", "DEBY113", "DENW247", "DENW028", "DEBW025", "DEUB039", "DEBB009", "DEHE027", "DEBB042", "DEHE008", "DESN017", "DEBW084", "DEBW037", "DEHE058", "DEHE028", "DEBW112", "DEBY081", "DEBY082", "DEST032", "DETH009", "DEHE010", "DESN019", "DEHE023", "DETH036", "DETH040", "DEMV017", "DEBW028", "DENI042", "DEMV004", "DEMV019", "DEST044", "DEST050", "DEST072", "DEST022", "DEHH049", "DEHH047", "DEHH033", "DEHH050", "DEHH008", "DEHH021", "DENI054", "DEST070", "DEBB053", "DENW029", "DEBW050", "DEUB034", "DENW018", "DEST052", "DEBY020", "DENW063", "DESN050", "DETH061", "DERP014", "DETH024", "DEBW094", "DENI031", "DETH041", "DERP019", "DEBW081", "DEHE013", "DEBW021", "DEHE060", "DEBY031", "DESH021", "DESH033", "DEHE052", "DEBY004", "DESN024", "DEBW052", "DENW042", "DEBY032", "DENW053", "DENW059", "DEBB082", "DEBB031", "DEHE025", "DEBW053", "DEHE048", "DENW051", "DEBY034", "DEUB035", "DEUB032", "DESN028", "DESN059", "DEMV024", "DENW079", "DEHE044", "DEHE042", "DEBB043", "DEBB036", "DEBW024", "DERP001", "DEMV012", "DESH005", "DESH023", "DEUB031", "DENI062", "DENW006", "DEBB065", "DEST077", 
"DEST005", "DERP007", "DEBW006", "DEBW007", "DEHE030", "DENW015", "DEBY013", "DETH025", "DEUB033", "DEST025", "DEHE045", "DESN057", "DENW036", "DEBW044", "DEUB036", "DENW096", "DETH095", "DENW038", "DEBY089", "DEBY039", "DENW095", "DEBY047", "DEBB067", "DEBB040", "DEST078", "DENW065", "DENW066", "DEBY052", "DEUB030", "DETH027", "DEBB048", "DENW047", "DEBY049", "DERP021", "DEHE034", "DESN079", "DESL008", "DETH018", "DEBW103", "DEHE017", "DEBW111", "DENI016", "DENI038", "DENI058", "DENI029", "DEBY118", "DEBW032", "DEBW110", "DERP017", "DESN036", "DEBW026", "DETH042", "DEBB075", "DEBB052", "DEBB021", "DEBB038", "DESN051", "DEUB041", "DEBW020", "DEBW113", "DENW078", "DEHE018", "DEBW065", "DEBY062", "DEBW027", "DEBW041", "DEHE043", "DEMV007", "DEMV021", "DEBW054", "DETH005", "DESL012", "DESL011", "DEST069", "DEST071", "DEUB004", "DESH006", "DEUB029", "DEUB040", "DESN074", "DEBW031", "DENW013", "DENW179", "DEBW056", "DEBW087", "DEST061", "DEMV001", "DEBB024", "DEBW057", "DENW064", "DENW068", "DENW080", "DENI019", "DENI077", "DEHE026", "DEBB066", "DEBB083", "DEST063", "DEBW013", "DETH086", "DESL018", "DETH096", "DEBW059", "DEBY072", "DEBY088", "DEBW060", "DEBW107", "DEBW036", "DEUB026", "DEBW019", "DENW010", "DEST098", "DEHE019", "DEBW039", "DESL017", "DEBW034", "DEUB005", "DEBB051", "DEHE051", "DEBW023", "DEBY092", "DEBW008", "DEBW030", "DENI060", "DEST011", "DENW030", "DENI041", "DERP015", "DEUB001", "DERP016", "DERP028", "DERP013", "DEHE022", "DEUB021", "DEBW010", "DEST066", "DEBB063", "DEBB028", "DEHE024", "DENI020", "DENI051", "DERP025", "DEBY077", "DEMV018", "DEST089", "DEST028", "DETH060", "DEHE050", "DEUB028", "DESN045", "DEUB042"]
diff --git a/requirements.txt b/requirements.txt
index 9cd9ea44c3cd0068c985c52b07a7cfaa746d9b7c..e7c2f439966f6b085348af3078c814c7f0511024 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -43,6 +43,7 @@ pytest-cov==2.8.1
 pytest-html==2.0.1
 pytest-lazy-fixture==0.6.3
 pytest-metadata==1.8.0
+pytest-sugar
 python-dateutil==2.8.1
 pytz==2019.3
 PyYAML==5.3
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
index 8e5a31e476e47b17d3f271199bbc151fc0dc0b50..9d1c2d62da0864d2626c7ada1aac4dcf6f633630 100644
--- a/requirements_gpu.txt
+++ b/requirements_gpu.txt
@@ -43,6 +43,7 @@ pytest-cov==2.8.1
 pytest-html==2.0.1
 pytest-lazy-fixture==0.6.3
 pytest-metadata==1.8.0
+pytest-sugar
 python-dateutil==2.8.1
 pytz==2019.3
 PyYAML==5.3
diff --git a/run.py b/run.py
index eb7d1e35f6c5449c2566a9cba83231be9e2a4478..9809712876dc886007b042a52d7b46c027800faf 100644
--- a/run.py
+++ b/run.py
@@ -16,7 +16,8 @@ def main(parser_args):
 
     with RunEnvironment():
         ExperimentSetup(parser_args, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
-                        station_type='background', trainable=False, create_new_model=False)
+                        station_type='background', trainable=False, create_new_model=False, window_history_size=6,
+                        create_new_bootstraps=True)
         PreProcessing()
 
         ModelSetup()
diff --git a/run_zam347.py b/run_zam347.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e140f48188a6df7207e04d048f38d9701c69d4b
--- /dev/null
+++ b/run_zam347.py
@@ -0,0 +1,55 @@
+__author__ = "Lukas Leufen"
+__date__ = '2019-11-14'
+
+
+import argparse
+import json
+import logging
+
+from src.run_modules.experiment_setup import ExperimentSetup
+from src.run_modules.model_setup import ModelSetup
+from src.run_modules.post_processing import PostProcessing
+from src.run_modules.pre_processing import PreProcessing
+from src.run_modules.run_environment import RunEnvironment
+from src.run_modules.training import Training
+
+
+def load_stations():
+
+    try:
+        filename = 'German_background_stations.json'
+        with open(filename, 'r') as jfile:
+            stations = json.load(jfile)
+            logging.info(f"load station IDs from file: {filename} ({len(stations)} stations)")
+            # stations.remove('DEUB042')
+    except FileNotFoundError:
+        stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001']
+        # stations = ['DEBB050', 'DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001']
+        logging.info(f"Use stations from list: {stations}")
+
+    return stations
+
+
+def main(parser_args):
+
+    with RunEnvironment():
+
+        ExperimentSetup(parser_args, stations=load_stations(), station_type='background', trainable=False,
+                        create_new_model=True)
+        PreProcessing()
+
+        ModelSetup()
+
+        Training()
+
+        PostProcessing()
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None,
+                        help="set experiment date as string")
+    args = parser.parse_args(["--experiment_date", "testrun"])
+
+    main(args)
diff --git a/src/data_handling/bootstraps.py b/src/data_handling/bootstraps.py
index 3e69950267f9c95ccc636e560a21731ade388432..46fa7c2be39d3dadb1922a1b710065aa42d9e2d2 100644
--- a/src/data_handling/bootstraps.py
+++ b/src/data_handling/bootstraps.py
@@ -2,152 +2,108 @@ __author__ = 'Felix Kleinert, Lukas Leufen'
 __date__ = '2020-02-07'
 
 
-from src.run_modules.run_environment import RunEnvironment
 from src.data_handling.data_generator import DataGenerator
 import numpy as np
 import logging
+import keras
 import dask.array as da
 import xarray as xr
 import os
 import re
 from src import helpers
+from typing import List, Union, Pattern, Tuple
 
 
-class BootStrapGenerator:
+class BootStrapGenerator(keras.utils.Sequence):
+    """
+    generator for bootstraps as keras sequence inheritance. Initialise with number of boots, the original history, the
+    shuffled data, all used variables and the current shuffled variable. While iterating over this generator, it returns
+    the bootstrapped history for the given boot index (this is the iterator index) in the same format as the original
+    history, ready to use. Note that in some cases some samples can contain nan values (in these cases the entire data
+    row is null, not only single entries).
+    """
+    def __init__(self, number_of_boots: int, history: xr.DataArray, shuffled: xr.DataArray, variables: List[str],
+                 shuffled_variable: str):
+        self.number_of_boots = number_of_boots
+        self.variables = variables
+        self.history_orig = history
+        self.history = history.sel(variables=helpers.list_pop(self.variables, shuffled_variable))
+        self.shuffled = shuffled.sel(variables=shuffled_variable)
 
-    def __init__(self, orig_generator, boots, chunksize, bootstrap_path):
-        self.orig_generator: DataGenerator = orig_generator
-        self.stations = self.orig_generator.stations
-        self.variables = self.orig_generator.variables
-        self.boots = boots
-        self.chunksize = chunksize
-        self.bootstrap_path = bootstrap_path
-        self._iterator = 0
-        self.bootstrap_meta = []
-
-    def __len__(self):
-        """
-        display the number of stations
-        """
-        return len(self.orig_generator)*self.boots*len(self.variables)
-
-    def get_labels(self, key):
-        _, label = self.orig_generator[key]
-        for _ in range(self.boots):
-            yield label
-
-    def get_generator(self):
-        """
-        This is the implementation of the __next__ method of the iterator protocol. Get the data generator, and return
-        the history and label data of this generator.
-        :return:
-        """
-        while True:
-            for i, data in enumerate(self.orig_generator):
-                station = self.orig_generator.get_station_key(i)
-                logging.info(f"station: {station}")
-                hist, label = data
-                len_of_label = len(label)
-                shuffled_data = self.load_boot_data(station)
-                for var in self.variables:
-                    logging.info(f"  var: {var}")
-                    for boot in range(self.boots):
-                        logging.debug(f"boot: {boot}")
-                        boot_hist = hist.sel(variables=helpers.list_pop(self.variables, var))
-                        shuffled_var = shuffled_data.sel(variables=var, boots=boot).expand_dims("variables").drop("boots").transpose("datetime", "window", "Stations", "variables")
-                        boot_hist = boot_hist.combine_first(shuffled_var)
-                        boot_hist = boot_hist.sortby("variables")
-                        self.bootstrap_meta.extend([[var, station]]*len_of_label)
-                        yield boot_hist, label
-            return
-
-    def get_orig_prediction(self, path, file_name, prediction_name="CNN"):
-        file = os.path.join(path, file_name)
-        data = xr.open_dataarray(file)
-        for _ in range(self.boots):
-            yield data.sel(type=prediction_name).squeeze()
-
-    def load_boot_data(self, station):
-        files = os.listdir(self.bootstrap_path)
-        regex = re.compile(rf"{station}_\w*\.nc")
-        file_name = os.path.join(self.bootstrap_path, list(filter(regex.search, files))[0])
-        shuffled_data = xr.open_dataarray(file_name, chunks=100)
-        return shuffled_data
+    def __len__(self) -> int:
+        return self.number_of_boots
 
+    def __getitem__(self, index: int) -> xr.DataArray:
+        """
+        return bootstrapped history for given bootstrap index in the same index structure as the original history object
+        :param index: boot index in [0, nboots-1]
+        :return: bootstrapped history ready to use
+        """
+        logging.debug(f"boot: {index}")
+        boot_hist = self.history.copy()
+        boot_hist = boot_hist.combine_first(self.__get_shuffled(index))
+        return boot_hist.reindex_like(self.history_orig)
 
-class BootStraps(RunEnvironment):
+    def __get_shuffled(self, index: int) -> xr.DataArray:
+        """
+        returns shuffled data for given boot index from shuffled attribute
+        :param index: boot index in [0, nboots-1]
+        :return: shuffled data
+        """
+        shuffled_var = self.shuffled.sel(boots=index).expand_dims("variables").drop("boots")
+        return shuffled_var.transpose("datetime", "window", "Stations", "variables")
 
-    def __init__(self, data, bootstrap_path, number_bootstraps=10):
 
-        super().__init__()
-        self.data: DataGenerator = data
-        self.number_bootstraps = number_bootstraps
+class CreateShuffledData:
+    """
+    Verify and create shuffled data for all data contained in given data generator class. Starts automatically on
+    initialisation, no further calls are required. Checks and creation of new data are all performed inside
+    bootstrap_path.
+    """
+    def __init__(self, data: DataGenerator, number_of_bootstraps: int, bootstrap_path: str):
+        self.data = data
+        self.number_of_bootstraps = number_of_bootstraps
         self.bootstrap_path = bootstrap_path
-        self.chunks = self.get_chunk_size()
         self.create_shuffled_data()
-        self._boot_strap_generator = BootStrapGenerator(self.data, self.number_bootstraps, self.chunks, self.bootstrap_path)
-
-    def get_boot_strap_meta(self):
-        return self._boot_strap_generator.bootstrap_meta
-
-    def boot_strap_generator(self):
-        return self._boot_strap_generator.get_generator()
-
-    def get_boot_strap_generator_length(self):
-        return self._boot_strap_generator.__len__()
-
-    def get_labels(self, key):
-        labels_list = []
-        chunks = None
-        for labels in self._boot_strap_generator.get_labels(key):
-            if len(labels_list) == 0:
-                chunks = (100, labels.data.shape[1])
-            labels_list.append(da.from_array(labels.data, chunks=chunks))
-        labels_out = da.concatenate(labels_list, axis=0)
-        return labels_out.compute()
-
-    def get_orig_prediction(self, path, name):
-        labels_list = []
-        chunks = None
-        for labels in self._boot_strap_generator.get_orig_prediction(path, name):
-            if len(labels_list) == 0:
-                chunks = (100, labels.data.shape[1])
-            labels_list.append(da.from_array(labels.data, chunks=chunks))
-        labels_out = da.concatenate(labels_list, axis=0)
-        labels_out = labels_out.compute()
-        return labels_out[~np.isnan(labels_out).any(axis=1), :]
-
-    def get_chunk_size(self):
-        hist, _ = self.data[0]
-        return (100, *hist.shape[1:], self.number_bootstraps)
-
-    def create_shuffled_data(self):
+
+    def create_shuffled_data(self) -> None:
         """
         Create shuffled data. Use original test data, add dimension 'boots' with length number of bootstraps and insert
         randomly selected variables. If there is a suitable local file for requested window size and number of
         bootstraps, no additional file will be created inside this function.
         """
-        logging.info("create shuffled bootstrap data")
+        logging.info("create / check shuffled bootstrap data")
         variables_str = '_'.join(sorted(self.data.variables))
         window = self.data.window_history_size
         for station in self.data.stations:
             valid, nboot = self.valid_bootstrap_file(station, variables_str, window)
             if not valid:
                 logging.info(f'create bootstap data for {station}')
-                hist, _ = self.data[station]
-                data = hist.copy()
-                file_name = f"{station}_{variables_str}_hist{window}_nboots{nboot}_shuffled.nc"
-                file_path = os.path.join(self.bootstrap_path, file_name)
-                data = data.expand_dims({'boots': range(nboot)}, axis=-1)
+                hist = self.data.get_data_generator(station).get_transposed_history()
+                file_path = self._set_file_path(station, variables_str, window, nboot)
+                hist = hist.expand_dims({'boots': range(nboot)}, axis=-1)
                 shuffled_variable = []
-                for i, var in enumerate(data.coords['variables']):
-                    single_variable = data.sel(variables=var).values
-                    shuffled_variable.append(self.shuffle_single_variable(single_variable, chunks=(100, *data.shape[1:3], data.shape[-1])))
-                shuffled_variable_da = da.stack(shuffled_variable, axis=-2, ).rechunk("auto")
-                shuffled_data = xr.DataArray(shuffled_variable_da, coords=data.coords, dims=data.dims)
+                chunks = (100, *hist.shape[1:3], hist.shape[-1])
+                for i, var in enumerate(hist.coords['variables']):
+                    single_variable = hist.sel(variables=var).values
+                    shuffled_variable.append(self.shuffle(single_variable, chunks=chunks))
+                shuffled_variable_da = da.stack(shuffled_variable, axis=-2).rechunk("auto")
+                shuffled_data = xr.DataArray(shuffled_variable_da, coords=hist.coords, dims=hist.dims)
                 shuffled_data.to_netcdf(file_path)
 
-    def valid_bootstrap_file(self, station, variables, window):
+    def _set_file_path(self, station: str, variables: str, window: int, nboots: int) -> str:
+        """
+        Set file name following naming convention <station>_<var1>_<var2>_..._hist<window>_nboots<nboots>_shuffled.nc
+        and creates joined path using bootstrap_path attribute set on initialisation.
+        :param station: station name
+        :param variables: variables already preprocessed as single string with all variables separated by underscore
+        :param window: window length
+        :param nboots: number of boots
+        :return: full file path
+        """
+        file_name = f"{station}_{variables}_hist{window}_nboots{nboots}_shuffled.nc"
+        return os.path.join(self.bootstrap_path, file_name)
+
+    def valid_bootstrap_file(self, station: str, variables: str, window: int) -> Tuple[bool, Union[None, int]]:
         """
         Compare local bootstrap file with given settings for station, variables, window and number of bootstraps. If a
         match was found, this method returns a tuple (True, None). In any other case, it returns (False, max_nboot),
@@ -155,32 +111,166 @@ class BootStraps(RunEnvironment):
         length is ge than given window size form args and the number of boots is also ge than the given number of boots
         from this class. Furthermore, this functions deletes local files, if the match the station pattern but don't fit
         the window and bootstrap condition. This is performed, because it is assumed, that the corresponding file will
-        be created with a longer or at least same window size and numbers of bootstraps.
-        :param station:
-        :param variables:
-        :param window:
-        :return:
+        be created with a longer or at least the same window size and number of bootstraps.
+        :param station: name of the station to validate
+        :param variables: all variables already merged in single string separated by underscore
+        :param window: required window size
+        :return: tuple containing first the information whether a valid file was found and second the number of boots
+            that needs to be used for the new boot creation (this is only relevant if no valid file was found -
+            otherwise the second element is None anyway).
         """
         regex = re.compile(rf"{station}_{variables}_hist(\d+)_nboots(\d+)_shuffled")
-        max_nboot = self.number_bootstraps
+        max_nboot = self.number_of_bootstraps
         for file in os.listdir(self.bootstrap_path):
             match = regex.match(file)
             if match:
                 window_file = int(match.group(1))
                 nboot_file = int(match.group(2))
                 max_nboot = max([max_nboot, nboot_file])
-                if (window_file >= window) and (nboot_file >= self.number_bootstraps):
+                if (window_file >= window) and (nboot_file >= self.number_of_bootstraps):
                     return True, None
                 else:
                     os.remove(os.path.join(self.bootstrap_path, file))
         return False, max_nboot
 
     @staticmethod
-    def shuffle_single_variable(data: da.array, chunks) -> da.core.Array:
+    def shuffle(data: da.array, chunks: Tuple) -> da.core.Array:
+        """
+        Shuffle randomly from given data (draw elements with replacement)
+        :param data: data to shuffle
+        :param chunks: chunk size for dask
+        :return: shuffled data as dask core array (not computed yet)
+        """
         size = data.shape
         return da.random.choice(data.reshape(-1,), size=size, chunks=chunks)
 
 
+class BootStraps:
+    """
+    Main class to perform bootstrap operations. This class requires a DataGenerator object and a path, where to find and
+    store all data related to the bootstrap operation. In initialisation, this class will automatically call the class
+    CreateShuffledData to set up the shuffled data sets. How to use BootStraps:
+    * call .get_generator(<station>, <variable>) to get a generator for given station and variable combination that
+        iterates over all bootstrap realisations (as keras sequence)
+    * call .get_labels(<station>) to get the measured observations in the same format as bootstrap predictions
+    * call .get_bootstrap_predictions(<station>, <variable>) to get the bootstrapped predictions
+    * call .get_orig_prediction(<station>) to get the non-bootstrapped predictions (referred as original predictions)
+    """
+
+    def __init__(self, data: DataGenerator, bootstrap_path: str, number_of_bootstraps: int = 10):
+        self.data = data
+        self.number_of_bootstraps = number_of_bootstraps
+        self.bootstrap_path = bootstrap_path
+        CreateShuffledData(data, number_of_bootstraps, bootstrap_path)
+
+    @property
+    def stations(self) -> List[str]:
+        return self.data.stations
+
+    @property
+    def variables(self) -> List[str]:
+        return self.data.variables
+
+    @property
+    def window_history_size(self) -> int:
+        return self.data.window_history_size
+
+    def get_generator(self, station: str, variable: str) -> BootStrapGenerator:
+        """
+        Returns the actual generator to use for the bootstrap evaluation. The generator requires information on station
+        and bootstrapped variable. There is only a loop on the bootstrap realisation and not on stations or variables.
+        :param station: name of the station
+        :param variable: name of the variable to bootstrap
+        :return: BootStrapGenerator class ready to use.
+        """
+        hist, _ = self.data[station]
+        shuffled_data = self._load_shuffled_data(station, self.variables).reindex_like(hist)
+        return BootStrapGenerator(self.number_of_bootstraps, hist, shuffled_data, self.variables, variable)
+
+    def get_labels(self, station: str) -> np.ndarray:
+        """
+        Repeats labels for given station by the number of boots and returns as single array.
+        :param station: name of station
+        :return: repeated labels as single array
+        """
+        labels = self.data[station][1]
+        return np.tile(labels.data, (self.number_of_bootstraps, 1))
+
+    def get_orig_prediction(self, path: str, file_name: str, prediction_name: str = "CNN") -> np.ndarray:
+        """
+        Repeats predictions from given file(_name) in path by the number of boots.
+        :param path: path to file
+        :param file_name: file name
+        :param prediction_name: name of the prediction to select from loaded file (default CNN)
+        :return: repeated predictions
+        """
+        file = os.path.join(path, file_name)
+        prediction = xr.open_dataarray(file).sel(type=prediction_name).squeeze()
+        vals = np.tile(prediction.data, (self.number_of_bootstraps, 1))
+        return vals[~np.isnan(vals).any(axis=1), :]
+
+    def _load_shuffled_data(self, station: str, variables: List[str]) -> xr.DataArray:
+        """
+        Load shuffled data from bootstrap path. Data is stored as
+        '<station>_<var1>_<var2>_..._hist<histsize>_nboots<nboots>_shuffled.nc', e.g.
+        'DEBW107_cloudcover_no_no2_temp_u_v_hist13_nboots20_shuffled.nc'
+        :param station: name of station
+        :param variables: list of variables
+        :return: shuffled data as xarray
+        """
+        file_name = self._get_shuffled_data_file(station, variables)
+        shuffled_data = xr.open_dataarray(file_name, chunks=100)
+        return shuffled_data
+
+    def _get_shuffled_data_file(self, station: str, variables: List[str]) -> str:
+        """
+        Looks for data file using regular expressions and returns found file or raise FileNotFoundError
+        :param station: name of station
+        :param variables: name of variables
+        :return: found file with complete path
+        """
+        files = os.listdir(self.bootstrap_path)
+        regex = self._create_file_regex(station, variables)
+        file = self._filter_files(regex, files, self.window_history_size, self.number_of_bootstraps)
+        if file:
+            return os.path.join(self.bootstrap_path, file)
+        else:
+            raise FileNotFoundError(f"Could not find a file to match pattern {regex}")
+
+    @staticmethod
+    def _create_file_regex(station: str, variables: List[str]) -> Pattern:
+        """
+        Creates regex for given station and variables to look for shuffled data with pattern:
+        `<station>(_<var>)*_hist(<hist>)_nboots(<nboots>)_shuffled.nc`
+        :param station: station name to use as prefix
+        :param variables: variables to add after station
+        :return: compiled regular expression
+        """
+        var_regex = "".join([rf"(_\w+)*_{v}(_\w+)*" for v in sorted(variables)])
+        regex = re.compile(rf"{station}{var_regex}_hist(\d+)_nboots(\d+)_shuffled\.nc")
+        return regex
+
+    @staticmethod
+    def _filter_files(regex: Pattern, files: List[str], window: int, nboot: int) -> Union[str, None]:
+        """
+        Filter list of files by regex. Regex has to be structured to match the following string structure
+        `<station>(_<var>)*_hist(<hist>)_nboots(<nboots>)_shuffled.nc`. Hist and nboots values have to be included as
+        group. All matches are compared to given window and nboot parameters. A valid file must have the same value as
+        (or a larger one than) these parameters and contain all variables.
+        :param regex: compiled regular expression pattern following the style from method description
+        :param files: list of file names to filter
+        :param window: minimum length of window to look for
+        :param nboot: minimal number of boots to search
+        :return: matching file name or None, if no valid file was found
+        """
+        for f in files:
+            match = regex.match(f)
+            if match:
+                last = match.lastindex
+                if (int(match.group(last-1)) >= window) and (int(match.group(last)) >= nboot):
+                    return f
+
+
 if __name__ == "__main__":
 
     from src.run_modules.experiment_setup import ExperimentSetup
diff --git a/src/data_handling/data_generator.py b/src/data_handling/data_generator.py
index 0bf0bc35344ecf0f040f5563ddbdbe291b64404d..8d10b3e438e185b9fd158259a6ba49a5612737be 100644
--- a/src/data_handling/data_generator.py
+++ b/src/data_handling/data_generator.py
@@ -182,7 +182,7 @@ class DataGenerator(keras.utils.Sequence):
                 raise FileNotFoundError
             data = self._load_pickle_data(station, self.variables)
         except FileNotFoundError:
-            logging.info(f"load not pickle data for {station}")
+            logging.debug(f"load not pickle data for {station}")
             data = DataPrep(self.data_path, self.network, station, self.variables, station_type=self.station_type,
                             **self.kwargs)
             if self.transformation is not None:
diff --git a/src/data_handling/data_preparation.py b/src/data_handling/data_preparation.py
index bce7bfe81aa8f55273f86402d4058498bbb12dcb..5628394271918dc5631182d7de610db4ad335b7f 100644
--- a/src/data_handling/data_preparation.py
+++ b/src/data_handling/data_preparation.py
@@ -418,13 +418,13 @@ class DataPrep(object):
         data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=minimum)
         return data
 
-    def get_transposed_history(self):
+    def get_transposed_history(self) -> xr.DataArray:
         return self.history.transpose("datetime", "window", "Stations", "variables").copy()
 
-    def get_transposed_label(self):
+    def get_transposed_label(self) -> xr.DataArray:
         return self.label.squeeze("Stations").transpose("datetime", "window").copy()
 
-    def get_extremes_history(self):
+    def get_extremes_history(self) -> xr.DataArray:
         return self.extremes_history.transpose("datetime", "window", "Stations", "variables").copy()
 
     def get_extremes_label(self):
diff --git a/src/helpers.py b/src/helpers.py
index 723acbd57fc1fe1c477cb723fd4a39a0db988e0a..2589cfe88d187ac8ebdf488cc9ab84fb1598ada0 100644
--- a/src/helpers.py
+++ b/src/helpers.py
@@ -82,7 +82,6 @@ class TimeTracking(object):
             self._end()
         else:
             msg = f"Time was already stopped {time.time() - self.end}s ago."
-            logging.error(msg)
             raise AssertionError(msg)
         if get_duration:
             return self.duration()
@@ -117,7 +116,6 @@ def prepare_host(create_new=True, sampling="daily"):
     elif "runner-6HmDp9Qd-project-2411-concurrent" in hostname:
         path = f"/home/{user}/machinelearningtools/data/toar_{sampling}/"
     else:
-        logging.error(f"unknown host '{hostname}'")
         raise OSError(f"unknown host '{hostname}'")
     if not os.path.exists(path):
         try:
@@ -127,7 +125,6 @@ def prepare_host(create_new=True, sampling="daily"):
             else:
                 raise PermissionError
         except PermissionError:
-            logging.error(f"path '{path}' does not exist for host '{hostname}'.")
             raise NotADirectoryError(f"path '{path}' does not exist for host '{hostname}'.")
     else:
         logging.debug(f"set path to: {path}")
@@ -169,6 +166,28 @@ class PyTestRegex:
         return self._regex.pattern
 
 
+class PyTestAllEqual:
+
+    def __init__(self, check_list):
+        self._list = check_list
+
+    def _check_all_equal(self):
+        equal = True
+        for b in self._list:
+            equal *= xr.testing.assert_equal(self._list[0], b) is None
+        return equal == 1
+
+    def is_true(self):
+        return self._check_all_equal()
+
+
+def xr_all_equal(check_list):
+    equal = True
+    for b in check_list:
+        equal *= xr.testing.assert_equal(check_list[0], b) is None
+    return equal == 1
+
+
 def dict_to_xarray(d: Dict, coordinate_name: str) -> xr.DataArray:
     """
     Convert a dictionary of 2D-xarrays to single 3D-xarray. The name of new coordinate axis follows <coordinate_name>.
diff --git a/src/plotting/postprocessing_plotting.py b/src/plotting/postprocessing_plotting.py
index 854182613cdb63456dc8f62d2421560d829ee629..3338ce4c0d6f14aa8c0173779329a2fd81f0f57e 100644
--- a/src/plotting/postprocessing_plotting.py
+++ b/src/plotting/postprocessing_plotting.py
@@ -497,6 +497,7 @@ class PlotBootstrapSkillScore(RunEnvironment):
         """
         super().__init__()
         self._labels = None
+        self._x_name = "boot_var"
         self._data = self._prepare_data(data)
         self._plot(plot_folder, model_setup)
 
@@ -507,7 +508,7 @@ class PlotBootstrapSkillScore(RunEnvironment):
         :param data: dictionary with station names as keys and 2D xarrays as values
         :return: pre-processed data set
         """
-        data = helpers.dict_to_xarray(data, "station")
+        data = helpers.dict_to_xarray(data, "station").sortby(self._x_name)
         self._labels = [str(i) + "d" for i in data.coords["ahead"].values]
         return data.to_dataframe("data").reset_index(level=[0, 1, 2])
 
@@ -526,7 +527,7 @@ class PlotBootstrapSkillScore(RunEnvironment):
         :param model_setup: architecture type to specify plot name
         """
         fig, ax = plt.subplots()
-        sns.boxplot(x="boot_var", y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
+        sns.boxplot(x=self._x_name, y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
                     showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
         ax.axhline(y=0, color="grey", linewidth=.5)
         ax.set(ylabel=f"skill score", xlabel="", title="summary of all stations")
diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py
index 6414b3818cc270799ed6f247857b7b2904665af5..95bd5056febbe06babfd59191332c1f4cb8078d4 100644
--- a/src/run_modules/experiment_setup.py
+++ b/src/run_modules/experiment_setup.py
@@ -20,6 +20,8 @@ DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'max
                         'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
                         'pblheight': 'maximum'}
 DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"}
+DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
+                     "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "plot_conditional_quantiles"]
 
 
 class ExperimentSetup(RunEnvironment):
@@ -36,7 +38,8 @@ class ExperimentSetup(RunEnvironment):
                  experiment_path=None, plot_path=None, forecast_path=None, overwrite_local_data=None, sampling="daily",
                  create_new_model=None, bootstrap_path=None, permute_data_on_training=False, transformation=None,
                  train_min_length=None, val_min_length=None, test_min_length=None, extreme_values=None,
-                 extremes_on_right_tail_only=None):
+                 extremes_on_right_tail_only=None, evaluate_bootstraps=True, plot_list=None, number_of_bootstraps=None,
+                 create_new_bootstraps=None):
 
         # create run framework
         super().__init__()
@@ -126,6 +129,13 @@ class ExperimentSetup(RunEnvironment):
         # use all stations on all data sets (train, val, test)
         self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True)
 
+        # set post-processing instructions
+        self._set_param("evaluate_bootstraps", evaluate_bootstraps, scope="general.postprocessing")
+        create_new_bootstraps = max([self.data_store.get("trainable", "general"), create_new_bootstraps or False])
+        self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing")
+        self._set_param("number_of_bootstraps", number_of_bootstraps, default=20, scope="general.postprocessing")
+        self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
+
     def _set_param(self, param: str, value: Any, default: Any = None, scope: str = "general") -> None:
         if value is None and default is not None:
             value = default
diff --git a/src/run_modules/model_setup.py b/src/run_modules/model_setup.py
index d04be92c4209ccad58ff20e97376fa4e96a07636..c558b5fc76ff336dc6a792ec0239fa3b64eab466 100644
--- a/src/run_modules/model_setup.py
+++ b/src/run_modules/model_setup.py
@@ -10,8 +10,8 @@ import tensorflow as tf
 
 from src.model_modules.keras_extensions import HistoryAdvanced, CallbackHandler
 # from src.model_modules.model_class import MyBranchedModel as MyModel
-from src.model_modules.model_class import MyLittleModel as MyModel
-# from src.model_modules.model_class import MyTowerModel as MyModel
+# from src.model_modules.model_class import MyLittleModel as MyModel
+from src.model_modules.model_class import MyTowerModel as MyModel
 # from src.model_modules.model_class import MyPaperModel as MyModel
 from src.run_modules.run_environment import RunEnvironment
 
diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py
index 9922ba4e655d551ab23fce33bfe40f6f262274f6..158b29c6e25c8d1181872d700cb2a36114fabf6a 100644
--- a/src/run_modules/post_processing.py
+++ b/src/run_modules/post_processing.py
@@ -2,6 +2,7 @@ __author__ = "Lukas Leufen, Felix Kleinert"
 __date__ = '2019-12-11'
 
 
+import inspect
 import logging
 import os
 
@@ -23,6 +24,8 @@ from src.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStation
 from src.plotting.postprocessing_plotting import plot_conditional_quantiles
 from src.run_modules.run_environment import RunEnvironment
 
+from typing import Dict
+
 
 class PostProcessing(RunEnvironment):
 
@@ -51,66 +54,127 @@ class PostProcessing(RunEnvironment):
             self.make_prediction()
             logging.info("take a look on the next reported time measure. If this increases a lot, one should think to "
                          "skip make_prediction() whenever it is possible to save time.")
-        self.bootstrap_skill_scores = self.create_boot_straps()
-        self.skill_scores = self.calculate_skill_scores()
-        self.plot()
 
-    def create_boot_straps(self):
+        # bootstraps
+        if self.data_store.get("evaluate_bootstraps", "general.postprocessing"):
+            with TimeTracking(name="calculate bootstraps"):
+                create_new_bootstraps = self.data_store.get("create_new_bootstraps", "general.postprocessing")
+                self.bootstrap_postprocessing(create_new_bootstraps)
 
+        # skill scores
+        with TimeTracking(name="calculate skill scores"):
+            self.skill_scores = self.calculate_skill_scores()
+
+        # plotting
+        self.plot()
+
+    def bootstrap_postprocessing(self, create_new_bootstraps: bool, _iter: int = 0) -> None:
+        """
+        Create skill scores of bootstrapped data. Also creates these bootstraps if create_new_bootstraps is true or a
+        failure occurred during skill score calculation. Sets class attribute bootstrap_skill_scores.
+        :param create_new_bootstraps: calculate all bootstrap predictions and overwrite already available predictions
+        :param _iter: internal counter to reduce unnecessary recursive calls (maximum number is 2, otherwise something
+            went wrong).
+        """
+        try:
+            if create_new_bootstraps:
+                self.create_bootstrap_forecast()
+            self.bootstrap_skill_scores = self.calculate_bootstrap_skill_scores()
+        except FileNotFoundError:
+            if _iter != 0:
+                raise RuntimeError("bootstrap_postprocessing is called for the 2nd time. This means, that something "
+                                   "went wrong. Check manually the reason for the failure.")
+            logging.info("Couldn't load all files, restart bootstrap postprocessing with create_new_bootstraps=True.")
+            self.bootstrap_postprocessing(True, _iter=1)
+
+    def create_bootstrap_forecast(self) -> None:
+        """
+        Creates the bootstrapped predictions for all stations and variables. These forecasts are saved in forecast_path
+        with the names `bootstraps_{var}_{station}.nc` and `bootstraps_labels_{station}.nc`.
+        """
         # forecast
+        with TimeTracking(name=inspect.stack()[0].function):
+            # extract all requirements from data store
+            bootstrap_path = self.data_store.get("bootstrap_path")
+            forecast_path = self.data_store.get("forecast_path")
+            number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing")
+
+            # set bootstrap class
+            bootstraps = BootStraps(self.test_data, bootstrap_path, number_of_bootstraps)
+
+            # create bootstrapped predictions for all stations and variables and save it to disk
+            dims = ["index", "ahead", "type"]
+            for station in bootstraps.stations:
+                with TimeTracking(name=station):
+                    logging.info(station)
+                    for var in bootstraps.variables:
+                        station_bootstrap = bootstraps.get_generator(station, var)
+
+                        # make bootstrap predictions
+                        bootstrap_predictions = self.model.predict_generator(generator=station_bootstrap,
+                                                                             workers=2,
+                                                                             use_multiprocessing=True)
+                        if isinstance(bootstrap_predictions, list):  # if model is branched model
+                            bootstrap_predictions = bootstrap_predictions[-1]
+                        # save bootstrap predictions separately for each station and variable combination
+                        bootstrap_predictions = np.expand_dims(bootstrap_predictions, axis=-1)
+                        shape = bootstrap_predictions.shape
+                        coords = (range(shape[0]), range(1, shape[1] + 1))
+                        tmp = xr.DataArray(bootstrap_predictions, coords=(*coords, [var]), dims=dims)
+                        file_name = os.path.join(forecast_path, f"bootstraps_{var}_{station}.nc")
+                        tmp.to_netcdf(file_name)
+                    # store also true labels for each station
+                    labels = np.expand_dims(bootstraps.get_labels(station), axis=-1)
+                    file_name = os.path.join(forecast_path, f"bootstraps_labels_{station}.nc")
+                    labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=dims)
+                    labels.to_netcdf(file_name)
+
+    def calculate_bootstrap_skill_scores(self) -> Dict[str, xr.DataArray]:
+        """
+        Use already created bootstrap predictions and the original predictions (the not-bootstrapped ones) and calculate
+        skill scores for the bootstraps. The result is saved as a xarray DataArray in a dictionary structure separated
+        for each station (keys of dictionary).
+        :return: The result dictionary with station-wise skill scores
+        """
 
-        bootstrap_path = self.data_store.get("bootstrap_path")
-        forecast_path = self.data_store.get("forecast_path")
-        window_lead_time = self.data_store.get("window_lead_time")
-        bootstraps = BootStraps(self.test_data, bootstrap_path, 20)
-        with TimeTracking(name="boot predictions"):
-            bootstrap_predictions = self.model.predict_generator(generator=bootstraps.boot_strap_generator(),
-                                                                 steps=bootstraps.get_boot_strap_generator_length())
-        if isinstance(bootstrap_predictions, list):
-            bootstrap_predictions = bootstrap_predictions[-1]
-        bootstrap_meta = np.array(bootstraps.get_boot_strap_meta())
-        variables = np.unique(bootstrap_meta[:, 0])
-        for station in np.unique(bootstrap_meta[:, 1]):
-            coords = None
-            for boot in variables:
-                ind = np.all(bootstrap_meta == [boot, station], axis=1)
-                length = sum(ind)
-                sel = bootstrap_predictions[ind].reshape((length, window_lead_time, 1))
-                coords = (range(length), range(1, window_lead_time + 1))
-                tmp = xr.DataArray(sel, coords=(*coords, [boot]), dims=["index", "ahead", "type"])
-                file_name = os.path.join(forecast_path, f"bootstraps_{boot}_{station}.nc")
-                tmp.to_netcdf(file_name)
-            labels = bootstraps.get_labels(station).reshape((length, window_lead_time, 1))
-            file_name = os.path.join(forecast_path, f"bootstraps_labels_{station}.nc")
-            labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=["index", "ahead", "type"])
-            labels.to_netcdf(file_name)
-
-        # file_name = os.path.join(forecast_path, f"bootstraps_orig.nc")
-        # orig = xr.open_dataarray(file_name)
-
-
-        # calc skill scores
-        skill_scores = statistics.SkillScores(None)
-        score = {}
-        for station in np.unique(bootstrap_meta[:, 1]):
-            file_name = os.path.join(forecast_path, f"bootstraps_labels_{station}.nc")
-            labels = xr.open_dataarray(file_name)
-            shape = labels.shape
-            orig = bootstraps.get_orig_prediction(forecast_path,  f"forecasts_norm_{station}_test.nc").reshape(shape)
-            orig = xr.DataArray(orig, coords=(range(shape[0]), range(1, shape[1] + 1), ["orig"]), dims=["index", "ahead", "type"])
-            skill = pd.DataFrame(columns=range(1, window_lead_time + 1))
-            for boot in variables:
-                file_name = os.path.join(forecast_path, f"bootstraps_{boot}_{station}.nc")
-                boot_data = xr.open_dataarray(file_name)
-                boot_data = boot_data.combine_first(labels)
-                boot_data = boot_data.combine_first(orig)
-                boot_scores = []
-                for iahead in range(window_lead_time):
-                    data = boot_data.sel(ahead=iahead + 1)
-                    boot_scores.append(skill_scores.general_skill_score(data, forecast_name=boot, reference_name="orig"))
-                skill.loc[boot] = np.array(boot_scores)
-            score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"])
-        return score
+        with TimeTracking(name=inspect.stack()[0].function):
+            # extract all requirements from data store
+            bootstrap_path = self.data_store.get("bootstrap_path")
+            forecast_path = self.data_store.get("forecast_path")
+            window_lead_time = self.data_store.get("window_lead_time")
+            number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing")
+            bootstraps = BootStraps(self.test_data, bootstrap_path, number_of_bootstraps)
+
+            skill_scores = statistics.SkillScores(None)
+            score = {}
+            for station in self.test_data.stations:
+                logging.info(station)
+
+                # get station labels
+                file_name = os.path.join(forecast_path, f"bootstraps_labels_{station}.nc")
+                labels = xr.open_dataarray(file_name)
+                shape = labels.shape
+
+                # get original forecasts
+                orig = bootstraps.get_orig_prediction(forecast_path,  f"forecasts_norm_{station}_test.nc").reshape(shape)
+                coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"])
+                orig = xr.DataArray(orig, coords=coords, dims=["index", "ahead", "type"])
+
+                # calculate skill scores for each variable
+                skill = pd.DataFrame(columns=range(1, window_lead_time + 1))
+                for boot in self.test_data.variables:
+                    file_name = os.path.join(forecast_path, f"bootstraps_{boot}_{station}.nc")
+                    boot_data = xr.open_dataarray(file_name)
+                    boot_data = boot_data.combine_first(labels).combine_first(orig)
+                    boot_scores = []
+                    for ahead in range(1, window_lead_time + 1):
+                        data = boot_data.sel(ahead=ahead)
+                        boot_scores.append(skill_scores.general_skill_score(data, forecast_name=boot, reference_name="orig"))
+                    skill.loc[boot] = np.array(boot_scores)
+
+                # collect all results in single dictionary
+                score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"])
+            return score
 
     def _load_model(self):
         try:
@@ -126,19 +190,29 @@ class PostProcessing(RunEnvironment):
         logging.debug("Run plotting routines...")
         path = self.data_store.get("forecast_path")
 
-        plot_conditional_quantiles(self.test_data.stations, pred_name="CNN", ref_name="obs",
-                                   forecast_path=path, plot_name_affix="cali-ref", plot_folder=self.plot_path)
-        plot_conditional_quantiles(self.test_data.stations, pred_name="obs", ref_name="CNN",
-                                   forecast_path=path, plot_name_affix="like-bas", plot_folder=self.plot_path)
-        PlotStationMap(generators={'b': self.test_data}, plot_folder=self.plot_path)
-        PlotMonthlySummary(self.test_data.stations, path, r"forecasts_%s_test.nc", self.target_var,
-                           plot_folder=self.plot_path)
-        PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, model_setup="CNN")
-        PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
-                                     extra_name_tag="all_terms_", model_setup="CNN")
-        PlotCompetitiveSkillScore(self.skill_scores[0], plot_folder=self.plot_path, model_setup="CNN")
-        PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path, model_setup="CNN")
-        PlotTimeSeries(self.test_data.stations, path, r"forecasts_%s_test.nc", plot_folder=self.plot_path, sampling=self._sampling)
+        plot_list = self.data_store.get("plot_list", "postprocessing")
+
+        if self.bootstrap_skill_scores is not None and "PlotBootstrapSkillScore" in plot_list:
+            PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path, model_setup="CNN")
+        if "plot_conditional_quantiles" in plot_list:
+            plot_conditional_quantiles(self.test_data.stations, pred_name="CNN", ref_name="obs",
+                                       forecast_path=path, plot_name_affix="cali-ref", plot_folder=self.plot_path)
+            plot_conditional_quantiles(self.test_data.stations, pred_name="obs", ref_name="CNN",
+                                       forecast_path=path, plot_name_affix="like-bas", plot_folder=self.plot_path)
+        if "PlotStationMap" in plot_list:
+            PlotStationMap(generators={'b': self.test_data}, plot_folder=self.plot_path)
+        if "PlotMonthlySummary" in plot_list:
+            PlotMonthlySummary(self.test_data.stations, path, r"forecasts_%s_test.nc", self.target_var,
+                               plot_folder=self.plot_path)
+        if "PlotClimatologicalSkillScore" in plot_list:
+            PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, model_setup="CNN")
+            PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
+                                         extra_name_tag="all_terms_", model_setup="CNN")
+        if "PlotCompetitiveSkillScore" in plot_list:
+            PlotCompetitiveSkillScore(self.skill_scores[0], plot_folder=self.plot_path, model_setup="CNN")
+        if "PlotTimeSeries" in plot_list:
+            PlotTimeSeries(self.test_data.stations, path, r"forecasts_%s_test.nc", plot_folder=self.plot_path,
+                           sampling=self._sampling)
 
     def calculate_test_score(self):
         test_score = self.model.evaluate_generator(generator=self.test_data_distributed.distribute_on_batches(),
diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py
index f83b659f3d8ae449767e8f125893dc6df0de17d8..b5de28b3c21d83ea00e4319deb34b0a43d41811c 100644
--- a/src/run_modules/pre_processing.py
+++ b/src/run_modules/pre_processing.py
@@ -37,7 +37,7 @@ class PreProcessing(RunEnvironment):
         args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope="preprocessing")
         kwargs = self.data_store.create_args_dict(DEFAULT_KWARGS_LIST, scope="preprocessing")
         stations = self.data_store.get("stations")
-        valid_stations = self.check_valid_stations(args, kwargs, stations, load_tmp=False, save_tmp=False)
+        valid_stations = self.check_valid_stations(args, kwargs, stations, load_tmp=False, save_tmp=False, name="all")
         self.data_store.set("stations", valid_stations)
         self.split_train_val_test()
         self.report_pre_processing()
@@ -99,25 +99,24 @@ class PreProcessing(RunEnvironment):
             this list is ignored.
         :param set_name: name to load/save all information from/to data store.
         """
-        scope = set_name
-        args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope)
-        kwargs = self.data_store.create_args_dict(DEFAULT_KWARGS_LIST, scope)
+        args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope=set_name)
+        kwargs = self.data_store.create_args_dict(DEFAULT_KWARGS_LIST, scope=set_name)
         stations = args["stations"]
-        if self.data_store.get("use_all_stations_on_all_data_sets", scope):
+        if self.data_store.get("use_all_stations_on_all_data_sets", scope=set_name):
             set_stations = stations
         else:
             set_stations = stations[index_list]
         logging.debug(f"{set_name.capitalize()} stations (len={len(set_stations)}): {set_stations}")
-        set_stations = self.check_valid_stations(args, kwargs, set_stations, load_tmp=False)
-        self.data_store.set("stations", set_stations, scope)
-        set_args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope)
+        set_stations = self.check_valid_stations(args, kwargs, set_stations, load_tmp=False, name=set_name)
+        self.data_store.set("stations", set_stations, scope=set_name)
+        set_args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope=set_name)
         data_set = DataGenerator(**set_args, **kwargs)
-        self.data_store.set("generator", data_set, scope)
+        self.data_store.set("generator", data_set, scope=set_name)
         if set_name == "train":
             self.data_store.set("transformation", data_set.transformation)
 
     @staticmethod
-    def check_valid_stations(args: Dict, kwargs: Dict, all_stations: List[str], load_tmp=True, save_tmp=True):
+    def check_valid_stations(args: Dict, kwargs: Dict, all_stations: List[str], load_tmp=True, save_tmp=True, name=None):
         """
         Check if all given stations in `all_stations` are valid. Valid means, that there is data available for the given
         time range (is included in `kwargs`). The shape and the loading time are logged in debug mode.
@@ -126,17 +125,19 @@ class PreProcessing(RunEnvironment):
         :param kwargs: positional parameters for the DataGenerator class (e.g. `start`, `interpolate_method`,
             `window_lead_time`).
         :param all_stations: All stations to check.
+        :param name: name to display in the logging info message
         :return: Corrected list containing only valid station IDs.
         """
         t_outer = TimeTracking()
         t_inner = TimeTracking(start=False)
-        logging.info("check valid stations started")
+        logging.info(f"check valid stations started{' (%s)' % name if name else ''}")
         valid_stations = []
 
         # all required arguments of the DataGenerator can be found in args, positional arguments in args and kwargs
         data_gen = DataGenerator(**args, **kwargs)
-        for station in all_stations:
+        for pos, station in enumerate(all_stations):
             t_inner.run()
+            logging.info(f"check station {station} ({pos + 1} / {len(all_stations)})")
             try:
                 data = data_gen.get_data_generator(key=station, load_local_tmp_storage=load_tmp,
                                                    save_local_tmp_storage=save_tmp)
diff --git a/src/run_modules/run_environment.py b/src/run_modules/run_environment.py
index 04c984b1a00de4d7cec93adab10120370e76002e..7bd5027788934322d704192e1dff2995539fe245 100644
--- a/src/run_modules/run_environment.py
+++ b/src/run_modules/run_environment.py
@@ -63,7 +63,6 @@ class RunEnvironment(object):
         except (NameNotFoundInDataStore, FileNotFoundError):
             pass
 
-
     @staticmethod
     def do_stuff(length=2):
         time.sleep(length)
diff --git a/src/statistics.py b/src/statistics.py
index 26b2be8854c51584f20b753717ea94cc12967369..6510097fc3c31645bc0fa053a5ade05c3e4d908d 100644
--- a/src/statistics.py
+++ b/src/statistics.py
@@ -103,10 +103,9 @@ def mean_squared_error(a, b):
     return np.square(a - b).mean()
 
 
-class SkillScores(RunEnvironment):
+class SkillScores:
 
     def __init__(self, internal_data):
-        super().__init__()
         self.internal_data = internal_data
 
     def skill_scores(self, window_lead_time):
diff --git a/test/test_data_handling/test_bootstraps.py b/test/test_data_handling/test_bootstraps.py
index e74499523751fd74e449bbb25455579f770d17bc..c2b814b7bf173b61b4967c83611cdd3de08ed91b 100644
--- a/test/test_data_handling/test_bootstraps.py
+++ b/test/test_data_handling/test_bootstraps.py
@@ -1,64 +1,293 @@
 
-from src.data_handling.bootstraps import BootStraps
+from src.data_handling.bootstraps import BootStraps, CreateShuffledData, BootStrapGenerator
+from src.data_handling.data_generator import DataGenerator
+from src.helpers import PyTestAllEqual, xr_all_equal
 
-import pytest
+import logging
+import mock
 import os
+import pytest
+import shutil
+import typing
 
 import numpy as np
+import xarray as xr
+
+
+@pytest.fixture
+def orig_generator(data_path):
+    return DataGenerator(data_path, 'AIRBASE', ['DEBW107', 'DEBW013'],
+                         ['o3', 'temp'], 'datetime', 'variables', 'o3', start=2010, end=2014,
+                         statistics_per_var={"o3": "dma8eu", "temp": "maximum"})
 
 
-class TestBootstraps:
+@pytest.fixture
+def data_path():
+    path = os.path.join(os.path.dirname(__file__), "data")
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return path
+
+
+class TestBootStrapGenerator:
 
     @pytest.fixture
-    def path(self):
-        path = os.path.join(os.path.dirname(__file__), "data")
-        if not os.path.exists(path):
-            os.makedirs(path)
-        return path
+    def hist(self, orig_generator):
+        return orig_generator.get_data_generator(0).get_transposed_history()
 
     @pytest.fixture
-    def boot_no_init(self, path):
-        obj = object.__new__(BootStraps)
-        super(BootStraps, obj).__init__()
-        obj.number_bootstraps = 50
-        obj.bootstrap_path = path
-        return obj
-
-    def test_valid_bootstrap_file(self, path, boot_no_init):
-        station = "TESTSTATION"
-        variables = "var1_var2_var3"
-        window = 5
-        # empty case
-        assert len(os.listdir(path)) == 0
-        assert boot_no_init.valid_bootstrap_file(station, variables, window) == (False, 50)
-        # different cases, where files with bigger range are existing
-        os.mknod(os.path.join(path, f"{station}_{variables}_hist5_nboots50_shuffled.dat"))
-        assert boot_no_init.valid_bootstrap_file(station, variables, window) == (True, None)
-        os.mknod(os.path.join(path, f"{station}_{variables}_hist5_nboots100_shuffled.dat"))
-        assert boot_no_init.valid_bootstrap_file(station, variables, window) == (True, None)
-        os.mknod(os.path.join(path, f"{station}_{variables}_hist10_nboots50_shuffled.dat"))
-        os.mknod(os.path.join(path, f"{station}1_{variables}_hist10_nboots50_shuffled.dat"))
-        assert boot_no_init.valid_bootstrap_file(station, variables, window) == (True, None)
-        #  need to reload data and therefore remove not fitting files for this station
-        assert boot_no_init.valid_bootstrap_file(station, variables, 20) == (False, 100)
-        assert len(os.listdir(path)) == 1
+    def boot_gen(self, hist):
+        return BootStrapGenerator(20, hist, hist.expand_dims({"boots": [0]}) + 1, ["o3", "temp"], "o3")
+
+    def test_init(self, boot_gen, hist):
+        assert boot_gen.number_of_boots == 20
+        assert boot_gen.variables == ["o3", "temp"]
+        assert xr.testing.assert_equal(boot_gen.history_orig, hist) is None
+        assert xr.testing.assert_equal(boot_gen.history, hist.sel(variables=["temp"])) is None
+        assert xr.testing.assert_allclose(boot_gen.shuffled - 1, hist.sel(variables="o3").expand_dims({"boots": [0]})) is None
+
+    def test_len(self, boot_gen):
+        assert len(boot_gen) == 20
+
+    def test_get_shuffled(self, boot_gen, hist):
+        shuffled = boot_gen._BootStrapGenerator__get_shuffled(0)
+        expected = hist.sel(variables=["o3"]).transpose("datetime", "window", "Stations", "variables") + 1
+        assert xr.testing.assert_equal(shuffled, expected) is None
+
+    def test_getitem(self, boot_gen, hist):
+        first_element = boot_gen[0]
+        assert xr.testing.assert_equal(first_element.sel(variables="temp"), hist.sel(variables="temp")) is None
+        assert xr.testing.assert_allclose(first_element.sel(variables="o3"), hist.sel(variables="o3") + 1) is None
+
+    def test_next(self, boot_gen, hist):
+        iter_obj = iter(boot_gen)
+        first_element = next(iter_obj)
+        assert xr.testing.assert_equal(first_element.sel(variables="temp"), hist.sel(variables="temp")) is None
+        assert xr.testing.assert_allclose(first_element.sel(variables="o3"), hist.sel(variables="o3") + 1) is None
+        with pytest.raises(KeyError):
+            next(iter_obj)
+
+
+class TestCreateShuffledData:
+
+    @pytest.fixture
+    def shuffled_data(self, orig_generator, data_path):
+        return CreateShuffledData(orig_generator, 20, data_path)
+
+    @pytest.fixture
+    @mock.patch("src.data_handling.bootstraps.CreateShuffledData.create_shuffled_data", return_value=None)
+    def shuffled_data_no_creation(self, mock_create_shuffle_data, orig_generator, data_path):
+        return CreateShuffledData(orig_generator, 20, data_path)
+
+    @pytest.fixture
+    def shuffled_data_clean(self, shuffled_data_no_creation):
+        shutil.rmtree(shuffled_data_no_creation.bootstrap_path)
+        os.makedirs(shuffled_data_no_creation.bootstrap_path)
+        assert os.listdir(shuffled_data_no_creation.bootstrap_path) == []  # just to check for a clean working directory
+        return shuffled_data_no_creation
+
+    def test_init(self, shuffled_data_no_creation, data_path):
+        assert isinstance(shuffled_data_no_creation.data, DataGenerator)
+        assert shuffled_data_no_creation.number_of_bootstraps == 20
+        assert shuffled_data_no_creation.bootstrap_path == data_path
+
+    def test_create_shuffled_data_create_new(self, shuffled_data_clean, data_path, caplog):
+        caplog.set_level(logging.INFO)
+        shuffled_data_clean.data.data_path_tmp = data_path
+        assert shuffled_data_clean.create_shuffled_data() is None
+        assert caplog.record_tuples[0] == ('root', logging.INFO, "create / check shuffled bootstrap data")
+        assert caplog.record_tuples[1] == ('root', logging.INFO, "create bootstap data for DEBW107")
+        assert caplog.record_tuples[5] == ('root', logging.INFO, "create bootstap data for DEBW013")
+        assert "DEBW107_o3_temp_hist7_nboots20_shuffled.nc" in os.listdir(data_path)
+        assert "DEBW013_o3_temp_hist7_nboots20_shuffled.nc" in os.listdir(data_path)
+
+    def test_create_shuffled_data_some_valid(self, shuffled_data_clean, data_path, caplog):
+        shuffled_data_clean.data.data_path_tmp = data_path
+        shuffled_data_clean.create_shuffled_data()
+        caplog.records.clear()
+        caplog.set_level(logging.INFO)
+        os.rename(os.path.join(data_path, "DEBW013_o3_temp_hist7_nboots20_shuffled.nc"),
+                  os.path.join(data_path, "DEBW013_o3_temp_hist5_nboots30_shuffled.nc"))
+        shuffled_data_clean.create_shuffled_data()
+        assert caplog.record_tuples[0] == ('root', logging.INFO, "create / check shuffled bootstrap data")
+        assert caplog.record_tuples[1] == ('root', logging.INFO, "create bootstap data for DEBW013")
+        assert "DEBW107_o3_temp_hist7_nboots20_shuffled.nc" in os.listdir(data_path)
+        assert "DEBW013_o3_temp_hist7_nboots30_shuffled.nc" in os.listdir(data_path)
+        assert "DEBW013_o3_temp_hist5_nboots30_shuffled.nc" not in os.listdir(data_path)
+
+    def test_set_file_path(self, shuffled_data_no_creation):
+        res = shuffled_data_no_creation._set_file_path("DEBWtest", "o3_temp_wind", 10, 5)
+        assert "DEBWtest_o3_temp_wind_hist10_nboots5_shuffled.nc" in res
+        assert shuffled_data_no_creation.bootstrap_path in res
+
+    def test_valid_bootstrap_file_blank(self, shuffled_data_clean):
+        assert shuffled_data_clean.valid_bootstrap_file("DEBWtest", "o3_temp", 10) == (False, 20)
+
+    def test_valid_bootstrap_file_already_satisfied(self, shuffled_data_clean, data_path):
+        station, variables, window = "DEBWtest2", "o3_temp", 5
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist5_nboots50_shuffled.dat"))
+        assert shuffled_data_clean.valid_bootstrap_file(station, variables, window) == (True, None)
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist5_nboots100_shuffled.dat"))
+        assert shuffled_data_clean.valid_bootstrap_file(station, variables, window) == (True, None)
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist10_nboots50_shuffled.dat"))
+        os.mknod(os.path.join(data_path, f"{station}1_{variables}_hist10_nboots50_shuffled.dat"))
+        assert shuffled_data_clean.valid_bootstrap_file(station, variables, window) == (True, None)
+
+    def test_valid_bootstrap_file_reload_data_window(self, shuffled_data_clean, data_path):
+        station, variables, window = "DEBWtest2", "o3_temp", 20
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist5_nboots50_shuffled.dat"))
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist5_nboots100_shuffled.dat"))
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist10_nboots50_shuffled.dat"))
+        os.mknod(os.path.join(data_path, f"{station}1_{variables}_hist10_nboots50_shuffled.dat"))  # <- DEBWtest21
+        #  need to reload data and therefore remove not fitting history size in all files for this station
+        assert shuffled_data_clean.valid_bootstrap_file(station, variables, window) == (False, 100)
+        assert len(os.listdir(data_path)) == 1  # keep only data from other station DEBWtest21
+
+    def test_valid_bootstrap_file_reload_data_boots(self, shuffled_data_clean, data_path):
+        station, variables, window = "DEBWtest2", "o3_temp", 5
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist5_nboots50_shuffled.dat"))
+        os.mknod(os.path.join(data_path, f"{station}1_{variables}_hist10_nboots50_shuffled.dat"))  # <- DEBWtest21
         # reload because expanded boot number
-        os.mknod(os.path.join(path, f"{station}_{variables}_hist5_nboots50_shuffled.dat"))
-        boot_no_init.number_bootstraps = 60
-        assert boot_no_init.valid_bootstrap_file(station, variables, window) == (False, 60)
-        assert len(os.listdir(path)) == 1
+        shuffled_data_clean.number_of_bootstraps = 60
+        assert shuffled_data_clean.valid_bootstrap_file(station, variables, window) == (False, 60)
+        assert len(os.listdir(data_path)) == 1
+
+    def test_valid_bootstrap_file_reload_data_use_max_file_boot(self, shuffled_data_clean, data_path):
+        station, variables, window = "DEBWtest2", "o3_temp", 20
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist5_nboots50_shuffled.dat"))
+        os.mknod(os.path.join(data_path, f"{station}_{variables}_hist5_nboots60_shuffled.dat"))
+        os.mknod(os.path.join(data_path, f"{station}1_{variables}_hist10_nboots50_shuffled.dat"))  # <- DEBWtest21
         # reload because of expanded window size, but use maximum boot number from file names
-        os.mknod(os.path.join(path, f"{station}_{variables}_hist5_nboots60_shuffled.dat"))
-        boot_no_init.number_bootstraps = 50
-        assert boot_no_init.valid_bootstrap_file(station, variables, 20) == (False, 60)
-
-    def test_shuffle_single_variable(self, boot_no_init):
-        data = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
-        res = boot_no_init.shuffle_single_variable(data, chunks=(2, 3)).compute()
-        assert res.shape == data.shape
-        assert res.max() <= data.max()
-        assert res.min() >= data.min()
+        assert shuffled_data_clean.valid_bootstrap_file(station, variables, window) == (False, 60)
+
+    def test_shuffle(self, shuffled_data_no_creation):
+        dummy = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])
+        res = shuffled_data_no_creation.shuffle(dummy, chunks=(2, 3)).compute()
+        assert res.shape == dummy.shape
+        assert dummy.max() >= res.max()
+        assert dummy.min() <= res.min()
         assert set(np.unique(res)).issubset({1, 2, 3})
 
-    def test_create_shuffled_data(self):
-        pass
\ No newline at end of file
+
+class TestBootStraps:
+
+    @pytest.fixture
+    def bootstrap(self, orig_generator, data_path):
+        return BootStraps(orig_generator, data_path, 20)
+
+    @pytest.fixture
+    @mock.patch("src.data_handling.bootstraps.CreateShuffledData", return_value=None)
+    def bootstrap_no_shuffling(self, mock_create_shuffle_data, orig_generator, data_path):
+        shutil.rmtree(data_path)
+        return BootStraps(orig_generator, data_path, 20)
+
+    def test_init_no_shuffling(self, bootstrap_no_shuffling, data_path):
+        assert isinstance(bootstrap_no_shuffling, BootStraps)
+        assert bootstrap_no_shuffling.number_of_bootstraps == 20
+        assert bootstrap_no_shuffling.bootstrap_path == data_path
+
+    def test_init_with_shuffling(self, orig_generator, data_path, caplog):
+        caplog.set_level(logging.INFO)
+        BootStraps(orig_generator, data_path, 20)
+        assert caplog.record_tuples[0] == ('root', logging.INFO, "create / check shuffled bootstrap data")
+
+    def test_stations(self, bootstrap_no_shuffling, orig_generator):
+        assert bootstrap_no_shuffling.stations == orig_generator.stations
+
+    def test_variables(self, bootstrap_no_shuffling, orig_generator):
+        assert bootstrap_no_shuffling.variables == orig_generator.variables
+
+    def test_window_history_size(self, bootstrap_no_shuffling, orig_generator):
+        assert bootstrap_no_shuffling.window_history_size == orig_generator.window_history_size
+
+    def test_get_generator(self, bootstrap, orig_generator):
+        station = bootstrap.stations[0]
+        var = bootstrap.variables[0]
+        var_others = bootstrap.variables[1:]
+        gen = bootstrap.get_generator(station, var)
+        assert isinstance(gen, BootStrapGenerator)
+        assert gen.number_of_boots == bootstrap.number_of_bootstraps
+        assert gen.variables == bootstrap.variables
+        expected = orig_generator.get_data_generator(station).get_transposed_history()
+        assert xr.testing.assert_equal(gen.history_orig, expected) is None
+        assert xr.testing.assert_equal(gen.history, expected.sel(variables=var_others)) is None
+        assert gen.shuffled.variables == "o3"
+
+    @mock.patch("src.data_handling.data_generator.DataGenerator._load_pickle_data", side_effect=FileNotFoundError)
+    def test_get_generator_different_generator(self, mock_load_pickle, data_path, orig_generator):
+        BootStraps(orig_generator, data_path, 20)  # to create
+        orig_generator.window_history_size = 4
+        bootstrap = BootStraps(orig_generator, data_path, 20)
+        station = bootstrap.stations[0]
+        var = bootstrap.variables[0]
+        var_others = bootstrap.variables[1:]
+        gen = bootstrap.get_generator(station, var)
+        expected = orig_generator.get_data_generator(station, load_local_tmp_storage=False).get_transposed_history()
+        assert xr.testing.assert_equal(gen.history_orig, expected) is None
+        assert xr.testing.assert_equal(gen.history, expected.sel(variables=var_others)) is None
+        assert gen.shuffled.variables == "o3"
+        assert gen.shuffled.shape[:-1] == expected.shape[:-1]
+        assert gen.shuffled.shape[-1] == 20
+
+    def test_get_labels(self, bootstrap, orig_generator):
+        station = bootstrap.stations[0]
+        labels = bootstrap.get_labels(station)
+        labels_orig = orig_generator.get_data_generator(station).get_transposed_label()
+        assert labels.shape == (labels_orig.shape[0] * bootstrap.number_of_bootstraps, *labels_orig.shape[1:])
+        assert np.testing.assert_array_equal(labels[:labels_orig.shape[0], :], labels_orig.values) is None
+
+    def test_get_orig_prediction(self, bootstrap, data_path, orig_generator):
+        station = bootstrap.stations[0]
+        labels = orig_generator.get_data_generator(station).get_transposed_label()
+        predictions = labels.expand_dims({"type": ["CNN"]}, -1)
+        file_name = "test_prediction.nc"
+        predictions.to_netcdf(os.path.join(data_path, file_name))
+        res = bootstrap.get_orig_prediction(data_path, file_name)
+        assert (*res.shape, 1) == (predictions.shape[0] * bootstrap.number_of_bootstraps, *predictions.shape[1:])
+        assert np.testing.assert_array_equal(res[:predictions.shape[0], :], predictions.squeeze().values) is None
+
+    def test_load_shuffled_data(self, bootstrap, orig_generator):
+        station = bootstrap.stations[0]
+        hist = orig_generator.get_data_generator(station).get_transposed_history()
+        shuffled_data = bootstrap._load_shuffled_data(station, ["o3", "temp"])
+        assert isinstance(shuffled_data, xr.DataArray)
+        assert hist.shape[0] >= shuffled_data.shape[0]  # longer window length lead to shorter datetime axis in shuffled
+        assert hist.shape[1] <= shuffled_data.shape[1]  # longer window length in shuffled
+        assert hist.shape[2] == shuffled_data.shape[2]
+        assert hist.shape[3] <= shuffled_data.shape[3]  # potentially more variables in shuffled
+        assert bootstrap.number_of_bootstraps == shuffled_data.shape[4]
+        assert shuffled_data.mean().compute()
+        assert np.testing.assert_almost_equal(shuffled_data.mean().compute(), hist.mean(), decimal=1) is None
+        assert shuffled_data.max() <= hist.max()
+        assert shuffled_data.min() >= hist.min()
+
+    def test_get_shuffled_data_file(self, bootstrap):
+        file_name = bootstrap._get_shuffled_data_file("DEBW107", ["o3"])
+        assert file_name == os.path.join(bootstrap.bootstrap_path, "DEBW107_o3_temp_hist7_nboots20_shuffled.nc")
+
+    def test_get_shuffled_data_file_not_found(self, bootstrap_no_shuffling, data_path):
+        bootstrap_no_shuffling.number_of_boots = 100  # NOTE(review): attribute is `number_of_bootstraps` elsewhere (see test_init_no_shuffling) — this likely sets a dead attribute; confirm intent
+        os.makedirs(data_path)
+        with pytest.raises(FileNotFoundError) as e:
+            bootstrap_no_shuffling._get_shuffled_data_file("DEBW107", ["o3"])
+        assert "Could not find a file to match pattern" in e.value.args[0]
+
+    def test_create_file_regex(self, bootstrap_no_shuffling):
+        regex = bootstrap_no_shuffling._create_file_regex("DEBW108", ["o3", "temp", "h2o"])
+        assert regex.match("DEBW108_h2o_hum_latent_o3_temp_h20_hist10_nboots10_shuffled.nc")
+        regex.match("DEBW108_h2o_hum_latent_o3_temp_hist10_shuffled.nc") is None
+
+    def test_filter_files(self, bootstrap_no_shuffling):
+        regex = bootstrap_no_shuffling._create_file_regex("DEBW108", ["o3", "temp", "h2o"])
+        test_list = ["DEBW108_o3_test23_test_shuffled.nc",
+                     "DEBW107_o3_test23_test_shuffled.nc",
+                     "DEBW108_o3_test23_test.nc",
+                     "DEBW108_h2o_o3_temp_test_shuffled.nc",
+                     "DEBW108_h2o_hum_latent_o3_temp_u_v_test23_test_shuffled.nc",
+                     "DEBW108_o3_temp_hist9_nboots20_shuffled.nc",
+                     "DEBW108_h2o_o3_temp_hist9_nboots20_shuffled.nc"]
+        f = bootstrap_no_shuffling._filter_files
+        assert f(regex, test_list, 10, 10) is None
+        assert f(regex, test_list, 9, 10) == "DEBW108_h2o_o3_temp_hist9_nboots20_shuffled.nc"
+        assert f(regex, test_list, 9, 20) == "DEBW108_h2o_o3_temp_hist9_nboots20_shuffled.nc"
+
diff --git a/test/test_modules/test_pre_processing.py b/test/test_modules/test_pre_processing.py
index 425ddecc135db75a3f2f624ed150e8dd8f566bdc..b29ed1e21480a869e4c118332c18b6edd8ac23a5 100644
--- a/test/test_modules/test_pre_processing.py
+++ b/test/test_modules/test_pre_processing.py
@@ -40,7 +40,7 @@ class TestPreProcessing:
         caplog.set_level(logging.INFO)
         with PreProcessing():
             assert caplog.record_tuples[0] == ('root', 20, 'PreProcessing started')
-            assert caplog.record_tuples[1] == ('root', 20, 'check valid stations started')
+            assert caplog.record_tuples[1] == ('root', 20, 'check valid stations started (all)')
             assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 5 '
                                                                         r'station\(s\). Found 5/5 valid stations.'))
         RunEnvironment().__del__()
@@ -83,16 +83,18 @@ class TestPreProcessing:
             data_store.get("generator", "general")
         assert data_store.get("stations", "general.awesome") == ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
 
-    def test_check_valid_stations(self, caplog, obj_with_exp_setup):
+    @pytest.mark.parametrize("name", (None, "tester"))
+    def test_check_valid_stations(self, caplog, obj_with_exp_setup, name):
         pre = obj_with_exp_setup
         caplog.set_level(logging.INFO)
         args = pre.data_store.create_args_dict(DEFAULT_ARGS_LIST)
         kwargs = pre.data_store.create_args_dict(DEFAULT_KWARGS_LIST)
         stations = pre.data_store.get("stations", "general")
-        valid_stations = pre.check_valid_stations(args, kwargs, stations)
+        valid_stations = pre.check_valid_stations(args, kwargs, stations, name=name)
         assert len(valid_stations) < len(stations)
         assert valid_stations == stations[:-1]
-        assert caplog.record_tuples[0] == ('root', 20, 'check valid stations started')
+        expected = 'check valid stations started (tester)' if name else 'check valid stations started'
+        assert caplog.record_tuples[0] == ('root', 20, expected)
         assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 6 '
                                                                     r'station\(s\). Found 5/6 valid stations.'))