diff --git a/HPC_setup/create_runscripts_HPC.sh b/HPC_setup/create_runscripts_HPC.sh
index bcbb5fb07800079736361450d7f0fed8684dc344..7bf08a34c6285e31895d817735f319ddde5bfb04 100755
--- a/HPC_setup/create_runscripts_HPC.sh
+++ b/HPC_setup/create_runscripts_HPC.sh
@@ -117,15 +117,17 @@ EOT
 
 fi
 
-echo
-echo "You have to run the the following command on a login node to download data:"
-echo "          \`python run.py'"
-echo
+echo "###################################################################################"
+echo "#   You have to run the the following command on a login node to download data:   #"
+echo "#                              \`python run_HPC.py'                                #"
+echo "#                                                                                 #"
 
-echo "Please execute the following command to check if the setup went well:"
+echo "#   Please execute the following command to check if the setup went well:         #"
 if [[ ${hpcsys} = 'juwels' ]]; then
-  echo "          \`sbatch run_juwels_develgpus.bash'"
+  echo "#                   \`sbatch run_juwels_develgpus.bash'                            #"
 else
-  echo "          \`sbatch run_hdfml_batch.bash'"
+  echo "#                   \`sbatch run_hdfml_batch.bash'                                 #"
 fi
 
+echo "###################################################################################"
+
diff --git a/HPC_setup/mlt_modules_juwels.sh b/HPC_setup/mlt_modules_juwels.sh
index d20b246d4f396363a23e68d64f89b6d3abaee8c4..01eecbab617f7b3042222e24e562901b302d401e 100755
--- a/HPC_setup/mlt_modules_juwels.sh
+++ b/HPC_setup/mlt_modules_juwels.sh
@@ -8,7 +8,7 @@
 module --force purge
 module use $OTHERSTAGES
 
-ml Stages/Devel-2019a
+ml Stages/2019a
 ml GCCcore/.8.3.0
 
 ml Jupyter/2019a-Python-3.6.8
@@ -18,4 +18,4 @@ ml Keras/2.2.4-GPU-Python-3.6.8
 ml SciPy-Stack/2019a-Python-3.6.8
 ml dask/1.1.5-Python-3.6.8
 ml GEOS/3.7.1-Python-3.6.8
-ml Graphviz/2.40.1
\ No newline at end of file
+ml Graphviz/2.40.1
diff --git a/HPC_setup/requirements_JUWELS_additionals.txt b/HPC_setup/requirements_JUWELS_additionals.txt
index 0a2d6bc0f5cb4ce565b9eb69aad27cd1b3bbaef6..82d78d096ba56157cf046c2e9e2064c3b68e421c 100644
--- a/HPC_setup/requirements_JUWELS_additionals.txt
+++ b/HPC_setup/requirements_JUWELS_additionals.txt
@@ -1,16 +1,60 @@
+absl-py==0.9.0
+astor==0.8.1
+atomicwrites==1.3.0
+attrs==19.3.0
+certifi==2019.11.28
+chardet==3.0.4
+cloudpickle==1.3.0
 coverage==5.0.3
+cycler==0.10.0
+Cython==0.29.15
+dask==2.11.0
+fsspec==0.6.2
+gast==0.3.3
+grpcio==1.27.2
+h5py==2.10.0
+idna==2.8
 importlib-metadata==1.5.0
-matplotlib==3.2.0              # in SciPy-Stack
-pandas==1.0.1                  # in SciPy-Stack / but older version
-py==1.8.1                      # ?
-pyproj==2.5.0                  # in basemap
-pyshp==2.1.0                   # in basemap
-pytest==5.3.5                  # in python (but we need higher version) 
+
+kiwisolver==1.1.0
+locket==0.2.0
+Markdown==3.2.1
+matplotlib==3.2.0
+mock==4.0.1
+more-itertools==8.2.0
+numpy==1.18.1
+packaging==20.3
+pandas==1.0.1
+partd==1.1.0
+patsy==0.5.1
+Pillow==7.0.0
+pluggy==0.13.1
+protobuf==3.11.3
+py==1.8.1
+pydot==1.4.1
+pyparsing==2.4.6
+pyproj==2.5.0
+pyshp==2.1.0
+pytest==5.3.5
 pytest-cov==2.8.1
 pytest-html==2.0.1
 pytest-lazy-fixture==0.6.3
 pytest-metadata==1.8.0
 pytest-sugar
-statsmodels==0.11.1              # (in Jupyter, but not found)
-xarray==0.15.0                 # in SciPy-Stack only 0.12.1 a
+python-dateutil==2.8.1
+pytz==2019.3
+PyYAML==5.3
+requests==2.23.0
+scipy==1.4.1
+seaborn==0.10.0
+--no-binary shapely Shapely==1.7.0
+six==1.11.0
+statsmodels==0.11.1
 tabulate
+toolz==0.10.0
+typing-extensions
+urllib3==1.25.8
+wcwidth==0.1.8
+Werkzeug==1.0.0
+xarray==0.15.0
+zipp==3.1.0
diff --git a/HPC_setup/setup_venv_juwels.sh b/HPC_setup/setup_venv_juwels.sh
index b543db1ee5ac4bea4f64467e360a084a2156c02a..7788c124fdbd997789811d32dccab8b04894b0ae 100755
--- a/HPC_setup/setup_venv_juwels.sh
+++ b/HPC_setup/setup_venv_juwels.sh
@@ -24,18 +24,15 @@ source ${cur}/../venv_juwels/bin/activate
 # export path for side-packages 
 export PYTHONPATH=${cur}/../venv_juwels/lib/python3.6/site-packages:${PYTHONPATH}
 
+
+echo "##### START INSTALLING requirements_JUWELS_additionals.txt #####"
+pip install -r ${cur}/requirements_JUWELS_additionals.txt
+echo "##### FINISH INSTALLING requirements_JUWELS_additionals.txt #####"
+
 pip install -r ${cur}/requirements_JUWELS_additionals.txt
+pip install netcdf4
 pip install --ignore-installed matplotlib==3.2.0
 pip install --ignore-installed pandas==1.0.1
-
+pip install -U typing_extensions
 
 # Comment: Maybe we have to export PYTHONPATH a second time ater activating the venv (after job allocation)
-# source venv/bin/activate
-# alloc_develgpu
-# source venv/bin/activate
-# export PYTHONPATH=${PWD}/venv/lib/python3.6/site-packages:${PYTHONPATH}
-# srun python run.py
-
-# create batch run scripts
-# source create_runscripts_HPC.sh
-
diff --git a/README.md b/README.md
index 5c55b4094232908a56cdcf61ba437976f8714e8b..c33aab4b8643d2907b07b5ebcb254076515d03d2 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,10 @@
 # MLAir - Machine Learning on Air Data
 
 MLAir (Machine Learning on Air data) is an environment that simplifies and accelerates the creation of new machine 
-learning (ML) models for the analysis and forecasting of meteorological and air quality time series.
+learning (ML) models for the analysis and forecasting of meteorological and air quality time series. You can find the
+docs [here](http://toar.pages.jsc.fz-juelich.de/mlair/docs/).
+
+[[_TOC_]]
 
 # Installation
 
@@ -9,7 +12,8 @@ MLAir is based on several python frameworks. To work properly, you have to insta
 `requirements.txt` file. Additionally to support the geographical plotting part it is required to install geo
 packages built for your operating system. Name names of these package may differ for different systems, we refer
 here to the opensuse / leap OS. The geo plot can be removed from the `plot_list`, in this case there is no need to 
-install the geo packages.
+install the geo packages. For special instructions on installing MLAir on the Jülich HPC systems, see 
+[here](#special-instructions-for-installation-on-jülich-hpc-systems).
 
 * (geo) Install **proj** on your machine using the console. E.g. for opensuse / leap `zypper install proj`
 * (geo) A c++ compiler is required for the installation of the program **cartopy**
@@ -27,7 +31,9 @@ install the geo packages.
 
 # How to start with MLAir
 
-In this section, we show three examples how to work with MLAir.
+In this section, we show three examples of how to work with MLAir. Note that for these examples, MLAir was installed
+using the distribution file. If you are using the git clone, you need to adjust the import path unless the code is
+executed directly inside the source directory of MLAir.
 
 ## Example 1
 
@@ -112,12 +118,50 @@ INFO: No training has started, because trainable parameter was false.
 INFO: mlair finished after 00:00:06 (hh:mm:ss)
 ```
 
-# Customised workflows and models
 
-# Custom Workflow
+# Default Workflow
+
+MLAir is composed of so-called `run_modules`, which are executed in a specific order called a `workflow`. MLAir
+provides a `DefaultWorkflow`. This workflow runs the run modules `ExperimentSetup`, `PreProcessing`,
+`ModelSetup`, `Training`, and `PostProcessing` one by one.
 
-MLAir provides a default workflow. If additional steps are to be performed, you have to append custom run modules to 
-the workflow.
+![Sketch of the default workflow.](docs/_source/_plots/run_modules_schedule.png)
+
+```python
+import mlair
+
+# create the default MLAir workflow
+DefaultWorkflow = mlair.DefaultWorkflow()
+# execute default workflow
+DefaultWorkflow.run()
+```
+The output of running this default workflow will be structured like the following.
+```log
+INFO: mlair started
+INFO: ExperimentSetup started
+...
+INFO: ExperimentSetup finished after 00:00:01 (hh:mm:ss)
+INFO: PreProcessing started
+...
+INFO: PreProcessing finished after 00:00:11 (hh:mm:ss)
+INFO: ModelSetup started
+...
+INFO: ModelSetup finished after 00:00:01 (hh:mm:ss)
+INFO: Training started
+...
+INFO: Training finished after 00:02:15 (hh:mm:ss)
+INFO: PostProcessing started
+...
+INFO: PostProcessing finished after 00:01:37 (hh:mm:ss)
+INFO: mlair finished after 00:04:05 (hh:mm:ss)
+```
+
+# Customised Run Module and Workflow
+
+It is possible to create new custom run modules. A custom run module is required to inherit from the base class
+`RunEnvironment` and to implement the constructor method `__init__()`. This method has to execute the module on call.
+In the following example, this is done by using the `_run()` method that is called by the initialiser. It is
+possible to pass arguments to the custom run module as shown.
 
 ```python
 import mlair
@@ -129,14 +173,19 @@ class CustomStage(mlair.RunEnvironment):
     def __init__(self, test_string):
         super().__init__()  # always call super init method
         self._run(test_string)  # call a class method
-        
+
     def _run(self, test_string):
         logging.info("Just running a custom stage.")
         logging.info("test_string = " + test_string)
         epochs = self.data_store.get("epochs")
         logging.info("epochs = " + str(epochs))
+```
+
+If a custom run module is defined, it is required to adjust the workflow. For this, you need to use the empty
+`Workflow` class and add each run module that is required. The order in which the modules are added defines the
+order of execution when running the workflow.
 
-        
+```python
 # create your custom MLAir workflow
 CustomWorkflow = mlair.Workflow()
 # provide stages without initialisation
@@ -146,6 +195,9 @@ CustomWorkflow.add(CustomStage, test_string="Hello World")
 # finally execute custom workflow in order of adding
 CustomWorkflow.run()
 ```
+
+The output will look like:
+
 ```log
 INFO: mlair started
 ...
@@ -158,115 +210,198 @@ INFO: CustomStage finished after 00:00:01 (hh:mm:ss)
 INFO: mlair finished after 00:00:13 (hh:mm:ss)
 ```
 
-## Custom Model
+# Custom Model
 
-Each model has to inherit from the abstract model class to ensure a smooth training and evaluation behaviour. It is 
-required to implement the set model and set compile options methods. The later has to set the loss at least.
+Create your own model to run your personal experiment. To guarantee proper integration into the MLAir workflow, models
+are required to inherit from the `AbstractModelClass`. This will ensure a smooth training and evaluation
+behaviour.
 
-```python
+## How to create a customised model?
 
+* Create a new model class inheriting from `AbstractModelClass`
+
+```python
+from mlair import AbstractModelClass
 import keras
-from keras.losses import mean_squared_error as mse
-from keras.optimizers import SGD
 
-from mlair.model_modules import AbstractModelClass
+class MyCustomisedModel(AbstractModelClass):
+
+    def __init__(self, shape_inputs: list, shape_outputs: list):
+
+        super().__init__(shape_inputs[0], shape_outputs[0])
 
-class MyLittleModel(AbstractModelClass):
-    """
-    A customised model with a 1x1 Conv, and 3 Dense layers (32, 16
-    window_lead_time). Dropout is used after Conv layer.
-    """
-    def __init__(self, window_history_size, window_lead_time, channels):
-        super().__init__()
         # settings
-        self.window_history_size = window_history_size
-        self.window_lead_time = window_lead_time
-        self.channels = channels
         self.dropout_rate = 0.1
         self.activation = keras.layers.PReLU
-        self.lr = 1e-2
+
         # apply to model
         self.set_model()
         self.set_compile_options()
         self.set_custom_objects(loss=self.compile_options['loss'])
+```
+
+* Make sure to add the `super().__init__()` and at least `set_model()` and `set_compile_options()` to your
+  custom init method.
+* The shown model expects a single input and output branch provided in a list. Therefore, the shapes of input and
+  output are extracted and then provided to the super class initialiser.
+* Additionally, some general settings like the dropout rate are set in the init method.
+* If your model contains custom objects that are not part of the keras or tensorflow frameworks, you need to add
+  them as custom objects. To do this, call `set_custom_objects` with arbitrary kwargs. In the shown example, the
+  loss has been added for demonstration only, because we use a built-in loss function. Nonetheless, we always encourage
+  you to add the loss as a custom object to prevent potential errors when loading an already created model instead of
+  training a new one.
+* Now build your model inside `set_model()` by using the instance attributes `self.shape_inputs` and
+  `self.shape_outputs` and storing the model as `self.model`.
+
+```python
+class MyCustomisedModel(AbstractModelClass):
 
     def set_model(self):
-        # add 1 to window_size to include current time step t0
-        shape = (self.window_history_size + 1, 1, self.channels)
-        x_input = keras.layers.Input(shape=shape)
-        x_in = keras.layers.Conv2D(32, (1, 1), padding='same')(x_input)
+        x_input = keras.layers.Input(shape=self.shape_inputs)
+        x_in = keras.layers.Conv2D(32, (1, 1), padding='same', name='{}_Conv_1x1'.format("major"))(x_input)
+        x_in = self.activation(name='{}_conv_act'.format("major"))(x_in)
+        x_in = keras.layers.Flatten(name='{}'.format("major"))(x_in)
+        x_in = keras.layers.Dropout(self.dropout_rate, name='{}_Dropout_1'.format("major"))(x_in)
+        x_in = keras.layers.Dense(16, name='{}_Dense_16'.format("major"))(x_in)
         x_in = self.activation()(x_in)
-        x_in = keras.layers.Flatten()(x_in)
-        x_in = keras.layers.Dropout(self.dropout_rate)(x_in)
-        x_in = keras.layers.Dense(32)(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(16)(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(self.window_lead_time)(x_in)
-        out = self.activation()(x_in)
-        self.model = keras.Model(inputs=x_input, outputs=[out])
+        x_in = keras.layers.Dense(self.shape_outputs, name='{}_Dense'.format("major"))(x_in)
+        out_main = self.activation()(x_in)
+        self.model = keras.Model(inputs=x_input, outputs=[out_main])
+```
+
+* You are free in how you design your model. Just make sure to store it in the class attribute `model`.
+* Additionally, set your custom compile options including the loss definition.
+
+```python
+class MyCustomisedModel(AbstractModelClass):
 
     def set_compile_options(self):
-        self.compile_options = {"optimizer": SGD(lr=self.lr),
-                                "loss": mse, 
-                                "metrics": ["mse"]}
+        self.initial_lr = 1e-2
+        self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9)
+        self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr,
+                                                                               drop=.94,
+                                                                               epochs_drop=10)
+        self.loss = keras.losses.mean_squared_error
+        self.compile_options = {"metrics": ["mse", "mae"]}
 ```
 
+* The allocation of the instance parameters `initial_lr`, `optimizer`, and `lr_decay` could also be part of
+  the model class' initialiser. The same applies to `self.loss` and `compile_options`, but we recommend using
+  the `set_compile_options` method for the definition of parameters that are related to the compile options.
+* More importantly, make sure that the compile options are actually saved. There are three ways to achieve this.
+
+  * (1): Set all compile options by parsing a dictionary with all options to `self.compile_options`.
+  * (2): Set all compile options as instance attributes. MLAir will search for these attributes and store them.
+  * (3): Define your compile options partly as dictionary and instance attributes (as shown in this example).
+  * If using (3) and defining the same compile option with different values, MLAir will raise an error.
+
+    Incorrect: (Will raise an error because of a mismatch for the `optimizer` parameter.)
+    ```python
+    def set_compile_options(self):
+        self.optimizer = keras.optimizers.SGD()
+        self.loss = keras.losses.mean_squared_error
+        self.compile_options = {"optimizer" = keras.optimizers.Adam()}
+    ```
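+
+    A correct counterpart under approach (3) keeps each compile option in only one place. The following sketch is an
+    assumption of how this could look and is not taken from the MLAir sources:
+    ```python
+    def set_compile_options(self):
+        # optimizer and loss are defined once as instance attributes ...
+        self.optimizer = keras.optimizers.SGD()
+        self.loss = keras.losses.mean_squared_error
+        # ... and the dictionary only carries options not set as attributes
+        self.compile_options = {"metrics": ["mse"]}
+    ```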
+
+
+## Specials for Branched Models
+
+* If you have a branched model with multiple outputs, you need to either set a single loss for all branch outputs or
+  provide one loss function per output branch in the correct order.
 
-## Transformation
+```python
+class MyCustomisedModel(AbstractModelClass):
 
-There are two different approaches (called scopes) to transform the data:
-1) `station`: transform data for each station independently (somehow like batch normalisation)
-1) `data`: transform all data of each station with shared metrics
+    def set_model(self):
+        ...
+        self.model = keras.Model(inputs=x_input, outputs=[out_minor_1, out_minor_2, out_main])
 
-Transformation must be set by the `transformation` attribute. If `transformation = None` is given to `ExperimentSetup`, 
-data is not transformed at all. For all other setups, use the following dictionary structure to specify the 
-transformation.
+    def set_compile_options(self):
+        self.loss = [keras.losses.mean_absolute_error,  # for out_minor_1
+                     keras.losses.mean_squared_error,   # for out_minor_2
+                     keras.losses.mean_squared_error]   # for out_main
 ```
-transformation = {"scope": <...>, 
-                  "method": <...>,
-                  "mean": <...>,
-                  "std": <...>}
-ExperimentSetup(..., transformation=transformation, ...)
+
+
+## How to access my customised model?
+
+Once the customised model is created, you can easily access the model with
+
+```python
+>>> MyCustomisedModel().model
+<your custom model>
 ```
 
-### scopes
+The loss is accessible via
 
-**station**: mean and std are not used
+```python
+>>> MyCustomisedModel().loss
+<your custom loss>
+```
 
-**data**: either provide already calculated values for mean and std (if required by transformation method), or choose 
-from different calculation schemes, explained in the mean and std section.
+You can treat the instance of your model class not only as an instance but also as the model itself. If you call a
+method that belongs to the model rather than to the model class instance, you can apply it directly to the instance
+without accessing the `model` attribute first.
 
-### supported transformation methods
-Currently supported methods are:
-* standardise (default, if method is not given)
-* centre
+```python
+>>> MyCustomisedModel().model.compile(**kwargs) == MyCustomisedModel().compile(**kwargs)
+True
+```
 
-### mean and std
-`"mean"="accurate"`: calculate the accurate values of mean and std (depending on method) by using all data. Although, 
-this method is accurate, it may take some time for the calculation. Furthermore, this could potentially lead to memory 
-issue (not explored yet, but could appear for a very big amount of data)
+# Data Handlers
 
-`"mean"="estimate"`: estimate mean and std (depending on method). For each station, mean and std are calculated and
-afterwards aggregated using the mean value over all station-wise metrics. This method is less accurate, especially 
-regarding the std calculation but therefore much faster.
+Data handlers are responsible for all tasks related to data, such as data acquisition, preparation, and provision. A
+data handler must inherit from the abstract base class `AbstractDataHandler` and requires the implementation of the
+`__init__()` method and the accessors `get_X()` and `get_Y()`. In the following, we show an example of what a custom
+data handler could look like.
 
-We recommend to use the later method *estimate* because of following reasons:
-* much faster calculation
-* real accuracy of mean and std is less important, because it is "just" a transformation / scaling
-* accuracy of mean is almost as high as in the *accurate* case, because of 
-$\bar{x_{ij}} = \bar{\left(\bar{x_i}\right)_j}$. The only difference is, that in the *estimate* case, each mean is 
-equally weighted for each station independently of the actual data count of the station.
-* accuracy of std is lower for *estimate* because of $\var{x_{ij}} \ne \bar{\left(\var{x_i}\right)_j}$, but still the mean of all 
-station-wise std is a decent estimate of the true std.
+```python
+import datetime as dt
+import numpy as np
+import pandas as pd
+import xarray as xr
 
-`"mean"=<value, e.g. xr.DataArray>`: If mean and std are already calculated or shall be set manually, just add the
-scaling values instead of the calculation method. For method *centre*, std can still be None, but is required for the
-*standardise* method. **Important**: Format of given values **must** match internal data format of DataPreparation 
-class: `xr.DataArray` with `dims=["variables"]` and one value for each variable.
+from mlair.data_handler import AbstractDataHandler
 
+class DummyDataHandler(AbstractDataHandler):
 
+    def __init__(self, name, number_of_samples=None):
+        """This data handler takes a name argument and the number of samples to generate. If not provided, a random 
+        number between 100 and 150 is set."""
+        super().__init__()
+        self.name = name
+        self.number_of_samples = number_of_samples if number_of_samples is not None else np.random.randint(100, 150)
+        self._X = self.create_X()
+        self._Y = self.create_Y()
+
+    def create_X(self):
+        """Inputs are random numbers between 0 and 10 with shape (no_samples, window=14, variables=5)."""
+        X = np.random.randint(0, 10, size=(self.number_of_samples, 14, 5))  # samples, window, variables
+        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+        return xr.DataArray(X, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                 "window": range(14),
+                                                                                 "variables": range(5)})
+    
+    def create_Y(self):
+        """Targets are normal distributed random numbers with shape (no_samples, window=5, variables=1)."""
+        Y = np.round(0.5 * np.random.randn(self.number_of_samples, 5, 1), 1)  # samples, window, variables
+        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+        return xr.DataArray(Y, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                 "window": range(5),
+                                                                                 "variables": range(1)})
+
+    def get_X(self, upsampling=False, as_numpy=False):
+        """Upsampling parameter is not used for X."""
+        return np.copy(self._X) if as_numpy is True else self._X
+
+    def get_Y(self, upsampling=False, as_numpy=False):
+        """Upsampling parameter is not used for Y."""
+        return np.copy(self._Y) if as_numpy is True else self._Y
+
+    def __str__(self):
+        return self.name
 
+```
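+
+A quick, purely illustrative check of such a handler (outside of any MLAir workflow, station name is made up) could
+look like this:
+
+```python
+dh = DummyDataHandler("station_xy")
+X, Y = dh.get_data(as_numpy=True)  # inherited from AbstractDataHandler: returns (get_X(), get_Y())
+print(dh, X.shape, Y.shape)
+```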
 
 
 # Special Remarks
@@ -297,3 +432,4 @@ Therefore, it might be necessary to adopt the `if` statement in `PartitionCheck.
 add it to `src/join_settings.py` in the hourly data section. Replace the `TOAR_SERVICE_URL` and the `Authorization` 
 value. To make sure, that this **sensitive** data is not uploaded to the remote server, use the following command to
 prevent git from tracking this file: `git update-index --assume-unchanged src/join_settings.py`
+
diff --git a/create_runscripts_HPC.sh b/create_runscripts_HPC.sh
deleted file mode 100755
index af657fd11779f67861785c1573acd80235380b53..0000000000000000000000000000000000000000
--- a/create_runscripts_HPC.sh
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/bin/csh -x
-
-echo "############################################################"
-echo "#                                                          #"
-echo "#            user interaction required                     #"
-echo "#                                                          #"
-echo "############################################################"
-echo 
-
-echo "This script creates the HPC batch scripts to run mlt on compute nodes (gpus and develgpus)."
-echo "You can modify the created run scripts afterwards if needed."
-
-while true; do
-    read -p "Do you wish to create run scripts for JUWELS? [yes/no]" yn
-    case $yn in
-        [Yy]* ) juwels=True; break;;
-        [Nn]* ) juwels=False;;
-        * ) echo "Please answer yes or no.";;
-   esac
-done
-
-while true; do
-    read -p "Do you wish to create run script for HDFML? [yes/no]" yn
-    case $yn in
-        [Yy]* ) hdfml=True; break;;
-        [Nn]* ) hdfml=False;;
-        * ) echo "Please answer yes or no.";;
-   esac
-done
-
-
-budget=''
-while [[ $budget == '' ]]
-do
- echo
- read -p "Enter project budget for --account flag: " budget 
-done
-
-email=`jutil user show -o json | grep email | cut -f2 -d':' | cut -f1 -d',' | cut -f2 -d'"'`
-echo
-read -p "Enter e-mail address for --mail-user (default: ${email}): " new_email
-
-if [[ -z "$new_email" ]]; then
-    new_email=$email
-fi
-
-# create HPC_logging dir
-hpclogging="../HPC_logging/"
-mkdir -p $hpclogging
-
-
-# ordering for looping:
-# "partition nGPUs timing"
-if [[ $juwels == True ]]; then
-  for i in "develgpus 2  02:00:00" "gpus 4 08:00:00"; do
-      set -- $i
-
-cat <<EOT > run_$1.bash
-#!/bin/bash -x
-#SBATCH --account=${budget}
-#SBATCH --nodes=1
-#SBATCH --output=${hpclogging}mlt-out.%j
-#SBATCH --error=${hpclogging}/mlt-err.%j
-#SBATCH --time=$3 
-#SBATCH --partition=$1 
-#SBATCH --gres=gpu:$2
-#SBATCH --mail-type=ALL
-#SBATCH --mail-user=${email}
-
-source mlt_modules_.sh
-source venv/bin/activate
-
-timestamp=\`date +"%Y-%m-%d_%H%M-%S"\`
-
-export PYTHONPATH=\${PWD}/venv/lib/python3.6/site-packages:\${PYTHONPATH}
-
-srun python run.py --experiment_date=\$timestamp
-EOT
-
-  echo "Created runscript: run_$1.bash"
-
-  done
-fi
-
-echo
-echo "You have to run the the following command on a login node to download data:"
-echo "          \`python run.py'"
-echo
-echo "Please execute the following command to check if the setup went well:"
-echo "          \`sbatch run_develgpus.bash'"
-
diff --git a/docs/_source/_api/machinelearningtools.rst b/docs/_source/_api/mlair.rst
similarity index 50%
rename from docs/_source/_api/machinelearningtools.rst
rename to docs/_source/_api/mlair.rst
index cd6885f52bedfa295139251c641c5bba8e2a30e9..26166fc3aa8d824e2b0a80a19c656596a86ca3c0 100644
--- a/docs/_source/_api/machinelearningtools.rst
+++ b/docs/_source/_api/mlair.rst
@@ -1,7 +1,7 @@
-machinelearningtools package
-============================
+MLAir package
+=============
 
-.. automodule:: src
+.. automodule:: mlair
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/_source/_plots/run_modules_schedule.png b/docs/_source/_plots/run_modules_schedule.png
new file mode 100755
index 0000000000000000000000000000000000000000..b7549849757118a688bad0c4128b6de694b079bc
Binary files /dev/null and b/docs/_source/_plots/run_modules_schedule.png differ
diff --git a/docs/_source/conf.py b/docs/_source/conf.py
index 573918ee35e9757b8c0b32b2697fc0cc2bc0b38f..d4c71e69cc38499930d28952562372106b5f13b3 100644
--- a/docs/_source/conf.py
+++ b/docs/_source/conf.py
@@ -15,6 +15,8 @@ import sys
 
 sys.path.insert(0, os.path.abspath('../..'))
 
+import mlair
+
 # -- Project information -----------------------------------------------------
 
 project = 'MLAir'
@@ -22,9 +24,9 @@ copyright = '2020, Lukas H Leufen, Felix Kleinert'
 author = 'Lukas H Leufen, Felix Kleinert'
 
 # The short X.Y version
-version = 'v0.9.0'
+version = "v" + ".".join(mlair.__version__.split(".")[0:2])
 # The full version, including alpha/beta/rc tags
-release = 'v0.9.0'
+release = "v" + mlair.__version__
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/docs/_source/customise.rst b/docs/_source/customise.rst
new file mode 100644
index 0000000000000000000000000000000000000000..4c9ee5386365c74e3d85c0f81085fcd3e1971b69
--- /dev/null
+++ b/docs/_source/customise.rst
@@ -0,0 +1,353 @@
+Default Workflow
+----------------
+
+.. role:: py(code)
+   :language: python
+
+MLAir is composed of so-called :py:`run_modules`, which are executed in a specific order called a :py:`workflow`. MLAir
+provides a :py:`DefaultWorkflow`. This workflow runs the run modules :py:`ExperimentSetup`, :py:`PreProcessing`,
+:py:`ModelSetup`, :py:`Training`, and :py:`PostProcessing` one by one.
+
+
+.. figure:: ./_plots/run_modules_schedule.png
+
+    Sketch of the default workflow.
+
+
+.. code-block:: python
+
+    import mlair
+
+    # create the default MLAir workflow
+    DefaultWorkflow = mlair.DefaultWorkflow()
+    # execute default workflow
+    DefaultWorkflow.run()
+
+The output of running this default workflow will be structured like the following.
+
+.. code-block::
+
+    INFO: mlair started
+    INFO: ExperimentSetup started
+    ...
+    INFO: ExperimentSetup finished after 00:00:01 (hh:mm:ss)
+    INFO: PreProcessing started
+    ...
+    INFO: PreProcessing finished after 00:00:11 (hh:mm:ss)
+    INFO: ModelSetup started
+    ...
+    INFO: ModelSetup finished after 00:00:01 (hh:mm:ss)
+    INFO: Training started
+    ...
+    INFO: Training finished after 00:02:15 (hh:mm:ss)
+    INFO: PostProcessing started
+    ...
+    INFO: PostProcessing finished after 00:01:37 (hh:mm:ss)
+    INFO: mlair finished after 00:04:05 (hh:mm:ss)
+
+Custom Model
+------------
+
+Create your own model to run your personal experiment. To guarantee proper integration into the MLAir workflow, models
+are required to inherit from the :py:`AbstractModelClass`. This will ensure a smooth training and evaluation
+behaviour.
+
+
+How to create a customised model?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Create a new model class inheriting from :py:`AbstractModelClass`
+
+.. code-block:: python
+
+    from mlair import AbstractModelClass
+    import keras
+
+    class MyCustomisedModel(AbstractModelClass):
+
+        def __init__(self, shape_inputs: list, shape_outputs: list):
+
+            super().__init__(shape_inputs[0], shape_outputs[0])
+
+            # settings
+            self.dropout_rate = 0.1
+            self.activation = keras.layers.PReLU
+
+            # apply to model
+            self.set_model()
+            self.set_compile_options()
+            self.set_custom_objects(loss=self.compile_options['loss'])
+
+* Make sure to add the :py:`super().__init__()` and at least :py:`set_model()` and :py:`set_compile_options()` to your
+  custom init method.
+* The shown model expects a single input and output branch provided in a list. Therefore, the shapes of input and
+  output are extracted and then provided to the super class initialiser.
+* Additionally, some general settings like the dropout rate are set in the init method.
+* If your model contains custom objects that are not part of the keras or tensorflow frameworks, you need to add
+  them as custom objects. To do this, call :py:`set_custom_objects` with arbitrary kwargs. In the shown example, the
+  loss has been added for demonstration only, because we use a built-in loss function. Nonetheless, we always encourage
+  you to add the loss as a custom object to prevent potential errors when loading an already created model instead of
+  training a new one.
+* Now build your model inside :py:`set_model()` by using the instance attributes :py:`self.shape_inputs` and
+  :py:`self.shape_outputs` and storing the model as :py:`self.model`.
+
+.. code-block:: python
+
+    class MyCustomisedModel(AbstractModelClass):
+
+        def set_model(self):
+            x_input = keras.layers.Input(shape=self.shape_inputs)
+            x_in = keras.layers.Conv2D(32, (1, 1), padding='same', name='{}_Conv_1x1'.format("major"))(x_input)
+            x_in = self.activation(name='{}_conv_act'.format("major"))(x_in)
+            x_in = keras.layers.Flatten(name='{}'.format("major"))(x_in)
+            x_in = keras.layers.Dropout(self.dropout_rate, name='{}_Dropout_1'.format("major"))(x_in)
+            x_in = keras.layers.Dense(16, name='{}_Dense_16'.format("major"))(x_in)
+            x_in = self.activation()(x_in)
+            x_in = keras.layers.Dense(self.shape_outputs, name='{}_Dense'.format("major"))(x_in)
+            out_main = self.activation()(x_in)
+            self.model = keras.Model(inputs=x_input, outputs=[out_main])
+
+* You are free in how you design your model. Just make sure to store it in the class attribute :py:`model`.
+* Additionally, set your custom compile options including the loss definition.
+
+.. code-block:: python
+
+    class MyCustomisedModel(AbstractModelClass):
+
+        def set_compile_options(self):
+            self.initial_lr = 1e-2
+            self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9)
+            self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr,
+                                                                                   drop=.94,
+                                                                                   epochs_drop=10)
+            self.loss = keras.losses.mean_squared_error
+            self.compile_options = {"metrics": ["mse", "mae"]}
+
+* The allocation of the instance parameters :py:`initial_lr`, :py:`optimizer`, and :py:`lr_decay` could also be part of
+  the model class' initialiser. The same applies to :py:`self.loss` and :py:`compile_options`, but we recommend using
+  the :py:`set_compile_options` method for the definition of parameters that are related to the compile options.
+* More importantly, make sure that the compile options are actually saved. There are three ways to achieve this.
+
+  * (1): Set all compile options by parsing a dictionary with all options to :py:`self.compile_options`.
+  * (2): Set all compile options as instance attributes. MLAir will search for these attributes and store them.
+  * (3): Define your compile options partly as dictionary and instance attributes (as shown in this example).
+  * If using (3) and defining the same compile option with different values, MLAir will raise an error.
+
+      Incorrect: (Will raise an error because of a mismatch for the :py:`optimizer` parameter.)
+
+      .. code-block:: python
+
+          def set_compile_options(self):
+              self.optimizer = keras.optimizers.SGD()
+              self.loss = keras.losses.mean_squared_error
+              self.compile_options = {"optimizer" = keras.optimizers.Adam()}
+
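+      A correct counterpart under approach (3) keeps each compile option in only one place. The following sketch is
+      an assumption of how this could look and is not taken from the MLAir sources:
+
+      .. code-block:: python
+
+          def set_compile_options(self):
+              # optimizer and loss are defined once as instance attributes ...
+              self.optimizer = keras.optimizers.SGD()
+              self.loss = keras.losses.mean_squared_error
+              # ... and the dictionary only carries options not set as attributes
+              self.compile_options = {"metrics": ["mse"]}
+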
+
+Specials for Branched Models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* If you have a branched model with multiple outputs, you need to either set a single loss for all branch outputs or
+  provide one loss function per output branch in the correct order.
+
+.. code-block:: python
+
+    class MyCustomisedModel(AbstractModelClass):
+
+        def set_model(self):
+            ...
+            self.model = keras.Model(inputs=x_input, outputs=[out_minor_1, out_minor_2, out_main])
+
+        def set_compile_options(self):
+            self.loss = [keras.losses.mean_absolute_error,  # for out_minor_1
+                         keras.losses.mean_squared_error,   # for out_minor_2
+                         keras.losses.mean_squared_error]   # for out_main
+
+
+How to access my customised model?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Once the customised model is created, you can easily access the model with
+
+>>> MyCustomisedModel().model
+<your custom model>
+
+The loss is accessible via
+
+>>> MyCustomisedModel().loss
+<your custom loss>
+
+You can treat the instance of your model class not only as an instance but also as the model itself. If you call a
+method that belongs to the model rather than to the model class instance, you can apply it directly to the instance
+without accessing the :py:`model` attribute first.
+
+>>> MyCustomisedModel().model.compile(**kwargs) == MyCustomisedModel().compile(**kwargs)
+True
+
+
+Data Handler
+------------
+
+The basic concept of a data handler is to ensure an appropriate handling of input and target data. This includes the
+loading and preparation of data and their provision in a predefined format. The user is given free rein as to which
+steps the loading and preparation must include. The only constraint is that data is considered as a collection of
+stations. This means that one instance of the data handler is created per station. MLAir then takes over the iteration
+over the collection of stations or distributes the data during the training according to the given batch size. With very
+large data sets, memory problems may occur if all data is loaded and held in main memory. In such a case it is
+recommended to open the data only temporarily. This has no effect on the training itself, as the data is then
+automatically distributed by MLAir.
+
+Interface of a data handler
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A data handler should inherit from the :py:`AbstractDataHandler` class. This class has some key features:
+
+* :py:`cls.requirements()` can be used to request all available :py:`args` and :py:`kwargs` from MLAir to build the
+  class.
+* :py:`cls.build(*args, **kwargs)` returns an initialised instance of the class by default. This can be modified
+  (i.e. overwritten) to execute some pre-build operations.
+* :py:`self.get_X(upsampling, as_numpy)` should return the input data either as a NumPy array or as xarray. With the
+  upsampling argument it is possible to implement a feature to weight inputs during training.
+* :py:`self.get_Y(upsampling, as_numpy)` does the same but for the target data.
+* :py:`self.transformation(*args, **kwargs)` is a placeholder to execute any desired transformation. This class method
+  is called during the preprocessing stage in the default MLAir workflow. Note that a transformation operation is only
+  estimated on the train data subset and afterwards applied on all data subsets.
+* :py:`self.get_coordinates()` is a placeholder and can be used to return a position for a geographical overview plot.
+
+During the preprocessing stage the following is executed:
+
+1) MLAir requests all required parameters that should be set during the ExperimentSetup stage by calling
+   :py:`data_handler.requirements()`.
+2) The data handler is built for each station using :py:`data_handler.build(station, **kwargs)` to check if data is
+   available for the given station.
+3) If valid, the built data handler is added to an internal data collection that gathers all contributing data
+   handlers.
+4) MLAir creates subsets for training, validation, and testing. For this, a separate data handler for each subset is
+   created using subset parameters (e.g. start and end).
+
+Later on during ModelSetup, Training and PostProcessing, MLAir requests data using :py:`data_handler.get_X()` and
+:py:`data_handler.get_Y()`.
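+
+The following sketch (simplified names, not the actual MLAir implementation) illustrates how this interface is used:
+
+.. code-block:: python
+
+    # hedged sketch: build one data handler per station and skip stations without data
+    data_collection = []
+    for station in stations:
+        try:
+            data_collection.append(DataHandler.build(station, **kwargs))
+        except Exception:
+            continue  # station is skipped if it cannot provide data
+
+    # later stages simply ask each handler for its inputs and targets
+    for dh in data_collection:
+        X, Y = dh.get_X(as_numpy=True), dh.get_Y(as_numpy=True)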
+
+Default Data Handler
+~~~~~~~~~~~~~~~~~~~~
+
+The default data handler accesses data from the TOAR database.
+
+
+Custom Data Handler
+~~~~~~~~~~~~~~~~~~~
+
+* Choose your personal data source, either a web interface or locally available data.
+* Create your custom data handler class by inheriting from :py:`AbstractDataHandler`.
+* Implement the initialiser :py:`__init__(*args, **kwargs)` and make sure to call the super class initialiser as well.
+  After executing this method the data should be ready to use. Besides that, there are no further rules for the initialiser.
+* (optionally) Modify the class method :py:`cls.build(*args, **kwargs)` to execute pre-build operations. Otherwise the
+  data handler defaults to calling the class initialiser. On modification, make sure to return the initialised class at the end.
+* (optionally) Add names of required arguments to the :py:`cls._requirements` list. It is not required to add args and
+  kwargs from the initialiser, they are added automatically. Modifying the requirements is only necessary if the build
+  method is modified (see previous bullet).
+* (optionally) Overwrite the base class :py:`self.get_coordinates()` method to return coordinates as dictionary with
+  keys *lon* and *lat*.
+
+.. code-block:: python
+
+    import datetime as dt
+    import numpy as np
+    import pandas as pd
+    import xarray as xr
+
+    from mlair.data_handler import AbstractDataHandler
+
+    class DummyDataHandler(AbstractDataHandler):
+
+        def __init__(self, name, number_of_samples=None):
+            """This data handler takes a name argument and the number of samples to generate. If not provided, a random
+            number between 100 and 150 is set."""
+            super().__init__()
+            self.name = name
+            self.number_of_samples = number_of_samples if number_of_samples is not None else np.random.randint(100, 150)
+            self._X = self.create_X()
+            self._Y = self.create_Y()
+
+        def create_X(self):
+            """Inputs are random numbers between 0 and 10 with shape (no_samples, window=14, variables=5)."""
+            X = np.random.randint(0, 10, size=(self.number_of_samples, 14, 5))  # samples, window, variables
+            datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+            return xr.DataArray(X, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                     "window": range(14),
+                                                                                     "variables": range(5)})
+
+        def create_Y(self):
+            """Targets are normal distributed random numbers with shape (no_samples, window=5, variables=1)."""
+            Y = np.round(0.5 * np.random.randn(self.number_of_samples, 5, 1), 1)  # samples, window, variables
+            datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+            return xr.DataArray(Y, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                     "window": range(5),
+                                                                                     "variables": range(1)})
+
+        def get_X(self, upsampling=False, as_numpy=False):
+            """Upsampling parameter is not used for X."""
+            return np.copy(self._X) if as_numpy is True else self._X
+
+        def get_Y(self, upsampling=False, as_numpy=False):
+            """Upsampling parameter is not used for Y."""
+            return np.copy(self._Y) if as_numpy is True else self._Y
+
+        def __str__(self):
+            return self.name
+
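+A quick, purely illustrative check of such a handler (outside of any MLAir workflow, station name is made up) could
+look like this:
+
+.. code-block:: python
+
+    dh = DummyDataHandler("station_xy")
+    X, Y = dh.get_data(as_numpy=True)  # inherited from AbstractDataHandler: returns (get_X(), get_Y())
+    print(dh, X.shape, Y.shape)
+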
+
+Customised Run Module and Workflow
+----------------------------------
+
+It is possible to create new custom run modules. A custom run module is required to inherit from the base class
+:py:`RunEnvironment` and to implement the constructor method :py:`__init__()`. This method has to execute the module on
+call. In the following example, this is done by using the :py:`_run()` method that is called by the initialiser. It is
+possible to pass arguments to the custom run module as shown.
+
+.. code-block:: python
+
+    import mlair
+    import logging
+
+    class CustomStage(mlair.RunEnvironment):
+        """A custom MLAir stage for demonstration."""
+
+        def __init__(self, test_string):
+            super().__init__()  # always call super init method
+            self._run(test_string)  # call a class method
+
+        def _run(self, test_string):
+            logging.info("Just running a custom stage.")
+            logging.info("test_string = " + test_string)
+            epochs = self.data_store.get("epochs")
+            logging.info("epochs = " + str(epochs))
+
+
+If a custom run module is defined, it is required to adjust the workflow. For this, you need to use the empty
+:py:`Workflow` class and add each run module that is required. The order in which the modules are added defines the
+order of execution when running the workflow.
+
+.. code-block:: python
+
+    # create your custom MLAir workflow
+    CustomWorkflow = mlair.Workflow()
+    # provide stages without initialisation
+    CustomWorkflow.add(mlair.ExperimentSetup, epochs=128)
+    # add also keyword arguments for a specific stage
+    CustomWorkflow.add(CustomStage, test_string="Hello World")
+    # finally execute custom workflow in order of adding
+    CustomWorkflow.run()
+
+The output will look like:
+
+.. code-block::
+
+    INFO: mlair started
+    ...
+    INFO: ExperimentSetup finished after 00:00:12 (hh:mm:ss)
+    INFO: CustomStage started
+    INFO: Just running a custom stage.
+    INFO: test_string = Hello World
+    INFO: epochs = 128
+    INFO: CustomStage finished after 00:00:01 (hh:mm:ss)
+    INFO: mlair finished after 00:00:13 (hh:mm:ss)
\ No newline at end of file
diff --git a/docs/_source/get-started.rst b/docs/_source/get-started.rst
index 98a96d43675a0263be5bfc2d452b8af1c2626b60..2e8838fd5b1ac63a7e34e39b7f8bc24d70f9c1b7 100644
--- a/docs/_source/get-started.rst
+++ b/docs/_source/get-started.rst
@@ -1,34 +1,46 @@
-Get started with MLAir
-======================
+
+Getting started with MLAir
+==========================
+
+.. role:: py(code)
+   :language: python
+
 
 Install MLAir
 -------------
 
 MLAir is based on several python frameworks. To work properly, you have to install all packages from the
-`requirements.txt` file. Additionally to support the geographical plotting part it is required to install geo
+:py:`requirements.txt` file. Additionally, to support the geographical plotting part, it is required to install geo
 packages built for your operating system. Name names of these package may differ for different systems, we refer
-here to the opensuse / leap OS. The geo plot can be removed from the `plot_list`, in this case there is no need to
+here to the opensuse / leap OS. The geo plot can be removed from the :py:`plot_list`; in this case there is no need to
 install the geo packages.
 
-* (geo) Install **proj** on your machine using the console. E.g. for opensuse / leap `zypper install proj`
+Pre-requirements
+~~~~~~~~~~~~~~~~
+
+* (geo) Install **proj** on your machine using the console. E.g. for opensuse / leap :py:`zypper install proj`
 * (geo) A c++ compiler is required for the installation of the program **cartopy**
-* Install all requirements from [`requirements.txt`](https://gitlab.version.fz-juelich.de/toar/machinelearningtools/-/blob/master/requirements.txt)
-  preferably in a virtual environment
 * (tf) Currently, TensorFlow-1.13 is mentioned in the requirements. We already tested the TensorFlow-1.15 version and couldn't
   find any compatibility errors. Please note, that tf-1.13 and 1.15 have two distinct branches each, the default branch
   for CPU support, and the "-gpu" branch for GPU support. If the GPU version is installed, MLAir will make use of the GPU
   device.
-* Installation of **MLAir**:
-    * Either clone MLAir from the [gitlab repository](https://gitlab.version.fz-juelich.de/toar/machinelearningtools.git)
-      and use it without installation (beside the requirements)
-    * or download the distribution file (?? .whl) and install it via `pip install <??>`. In this case, you can simply
-      import MLAir in any python script inside your virtual environment using `import mlair`.
+
+Installation of MLAir
+~~~~~~~~~~~~~~~~~~~~~
+
+* Install all requirements from `requirements.txt <https://gitlab.version.fz-juelich.de/toar/machinelearningtools/-/blob/master/requirements.txt>`_
+  preferably in a virtual environment
+* Either clone MLAir from the `gitlab repository <https://gitlab.version.fz-juelich.de/toar/machinelearningtools.git>`_
+* or download the distribution file (?? .whl) and install it via :py:`pip install <??>`. In this case, you can simply
+  import MLAir in any python script inside your virtual environment using :py:`import mlair`.
 
 
 How to start with MLAir
 -----------------------
 
-In this section, we show three examples how to work with MLAir.
+In this section, we show three examples of how to work with MLAir. Note that for these examples, MLAir was installed
+using the distribution file. If you are using the git clone, you need to adjust the import path unless the code is
+executed directly inside the source directory of MLAir.
 
 Example 1
 ~~~~~~~~~
@@ -126,107 +138,3 @@ We can see from the terminal that no training was performed. Analysis is now mad
     ...
     INFO: mlair finished after 00:00:06 (hh:mm:ss)
 
-
-
-Customised workflows and models
--------------------------------
-
-Custom Workflow
-~~~~~~~~~~~~~~~
-
-MLAir provides a default workflow. If additional steps are to be performed, you have to append custom run modules to
-the workflow.
-
-.. code-block:: python
-
-    import mlair
-    import logging
-
-    class CustomStage(mlair.RunEnvironment):
-        """A custom MLAir stage for demonstration."""
-
-        def __init__(self, test_string):
-            super().__init__()  # always call super init method
-            self._run(test_string)  # call a class method
-
-        def _run(self, test_string):
-            logging.info("Just running a custom stage.")
-            logging.info("test_string = " + test_string)
-            epochs = self.data_store.get("epochs")
-            logging.info("epochs = " + str(epochs))
-
-
-    # create your custom MLAir workflow
-    CustomWorkflow = mlair.Workflow()
-    # provide stages without initialisation
-    CustomWorkflow.add(mlair.ExperimentSetup, epochs=128)
-    # add also keyword arguments for a specific stage
-    CustomWorkflow.add(CustomStage, test_string="Hello World")
-    # finally execute custom workflow in order of adding
-    CustomWorkflow.run()
-
-.. code-block::
-
-    INFO: mlair started
-    ...
-    INFO: ExperimentSetup finished after 00:00:12 (hh:mm:ss)
-    INFO: CustomStage started
-    INFO: Just running a custom stage.
-    INFO: test_string = Hello World
-    INFO: epochs = 128
-    INFO: CustomStage finished after 00:00:01 (hh:mm:ss)
-    INFO: mlair finished after 00:00:13 (hh:mm:ss)
-
-Custom Model
-~~~~~~~~~~~~
-
-Each model has to inherit from the abstract model class to ensure a smooth training and evaluation behaviour. It is
-required to implement the set model and set compile options methods. The later has to set the loss at least.
-
-.. code-block:: python
-
-    import keras
-    from keras.losses import mean_squared_error as mse
-    from keras.optimizers import SGD
-
-    from mlair.model_modules import AbstractModelClass
-
-    class MyLittleModel(AbstractModelClass):
-        """
-        A customised model with a 1x1 Conv, and 3 Dense layers (32, 16
-        window_lead_time). Dropout is used after Conv layer.
-        """
-        def __init__(self, window_history_size, window_lead_time, channels):
-            super().__init__()
-            # settings
-            self.window_history_size = window_history_size
-            self.window_lead_time = window_lead_time
-            self.channels = channels
-            self.dropout_rate = 0.1
-            self.activation = keras.layers.PReLU
-            self.lr = 1e-2
-            # apply to model
-            self.set_model()
-            self.set_compile_options()
-            self.set_custom_objects(loss=self.compile_options['loss'])
-
-        def set_model(self):
-            # add 1 to window_size to include current time step t0
-            shape = (self.window_history_size + 1, 1, self.channels)
-            x_input = keras.layers.Input(shape=shape)
-            x_in = keras.layers.Conv2D(32, (1, 1), padding='same')(x_input)
-            x_in = self.activation()(x_in)
-            x_in = keras.layers.Flatten()(x_in)
-            x_in = keras.layers.Dropout(self.dropout_rate)(x_in)
-            x_in = keras.layers.Dense(32)(x_in)
-            x_in = self.activation()(x_in)
-            x_in = keras.layers.Dense(16)(x_in)
-            x_in = self.activation()(x_in)
-            x_in = keras.layers.Dense(self.window_lead_time)(x_in)
-            out = self.activation()(x_in)
-            self.model = keras.Model(inputs=x_input, outputs=[out])
-
-        def set_compile_options(self):
-            self.compile_options = {"optimizer": SGD(lr=self.lr),
-                                    "loss": mse,
-                                    "metrics": ["mse"]}
diff --git a/docs/_source/index.rst b/docs/_source/index.rst
index 341ac58acd62ccc5bcf786580fff1bc193170d62..a60643042eb750b6a21fa0f638797002be7675c4 100644
--- a/docs/_source/index.rst
+++ b/docs/_source/index.rst
@@ -1,17 +1,18 @@
-.. machinelearningtools documentation master file, created by
+.. MLAir documentation master file, created by
     sphinx-quickstart on Wed Apr 15 14:27:29 2020.
     You can adapt this file completely to your liking, but it should at least
     contain the root `toctree` directive.
 
-Welcome to machinelearningtools's documentation!
+Welcome to MLAir's documentation!
 ================================================
 
+
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
 
    get-started
-   api
+   customise
 
 
 Indices and tables
diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 31746ec889cc82ebbae8de82a05c5cff02a22ac0..d191af2edd8a6fe2c1093b3f1c3f5d419cc42b76 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -18,7 +18,7 @@ DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise"}
 DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"]  # ju[wels} #hdfmll(ogin)
 DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"]  # first part of node names for Juwels (jw[comp], hdfmlc(ompute).
 DEFAULT_CREATE_NEW_MODEL = True
-DEFAULT_TRAINABLE = True
+DEFAULT_TRAIN_MODEL = True
 DEFAULT_FRACTION_OF_TRAINING = 0.8
 DEFAULT_EXTREME_VALUES = None
 DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY = False
@@ -46,15 +46,19 @@ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True
 DEFAULT_EVALUATE_BOOTSTRAPS = True
 DEFAULT_CREATE_NEW_BOOTSTRAPS = False
 DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
-DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
+#DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
+#                     "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
+#                     "PlotAvailability"]
+DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", 
                      "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
                      "PlotAvailability"]
 
 
+
 def get_defaults():
     """Return all default parameters set in defaults.py"""
     return {key: value for key, value in globals().items() if key.startswith('DEFAULT')}
 
 
 if __name__ == "__main__":
-    print(get_defaults())
\ No newline at end of file
+    print(get_defaults())
diff --git a/mlair/data_handler/__init__.py b/mlair/data_handler/__init__.py
index 451868b838ab7a0d165942e36b5ec6aa03e42721..01d660031bbbdda08eba80044a08fcb034d8171b 100644
--- a/mlair/data_handler/__init__.py
+++ b/mlair/data_handler/__init__.py
@@ -11,5 +11,6 @@ __date__ = '2020-04-17'
 
 from .bootstraps import BootStraps
 from .iterator import KerasIterator, DataCollection
-from .advanced_data_handler import DefaultDataPreparation, AbstractDataPreparation
-from .data_preparation_neighbors import DataPreparationNeighbors
+from .default_data_handler import DefaultDataHandler
+from .abstract_data_handler import AbstractDataHandler
+from .data_preparation_neighbors import DataHandlerNeighbors
diff --git a/mlair/data_handler/abstract_data_handler.py b/mlair/data_handler/abstract_data_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..04b3d4651347759130da15a05056f6ace3d0fc1f
--- /dev/null
+++ b/mlair/data_handler/abstract_data_handler.py
@@ -0,0 +1,47 @@
+
+__author__ = 'Lukas Leufen'
+__date__ = '2020-09-21'
+
+import inspect
+from typing import Union, Dict
+
+from mlair.helpers import remove_items
+
+
+class AbstractDataHandler:
+
+    _requirements = []
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    @classmethod
+    def build(cls, *args, **kwargs):
+        """Return initialised class."""
+        return cls(*args, **kwargs)
+
+    @classmethod
+    def requirements(cls):
+        """Return requirements and own arguments without duplicates."""
+        return list(set(cls._requirements + cls.own_args()))
+
+    @classmethod
+    def own_args(cls, *args):
+        return remove_items(inspect.getfullargspec(cls).args, ["self"] + list(args))
+
+    @classmethod
+    def transformation(cls, *args, **kwargs):
+        return None
+
+    def get_X(self, upsampling=False, as_numpy=False):
+        raise NotImplementedError
+
+    def get_Y(self, upsampling=False, as_numpy=False):
+        raise NotImplementedError
+
+    def get_data(self, upsampling=False, as_numpy=False):
+        return self.get_X(upsampling, as_numpy), self.get_Y(upsampling, as_numpy)
+
+    def get_coordinates(self) -> Union[None, Dict]:
+        """Return coordinates as dictionary with keys `lon` and `lat`."""
+        return None
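To show how the interface above is meant to be used, here is a hedged sketch of a custom handler; `InMemoryDataHandler` and its fixed arrays are hypothetical, only the overridden methods and the inherited `build()`/`requirements()` behaviour come from `AbstractDataHandler`:

```python
import numpy as np
import xarray as xr

from mlair.data_handler.abstract_data_handler import AbstractDataHandler


class InMemoryDataHandler(AbstractDataHandler):
    """Hypothetical handler serving two fixed arrays as inputs and targets."""

    def __init__(self, X: xr.DataArray, Y: xr.DataArray):
        super().__init__()
        self._X, self._Y = X, Y

    def get_X(self, upsampling=False, as_numpy=False):
        # upsampling is ignored here; the base class only fixes the signature
        return np.copy(self._X) if as_numpy is True else self._X

    def get_Y(self, upsampling=False, as_numpy=False):
        return np.copy(self._Y) if as_numpy is True else self._Y
```

The inherited `build()` then simply returns `InMemoryDataHandler(X, Y)`, and `requirements()` lists the constructor arguments `X` and `Y`.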
diff --git a/mlair/data_handler/advanced_data_handler.py b/mlair/data_handler/advanced_data_handler.py
index 57a9667f2a42575faa02d50e439252738a8dc8bb..c2d210bffdb598b23c025f60b903ddef84e4509d 100644
--- a/mlair/data_handler/advanced_data_handler.py
+++ b/mlair/data_handler/advanced_data_handler.py
@@ -2,324 +2,39 @@
 __author__ = 'Lukas Leufen'
 __date__ = '2020-07-08'
 
-
-from mlair.helpers import to_list, remove_items
 import numpy as np
 import xarray as xr
-import pickle
 import os
 import pandas as pd
 import datetime as dt
-import shutil
-import inspect
-import copy
 
-from typing import Union, List, Tuple, Dict
-import logging
-from functools import reduce
-from mlair.data_handler.station_preparation import StationPrep
-from mlair.helpers.join import EmptyQueryResult
+from mlair.data_handler import AbstractDataHandler
 
+from typing import Union, List
 
 number = Union[float, int]
 num_or_list = Union[number, List[number]]
 
 
-class DummyDataSingleStation:  # pragma: no cover
-
-    def __init__(self, name, number_of_samples=None):
-        self.name = name
-        self.number_of_samples = number_of_samples if number_of_samples is not None else np.random.randint(100, 150)
-
-    def get_X(self):
-        X1 = np.random.randint(0, 10, size=(self.number_of_samples, 14, 5))  # samples, window, variables
-        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
-        return xr.DataArray(X1, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
-                                                                                  "window": range(14),
-                                                                                  "variables": range(5)})
-
-    def get_Y(self):
-        Y1 = np.round(0.5 * np.random.randn(self.number_of_samples, 5, 1), 1)  # samples, window, variables
-        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
-        return xr.DataArray(Y1, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
-                                                                                  "window": range(5),
-                                                                                  "variables": range(1)})
-
-    def __str__(self):
-        return self.name
-
-
-class AbstractDataPreparation:
-
-    _requirements = []
-
-    def __init__(self, *args, **kwargs):
-        pass
-
-    @classmethod
-    def build(cls, *args, **kwargs):
-        """Return initialised class."""
-        return cls(*args, **kwargs)
-
-    @classmethod
-    def requirements(cls):
-        """Return requirements and own arguments without duplicates."""
-        return list(set(cls._requirements + cls.own_args()))
-
-    @classmethod
-    def own_args(cls, *args):
-        return remove_items(inspect.getfullargspec(cls).args, ["self"] + list(args))
-
-    @classmethod
-    def transformation(cls, *args, **kwargs):
-        return None
-
-    def get_X(self, upsampling=False, as_numpy=False):
-        raise NotImplementedError
-
-    def get_Y(self, upsampling=False, as_numpy=False):
-        raise NotImplementedError
-
-    def get_data(self, upsampling=False, as_numpy=False):
-        return self.get_X(upsampling, as_numpy), self.get_Y(upsampling, as_numpy)
-
-    def get_coordinates(self) -> Union[None, Dict]:
-        return None
-
-
-class DefaultDataPreparation(AbstractDataPreparation):
-
-    _requirements = remove_items(inspect.getfullargspec(StationPrep).args, ["self", "station"])
-
-    def __init__(self, id_class, data_path, min_length=0,
-                 extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None):
-        super().__init__()
-        self.id_class = id_class
-        self.interpolation_dim = "datetime"
-        self.min_length = min_length
-        self._X = None
-        self._Y = None
-        self._X_extreme = None
-        self._Y_extreme = None
-        _name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self))
-        self._save_file = os.path.join(data_path, f"data_preparation_{_name_affix}.pickle")
-        self._collection = self._create_collection()
-        self.harmonise_X()
-        self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolation_dim)
-        self._store(fresh_store=True)
-
-    @classmethod
-    def build(cls, station, **kwargs):
-        sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
-        sp = StationPrep(station, **sp_keys)
-        dp_args = {k: copy.deepcopy(kwargs[k]) for k in cls.own_args("id_class") if k in kwargs}
-        return cls(sp, **dp_args)
-
-    def _create_collection(self):
-        return [self.id_class]
-
-    @classmethod
-    def requirements(cls):
-        return remove_items(super().requirements(), "id_class")
-
-    def _reset_data(self):
-        self._X, self._Y, self._X_extreme, self._Y_extreme = None, None, None, None
-
-    def _cleanup(self):
-        directory = os.path.dirname(self._save_file)
-        if os.path.exists(directory) is False:
-            os.makedirs(directory)
-        if os.path.exists(self._save_file):
-            shutil.rmtree(self._save_file, ignore_errors=True)
-
-    def _store(self, fresh_store=False):
-        self._cleanup() if fresh_store is True else None
-        data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
-        with open(self._save_file, "wb") as f:
-            pickle.dump(data, f)
-        logging.debug(f"save pickle data to {self._save_file}")
-        self._reset_data()
-
-    def _load(self):
-        try:
-            with open(self._save_file, "rb") as f:
-                data = pickle.load(f)
-            logging.debug(f"load pickle data from {self._save_file}")
-            self._X, self._Y = data["X"], data["Y"]
-            self._X_extreme, self._Y_extreme = data["X_extreme"], data["Y_extreme"]
-        except FileNotFoundError:
-            pass
-
-    def get_data(self, upsampling=False, as_numpy=True):
-        self._load()
-        X = self.get_X(upsampling, as_numpy)
-        Y = self.get_Y(upsampling, as_numpy)
-        self._reset_data()
-        return X, Y
-
-    def __repr__(self):
-        return ";".join(list(map(lambda x: str(x), self._collection)))
-
-    def get_X_original(self):
-        X = []
-        for data in self._collection:
-            X.append(data.get_X())
-        return X
-
-    def get_Y_original(self):
-        Y = self._collection[0].get_Y()
-        return Y
-
-    @staticmethod
-    def _to_numpy(d):
-        return list(map(lambda x: np.copy(x), d))
-
-    def get_X(self, upsampling=False, as_numpy=True):
-        no_data = (self._X is None)
-        self._load() if no_data is True else None
-        X = self._X if upsampling is False else self._X_extreme
-        self._reset_data() if no_data is True else None
-        return self._to_numpy(X) if as_numpy is True else X
-
-    def get_Y(self, upsampling=False, as_numpy=True):
-        no_data = (self._Y is None)
-        self._load() if no_data is True else None
-        Y = self._Y if upsampling is False else self._Y_extreme
-        self._reset_data() if no_data is True else None
-        return self._to_numpy([Y]) if as_numpy is True else Y
-
-    def harmonise_X(self):
-        X_original, Y_original = self.get_X_original(), self.get_Y_original()
-        dim = self.interpolation_dim
-        intersect = reduce(np.intersect1d, map(lambda x: x.coords[dim].values, X_original))
-        if len(intersect) < max(self.min_length, 1):
-            X, Y = None, None
-        else:
-            X = list(map(lambda x: x.sel({dim: intersect}), X_original))
-            Y = Y_original.sel({dim: intersect})
-        self._X, self._Y = X, Y
-
-    def get_observation(self):
-        return self.id_class.observation.copy().squeeze()
-
-    def get_transformation_Y(self):
-        return self.id_class.get_transformation_information()
-
-    def multiply_extremes(self, extreme_values: num_or_list = 1., extremes_on_right_tail_only: bool = False,
-                          timedelta: Tuple[int, str] = (1, 'm'), dim="datetime"):
-        """
-        Multiply extremes.
-
-        This method extracts extreme values from self.labels which are defined in the argument extreme_values. One can
-        also decide only to extract extremes on the right tail of the distribution. When extreme_values is a list of
-        floats/ints all values larger (and smaller than negative extreme_values; extraction is performed in standardised
-        space) than are extracted iteratively. If for example extreme_values = [1.,2.] then a value of 1.5 would be
-        extracted once (for 0th entry in list), while a 2.5 would be extracted twice (once for each entry). Timedelta is
-        used to mark those extracted values by adding one min to each timestamp. As TOAR Data are hourly one can
-        identify those "artificial" data points later easily. Extreme inputs and labels are stored in
-        self.extremes_history and self.extreme_labels, respectively.
-
-        :param extreme_values: user definition of extreme
-        :param extremes_on_right_tail_only: if False also multiply values which are smaller then -extreme_values,
-            if True only extract values larger than extreme_values
-        :param timedelta: used as arguments for np.timedelta in order to mark extreme values on datetime
-        """
-        # check if X or Y is None
-        if (self._X is None) or (self._Y is None):
-            logging.debug(f"{str(self.id_class)} has no data for X or Y, skip multiply extremes")
-            return
-        if extreme_values is None:
-            logging.debug(f"No extreme values given, skip multiply extremes")
-            self._X_extreme, self._Y_extreme = self._X, self._Y
-            return
-
-        # check type if inputs
-        extreme_values = to_list(extreme_values)
-        for i in extreme_values:
-            if not isinstance(i, number.__args__):
-                raise TypeError(f"Elements of list extreme_values have to be {number.__args__}, but at least element "
-                                f"{i} is type {type(i)}")
-
-        for extr_val in sorted(extreme_values):
-            # check if some extreme values are already extracted
-            if (self._X_extreme is None) or (self._Y_extreme is None):
-                X = self._X
-                Y = self._Y
-            else:  # one extr value iteration is done already: self.extremes_label is NOT None...
-                X = self._X_extreme
-                Y = self._Y_extreme
-
-            # extract extremes based on occurrence in labels
-            other_dims = remove_items(list(Y.dims), dim)
-            if extremes_on_right_tail_only:
-                extreme_idx = (Y > extr_val).any(dim=other_dims)
-            else:
-                extreme_idx = xr.concat([(Y < -extr_val).any(dim=other_dims[0]),
-                                           (Y > extr_val).any(dim=other_dims[0])],
-                                          dim=other_dims[1]).any(dim=other_dims[1])
-
-            extremes_X = list(map(lambda x: x.sel(**{dim: extreme_idx}), X))
-            self._add_timedelta(extremes_X, dim, timedelta)
-            # extremes_X = list(map(lambda x: x.coords[dim].values + np.timedelta64(*timedelta), extremes_X))
-
-            extremes_Y = Y.sel(**{dim: extreme_idx})
-            extremes_Y.coords[dim].values += np.timedelta64(*timedelta)
-
-            self._Y_extreme = xr.concat([Y, extremes_Y], dim=dim)
-            self._X_extreme = list(map(lambda x1, x2: xr.concat([x1, x2], dim=dim), X, extremes_X))
-
-    @staticmethod
-    def _add_timedelta(data, dim, timedelta):
-        for d in data:
-            d.coords[dim].values += np.timedelta64(*timedelta)
-
-    @classmethod
-    def transformation(cls, set_stations, **kwargs):
-        sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
-        transformation_dict = sp_keys.pop("transformation")
-        if transformation_dict is None:
-            return
-        scope = transformation_dict.pop("scope")
-        method = transformation_dict.pop("method")
-        if transformation_dict.pop("mean", None) is not None:
-            return
-        mean, std = None, None
-        for station in set_stations:
-            try:
-                sp = StationPrep(station, transformation={"method": method}, **sp_keys)
-                mean = sp.mean.copy(deep=True) if mean is None else mean.combine_first(sp.mean)
-                std = sp.std.copy(deep=True) if std is None else std.combine_first(sp.std)
-            except (AttributeError, EmptyQueryResult):
-                continue
-        if mean is None:
-            return None
-        mean_estimated = mean.mean("Stations")
-        std_estimated = std.mean("Stations")
-        return {"scope": scope, "method": method, "mean": mean_estimated, "std": std_estimated}
-
-    def get_coordinates(self):
-        return self.id_class.get_coordinates()
-
-
 def run_data_prep():
 
-    from .data_preparation_neighbors import DataPreparationNeighbors
-    data = DummyDataSingleStation("main_class")
+    from .data_preparation_neighbors import DataHandlerNeighbors
+    data = DummyDataHandler("main_class")
     data.get_X()
     data.get_Y()
 
     path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
-    data_prep = DataPreparationNeighbors(DummyDataSingleStation("main_class"),
-                                         path,
-                                         neighbors=[DummyDataSingleStation("neighbor1"),
-                                                    DummyDataSingleStation("neighbor2")],
-                                         extreme_values=[1., 1.2])
+    data_prep = DataHandlerNeighbors(DummyDataHandler("main_class"),
+                                     path,
+                                     neighbors=[DummyDataHandler("neighbor1"),
+                                                DummyDataHandler("neighbor2")],
+                                     extreme_values=[1., 1.2])
     data_prep.get_data(upsampling=False)
 
 
 def create_data_prep():
 
-    from .data_preparation_neighbors import DataPreparationNeighbors
+    from .data_preparation_neighbors import DataHandlerNeighbors
+    from .station_preparation import DataHandlerSingleStation
     path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
     station_type = None
     network = 'UBA'
@@ -329,22 +44,61 @@ def create_data_prep():
     interpolation_dim = 'datetime'
     window_history_size = 7
     window_lead_time = 3
-    central_station = StationPrep("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {},station_type, network, sampling, target_dim,
-                                  target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor1 = StationPrep("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {},station_type, network, sampling, target_dim,
-                                  target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor2 = StationPrep("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
-                                  target_var, interpolation_dim, window_history_size, window_lead_time)
+    central_station = DataHandlerSingleStation("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                               target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor1 = DataHandlerSingleStation("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor2 = DataHandlerSingleStation("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
 
     data_prep = []
-    data_prep.append(DataPreparationNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
-    data_prep.append(DataPreparationNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
-    data_prep.append(DataPreparationNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
+    data_prep.append(DataHandlerNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
     return data_prep
 
 
+class DummyDataHandler(AbstractDataHandler):
+
+    def __init__(self, name, number_of_samples=None):
+        """This data handler takes a name argument and the number of samples to generate. If not provided, a random
+        number between 100 and 150 is set."""
+        super().__init__()
+        self.name = name
+        self.number_of_samples = number_of_samples if number_of_samples is not None else np.random.randint(100, 150)
+        self._X = self.create_X()
+        self._Y = self.create_Y()
+
+    def create_X(self):
+        """Inputs are random numbers between 0 and 10 with shape (no_samples, window=14, variables=5)."""
+        X = np.random.randint(0, 10, size=(self.number_of_samples, 14, 5))  # samples, window, variables
+        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+        return xr.DataArray(X, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                 "window": range(14),
+                                                                                 "variables": range(5)})
+
+    def create_Y(self):
+        """Targets are normal distributed random numbers with shape (no_samples, window=5, variables=1)."""
+        Y = np.round(0.5 * np.random.randn(self.number_of_samples, 5, 1), 1)  # samples, window, variables
+        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+        return xr.DataArray(Y, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                 "window": range(5),
+                                                                                 "variables": range(1)})
+
+    def get_X(self, upsampling=False, as_numpy=False):
+        """Upsampling parameter is not used for X."""
+        return np.copy(self._X) if as_numpy is True else self._X
+
+    def get_Y(self, upsampling=False, as_numpy=False):
+        """Upsampling parameter is not used for Y."""
+        return np.copy(self._Y) if as_numpy is True else self._Y
+
+    def __str__(self):
+        return self.name
+
+
 if __name__ == "__main__":
-    from mlair.data_handler.station_preparation import StationPrep
+    from mlair.data_handler.station_preparation import DataHandlerSingleStation
     from mlair.data_handler.iterator import KerasIterator, DataCollection
     data_prep = create_data_prep()
     data_collection = DataCollection(data_prep)
diff --git a/mlair/data_handler/bootstraps.py b/mlair/data_handler/bootstraps.py
index 91603b41822b92e28fbd077c502d84707fff746f..68a4bbc4bc9620bfb54ba23fef1ce882e76c8626 100644
--- a/mlair/data_handler/bootstraps.py
+++ b/mlair/data_handler/bootstraps.py
@@ -19,7 +19,7 @@ from itertools import chain
 import numpy as np
 import xarray as xr
 
-from mlair.data_handler.advanced_data_handler import AbstractDataPreparation
+from mlair.data_handler.abstract_data_handler import AbstractDataHandler
 
 
 class BootstrapIterator(Iterator):
@@ -82,7 +82,7 @@ class BootStraps(Iterable):
     """
     Main class to perform bootstrap operations.
 
-    This class requires a data handler following the definition of the AbstractDataPreparation, the number of bootstraps
+    This class requires a data handler following the definition of the AbstractDataHandler, the number of bootstraps
     to create and the dimension along which the bootstrapping is performed (default dimension is `variables`).
 
     When iterating on this class, it returns the bootstrapped X, Y and a tuple with (position of variable in X, name of
@@ -91,7 +91,7 @@ class BootStraps(Iterable):
     retrieved by calling the .bootstraps() method. Furthermore, by calling the .get_orig_prediction() method, this class
     imitates the original prediction according to the set number of bootstraps.
     """
-    def __init__(self, data: AbstractDataPreparation, number_of_bootstraps: int = 10,
+    def __init__(self, data: AbstractDataHandler, number_of_bootstraps: int = 10,
                  bootstrap_dimension: str = "variables"):
         """
         Create iterable class to be ready to iter.
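A hedged usage sketch of the iteration protocol described above; the helper function is illustrative only, the unpacked triple follows the documented behaviour (bootstrapped X, unchanged Y, and a tuple of position and name of the shuffled variable):

```python
from mlair.data_handler import BootStraps


def inspect_bootstraps(handler, number_of_bootstraps=10):
    """`handler` is any data handler following the AbstractDataHandler definition."""
    boots = BootStraps(handler, number_of_bootstraps=number_of_bootstraps, bootstrap_dimension="variables")
    for x_boot, y_boot, (position, variable) in boots:
        # x_boot has the named variable shuffled, y_boot is the unchanged target
        print(f"bootstrapped variable '{variable}' (position {position} in X)")
```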
diff --git a/mlair/data_handler/data_preparation_neighbors.py b/mlair/data_handler/data_preparation_neighbors.py
index 0c95b242e1046618403ebb6592407ef8b680e890..1482bb9fe20afcc2b92d2b91ae523a6dca19c54d 100644
--- a/mlair/data_handler/data_preparation_neighbors.py
+++ b/mlair/data_handler/data_preparation_neighbors.py
@@ -4,8 +4,8 @@ __date__ = '2020-07-17'
 
 
 from mlair.helpers import to_list
-from mlair.data_handler.station_preparation import StationPrep
-from mlair.data_handler.advanced_data_handler import DefaultDataPreparation
+from mlair.data_handler.station_preparation import DataHandlerSingleStation
+from mlair.data_handler import DefaultDataHandler
 import os
 
 from typing import Union, List
@@ -14,7 +14,7 @@ number = Union[float, int]
 num_or_list = Union[number, List[number]]
 
 
-class DataPreparationNeighbors(DefaultDataPreparation):
+class DataHandlerNeighbors(DefaultDataHandler):
 
     def __init__(self, id_class, data_path, neighbors=None, min_length=0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False):
@@ -25,10 +25,10 @@ class DataPreparationNeighbors(DefaultDataPreparation):
     @classmethod
     def build(cls, station, **kwargs):
         sp_keys = {k: kwargs[k] for k in cls._requirements if k in kwargs}
-        sp = StationPrep(station, **sp_keys)
+        sp = DataHandlerSingleStation(station, **sp_keys)
         n_list = []
         for neighbor in kwargs.get("neighbors", []):
-            n_list.append(StationPrep(neighbor, **sp_keys))
+            n_list.append(DataHandlerSingleStation(neighbor, **sp_keys))
         else:
             kwargs["neighbors"] = n_list if len(n_list) > 0 else None
         dp_args = {k: kwargs[k] for k in cls.own_args("id_class") if k in kwargs}
@@ -39,12 +39,12 @@ class DataPreparationNeighbors(DefaultDataPreparation):
 
     def get_coordinates(self, include_neighbors=False):
         neighbors = list(map(lambda n: n.get_coordinates(), self.neighbors)) if include_neighbors is True else []
-        return [super(DataPreparationNeighbors, self).get_coordinates()].append(neighbors)
+        # note: list.append returns None, so concatenate own and neighbour coordinates instead
+        return [super(DataHandlerNeighbors, self).get_coordinates()] + neighbors
 
 
 if __name__ == "__main__":
 
-    a = DataPreparationNeighbors
+    a = DataHandlerNeighbors
     requirements = a.requirements()
 
     kwargs = {"path": os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata"),
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..47f63a3e7bcbebd131c2a0da47d2e0833b02efed
--- /dev/null
+++ b/mlair/data_handler/default_data_handler.py
@@ -0,0 +1,238 @@
+
+__author__ = 'Lukas Leufen'
+__date__ = '2020-09-21'
+
+import copy
+import inspect
+import logging
+import os
+import pickle
+import shutil
+from functools import reduce
+from typing import Tuple, Union, List
+
+import numpy as np
+import xarray as xr
+
+from mlair.data_handler.abstract_data_handler import AbstractDataHandler
+from mlair.data_handler.station_preparation import DataHandlerSingleStation
+from mlair.helpers import remove_items, to_list
+from mlair.helpers.join import EmptyQueryResult
+
+
+number = Union[float, int]
+num_or_list = Union[number, List[number]]
+
+
+class DefaultDataHandler(AbstractDataHandler):
+
+    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
+
+    def __init__(self, id_class: DataHandlerSingleStation, data_path: str, min_length: int = 0,
+                 extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None):
+        super().__init__()
+        self.id_class = id_class
+        self.interpolation_dim = "datetime"
+        self.min_length = min_length
+        self._X = None
+        self._Y = None
+        self._X_extreme = None
+        self._Y_extreme = None
+        _name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self))
+        self._save_file = os.path.join(data_path, f"data_preparation_{_name_affix}.pickle")
+        self._collection = self._create_collection()
+        self.harmonise_X()
+        self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolation_dim)
+        self._store(fresh_store=True)
+
+    @classmethod
+    def build(cls, station: str, **kwargs):
+        sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
+        sp = DataHandlerSingleStation(station, **sp_keys)
+        dp_args = {k: copy.deepcopy(kwargs[k]) for k in cls.own_args("id_class") if k in kwargs}
+        return cls(sp, **dp_args)
+
+    def _create_collection(self):
+        return [self.id_class]
+
+    @classmethod
+    def requirements(cls):
+        return remove_items(super().requirements(), "id_class")
+
+    def _reset_data(self):
+        self._X, self._Y, self._X_extreme, self._Y_extreme = None, None, None, None
+
+    def _cleanup(self):
+        directory = os.path.dirname(self._save_file)
+        if os.path.exists(directory) is False:
+            os.makedirs(directory)
+        if os.path.exists(self._save_file):
+            shutil.rmtree(self._save_file, ignore_errors=True)
+
+    def _store(self, fresh_store=False):
+        self._cleanup() if fresh_store is True else None
+        data = {"X": self._X, "Y": self._Y, "X_extreme": self._X_extreme, "Y_extreme": self._Y_extreme}
+        with open(self._save_file, "wb") as f:
+            pickle.dump(data, f)
+        logging.debug(f"save pickle data to {self._save_file}")
+        self._reset_data()
+
+    def _load(self):
+        try:
+            with open(self._save_file, "rb") as f:
+                data = pickle.load(f)
+            logging.debug(f"load pickle data from {self._save_file}")
+            self._X, self._Y = data["X"], data["Y"]
+            self._X_extreme, self._Y_extreme = data["X_extreme"], data["Y_extreme"]
+        except FileNotFoundError:
+            pass
+
+    def get_data(self, upsampling=False, as_numpy=True):
+        self._load()
+        X = self.get_X(upsampling, as_numpy)
+        Y = self.get_Y(upsampling, as_numpy)
+        self._reset_data()
+        return X, Y
+
+    def __repr__(self):
+        return ";".join(list(map(lambda x: str(x), self._collection)))
+
+    def get_X_original(self):
+        X = []
+        for data in self._collection:
+            X.append(data.get_X())
+        return X
+
+    def get_Y_original(self):
+        Y = self._collection[0].get_Y()
+        return Y
+
+    @staticmethod
+    def _to_numpy(d):
+        return list(map(lambda x: np.copy(x), d))
+
+    def get_X(self, upsampling=False, as_numpy=True):
+        no_data = (self._X is None)
+        self._load() if no_data is True else None
+        X = self._X if upsampling is False else self._X_extreme
+        self._reset_data() if no_data is True else None
+        return self._to_numpy(X) if as_numpy is True else X
+
+    def get_Y(self, upsampling=False, as_numpy=True):
+        no_data = (self._Y is None)
+        self._load() if no_data is True else None
+        Y = self._Y if upsampling is False else self._Y_extreme
+        self._reset_data() if no_data is True else None
+        return self._to_numpy([Y]) if as_numpy is True else Y
+
+    def harmonise_X(self):
+        X_original, Y_original = self.get_X_original(), self.get_Y_original()
+        dim = self.interpolation_dim
+        intersect = reduce(np.intersect1d, map(lambda x: x.coords[dim].values, X_original))
+        if len(intersect) < max(self.min_length, 1):
+            X, Y = None, None
+        else:
+            X = list(map(lambda x: x.sel({dim: intersect}), X_original))
+            Y = Y_original.sel({dim: intersect})
+        self._X, self._Y = X, Y
+
+    def get_observation(self):
+        return self.id_class.observation.copy().squeeze()
+
+    def get_transformation_Y(self):
+        return self.id_class.get_transformation_information()
+
+    def multiply_extremes(self, extreme_values: num_or_list = 1., extremes_on_right_tail_only: bool = False,
+                          timedelta: Tuple[int, str] = (1, 'm'), dim="datetime"):
+        """
+        Multiply extremes.
+
+        This method extracts extreme values from self.labels which are defined in the argument extreme_values. One can
+        also decide only to extract extremes on the right tail of the distribution. When extreme_values is a list of
+        floats/ints, all values larger than these entries (and, unless restricted to the right tail, smaller than their
+        negatives; extraction is performed in standardised space) are extracted iteratively. If for example
+        extreme_values = [1., 2.], a value of 1.5 is extracted once (for the 0th entry of the list), while a value of
+        2.5 is extracted twice (once for each entry). Timedelta is used to mark the extracted values by adding one
+        minute to each timestamp. As TOAR data are hourly, these "artificial" data points can easily be identified
+        later. Extreme inputs and labels are stored in self._X_extreme and self._Y_extreme, respectively.
+
+        :param extreme_values: user definition of extreme
+        :param extremes_on_right_tail_only: if False, also multiply values which are smaller than -extreme_values;
+            if True, only extract values larger than extreme_values
+        :param timedelta: used as arguments for np.timedelta64 in order to mark extreme values on the datetime axis
+        """
+        # check if X or Y is None
+        if (self._X is None) or (self._Y is None):
+            logging.debug(f"{str(self.id_class)} has no data for X or Y, skip multiply extremes")
+            return
+        if extreme_values is None:
+            logging.debug(f"No extreme values given, skip multiply extremes")
+            self._X_extreme, self._Y_extreme = self._X, self._Y
+            return
+
+        # check type of inputs
+        extreme_values = to_list(extreme_values)
+        for i in extreme_values:
+            if not isinstance(i, number.__args__):
+                raise TypeError(f"Elements of list extreme_values have to be {number.__args__}, but at least element "
+                                f"{i} is type {type(i)}")
+
+        for extr_val in sorted(extreme_values):
+            # check if some extreme values are already extracted
+            if (self._X_extreme is None) or (self._Y_extreme is None):
+                X = self._X
+                Y = self._Y
+            else:  # at least one extreme value iteration is done already: self._X_extreme/_Y_extreme are NOT None
+                X = self._X_extreme
+                Y = self._Y_extreme
+
+            # extract extremes based on occurrence in labels
+            other_dims = remove_items(list(Y.dims), dim)
+            if extremes_on_right_tail_only:
+                extreme_idx = (Y > extr_val).any(dim=other_dims)
+            else:
+                extreme_idx = xr.concat([(Y < -extr_val).any(dim=other_dims[0]),
+                                           (Y > extr_val).any(dim=other_dims[0])],
+                                          dim=other_dims[1]).any(dim=other_dims[1])
+
+            extremes_X = list(map(lambda x: x.sel(**{dim: extreme_idx}), X))
+            self._add_timedelta(extremes_X, dim, timedelta)
+
+            extremes_Y = Y.sel(**{dim: extreme_idx})
+            extremes_Y.coords[dim].values += np.timedelta64(*timedelta)
+
+            self._Y_extreme = xr.concat([Y, extremes_Y], dim=dim)
+            self._X_extreme = list(map(lambda x1, x2: xr.concat([x1, x2], dim=dim), X, extremes_X))
+
+    @staticmethod
+    def _add_timedelta(data, dim, timedelta):
+        for d in data:
+            d.coords[dim].values += np.timedelta64(*timedelta)
+
+    @classmethod
+    def transformation(cls, set_stations, **kwargs):
+        sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
+        transformation_dict = sp_keys.pop("transformation")
+        if transformation_dict is None:
+            return
+        scope = transformation_dict.pop("scope")
+        method = transformation_dict.pop("method")
+        if transformation_dict.pop("mean", None) is not None:
+            return
+        mean, std = None, None
+        for station in set_stations:
+            try:
+                sp = DataHandlerSingleStation(station, transformation={"method": method}, **sp_keys)
+                mean = sp.mean.copy(deep=True) if mean is None else mean.combine_first(sp.mean)
+                std = sp.std.copy(deep=True) if std is None else std.combine_first(sp.std)
+            except (AttributeError, EmptyQueryResult):
+                continue
+        if mean is None:
+            return None
+        mean_estimated = mean.mean("Stations")
+        std_estimated = std.mean("Stations")
+        return {"scope": scope, "method": method, "mean": mean_estimated, "std": std_estimated}
+
+    def get_coordinates(self):
+        return self.id_class.get_coordinates()
\ No newline at end of file
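A hedged sketch of building the renamed handler for a single station; every value below is a placeholder (the station id and statistics appear elsewhere in this repository), and running it would additionally require access to the underlying observation data:

```python
from mlair.data_handler import DefaultDataHandler

# placeholder arguments; accepted keywords are whatever DataHandlerSingleStation requires
kwargs = {"data_path": "testdata",
          "statistics_per_var": {"o3": "dma8eu", "temp": "maximum"},
          "station_type": "background",
          "network": "UBA",
          "sampling": "daily",
          "target_dim": "variables",
          "target_var": "o3"}
dh = DefaultDataHandler.build("DEBW107", **kwargs)
X, Y = dh.get_data(upsampling=False, as_numpy=True)
```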
diff --git a/mlair/data_handler/station_preparation.py b/mlair/data_handler/station_preparation.py
index ff8496ab30a3b6392ea2314ef2526c80e0f57591..6112e7c5dd937c9b3217d67037d7ad229e8eb963 100644
--- a/mlair/data_handler/station_preparation.py
+++ b/mlair/data_handler/station_preparation.py
@@ -16,6 +16,7 @@ import xarray as xr
 from mlair.configuration import check_path_and_create
 from mlair import helpers
 from mlair.helpers import join, statistics
+from mlair.data_handler.abstract_data_handler import AbstractDataHandler
 
 # define a more general date type for type hinting
 date = Union[dt.date, dt.datetime]
@@ -39,18 +40,7 @@ DEFAULT_SAMPLING = "daily"
 DEFAULT_INTERPOLATION_METHOD = "linear"
 
 
-class AbstractStationPrep(object):
-    def __init__(self): #, path, station, statistics_per_var, transformation, **kwargs):
-        pass
-
-    def get_X(self):
-        raise NotImplementedError
-
-    def get_Y(self):
-        raise NotImplementedError
-
-
-class StationPrep(AbstractStationPrep):
+class DataHandlerSingleStation(AbstractDataHandler):
 
     def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE,
                  network=DEFAULT_NETWORK, sampling=DEFAULT_SAMPLING, target_dim=DEFAULT_TARGET_DIM,
@@ -58,7 +48,7 @@ class StationPrep(AbstractStationPrep):
                  window_history_size=DEFAULT_WINDOW_HISTORY_SIZE, window_lead_time=DEFAULT_WINDOW_LEAD_TIME,
                  interpolation_limit: int = 0, interpolation_method: str = DEFAULT_INTERPOLATION_METHOD,
                  overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True,
-                 min_length: int = 0, start=None, end=None, **kwargs):
+                 min_length: int = 0, start=None, end=None, variables=None, **kwargs):
         super().__init__()  # path, station, statistics_per_var, transformation, **kwargs)
         self.station = helpers.to_list(station)
         self.path = os.path.abspath(data_path)
@@ -86,7 +76,7 @@ class StationPrep(AbstractStationPrep):
         # internal
         self.data = None
         self.meta = None
-        self.variables = kwargs.get('variables', list(statistics_per_var.keys()))
+        self.variables = list(statistics_per_var.keys()) if variables is None else variables
         self.history = None
         self.label = None
         self.observation = None
@@ -98,10 +88,7 @@ class StationPrep(AbstractStationPrep):
         self.min = None
         self._transform_method = None
 
-        self.kwargs = kwargs
-        # self.kwargs["overwrite_local_data"] = overwrite_local_data
-
-        # self.make_samples()
+        # create samples
         self.setup_samples()
 
     def __str__(self):
@@ -123,7 +110,7 @@ class StationPrep(AbstractStationPrep):
                f"time_dim='{self.time_dim}', window_history_size={self.window_history_size}, " \
                f"window_lead_time={self.window_lead_time}, interpolation_limit={self.interpolation_limit}, " \
                f"interpolation_method='{self.interpolation_method}', overwrite_local_data={self.overwrite_local_data}, " \
-               f"transformation={self._print_transformation_as_string}, **{self.kwargs})"
+               f"transformation={self._print_transformation_as_string})"
 
     @property
     def _print_transformation_as_string(self):
@@ -155,10 +142,10 @@ class StationPrep(AbstractStationPrep):
         """
         return self.label.squeeze("Stations").transpose("datetime", "window").copy()
 
-    def get_X(self):
+    def get_X(self, **kwargs):
         return self.get_transposed_history()
 
-    def get_Y(self):
+    def get_Y(self, **kwargs):
         return self.get_transposed_label()
 
     def get_coordinates(self):
@@ -498,7 +485,7 @@ class StationPrep(AbstractStationPrep):
         """
         chem_vars = ["benzene", "ch4", "co", "ethane", "no", "no2", "nox", "o3", "ox", "pm1", "pm10", "pm2p5",
                      "propane", "so2", "toluene"]
-        used_chem_vars = list(set(chem_vars) & set(self.variables))
+        used_chem_vars = list(set(chem_vars) & set(self.statistics_per_var.keys()))
         data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=minimum)
         return data
 
@@ -514,6 +501,59 @@ class StationPrep(AbstractStationPrep):
         :param transformation: the transformation dictionary as described above.
 
         :return: updated transformation dictionary
+
+        ## Transformation
+
+        There are two different approaches (called scopes) to transform the data:
+        1) `station`: transform the data of each station independently (somewhat like batch normalisation)
+        2) `data`: transform the data of all stations with shared metrics
+
+        Transformation must be set by the `transformation` attribute. If `transformation = None` is given to `ExperimentSetup`, 
+        data is not transformed at all. For all other setups, use the following dictionary structure to specify the 
+        transformation.
+        ```
+        transformation = {"scope": <...>, 
+                        "method": <...>,
+                        "mean": <...>,
+                        "std": <...>}
+        ExperimentSetup(..., transformation=transformation, ...)
+        ```
+
+        ### scopes
+
+        **station**: mean and std are not used
+
+        **data**: either provide already calculated values for mean and std (if required by transformation method), or choose 
+        from different calculation schemes, explained in the mean and std section.
+
+        ### supported transformation methods
+        Currently supported methods are:
+        * standardise (default, if method is not given)
+        * centre
+
+        ### mean and std
+        `"mean"="accurate"`: calculate the accurate values of mean and std (depending on method) by using all data.
+        Although this method is accurate, the calculation may take some time. Furthermore, it could potentially lead
+        to memory issues (not explored yet, but this could appear for very large amounts of data).
+
+        `"mean"="estimate"`: estimate mean and std (depending on method). For each station, mean and std are calculated
+        and afterwards aggregated using the mean value over all station-wise metrics. This method is less accurate,
+        especially regarding the std calculation, but is much faster.
+
+        We recommend using the latter method *estimate* for the following reasons:
+        * much faster calculation
+        * real accuracy of mean and std is less important, because it is "just" a transformation / scaling
+        * accuracy of the mean is almost as high as in the *accurate* case, because
+        $\overline{x_{ij}} = \overline{\left(\overline{x_i}\right)_j}$ holds. The only difference is that in the
+        *estimate* case each station's mean is weighted equally, independently of the station's actual data count.
+        * accuracy of the std is lower for *estimate* because
+        $\mathrm{Var}(x_{ij}) \ne \overline{\left(\mathrm{Var}(x_i)\right)_j}$, but the mean of all station-wise stds
+        is still a decent estimate of the true std.
+
+        `"mean"=<value, e.g. xr.DataArray>`: If mean and std are already calculated or shall be set manually, just add
+        the scaling values instead of the calculation method. For method *centre*, std can still be None, but it is
+        required for the *standardise* method. **Important**: The format of the given values **must** match the
+        internal data format of the data handler: `xr.DataArray` with `dims=["variables"]` and one value per variable.
+
         """
         if transformation is None:
             return
@@ -681,18 +721,18 @@ if __name__ == "__main__":
     # dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
     # print(dp)
     statistics_per_var = {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}
-    sp = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
-                     statistics_per_var=statistics_per_var, station_type='background',
-                     network='UBA', sampling='daily', target_dim='variables', target_var='o3',
-                     time_dim='datetime', window_history_size=7, window_lead_time=3,
-                     interpolation_limit=0
-                     )  # transformation={'method': 'standardise'})
+    sp = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
+                                  statistics_per_var=statistics_per_var, station_type='background',
+                                  network='UBA', sampling='daily', target_dim='variables', target_var='o3',
+                                  time_dim='datetime', window_history_size=7, window_lead_time=3,
+                                  interpolation_limit=0
+                                  )  # transformation={'method': 'standardise'})
     # sp.set_transformation({'method': 'standardise', 'mean': sp.mean+2, 'std': sp.std+1})
-    sp2 = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
-                      statistics_per_var=statistics_per_var, station_type='background',
-                      network='UBA', sampling='daily', target_dim='variables', target_var='o3',
-                      time_dim='datetime', window_history_size=7, window_lead_time=3,
-                      transformation={'method': 'standardise'})
+    sp2 = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
+                                   statistics_per_var=statistics_per_var, station_type='background',
+                                   network='UBA', sampling='daily', target_dim='variables', target_var='o3',
+                                   time_dim='datetime', window_history_size=7, window_lead_time=3,
+                                   transformation={'method': 'standardise'})
     sp2.transform(inverse=True)
     sp.get_X()
     sp.get_Y()
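To complement the transformation notes added to the docstring above, a hedged sketch of handing over pre-computed scaling values; the numbers are invented, the required format (`xr.DataArray` with `dims=["variables"]`, one value per variable) follows the docstring:

```python
import xarray as xr

# invented example values, one entry per input variable
mean = xr.DataArray([42.0, 283.0], coords={"variables": ["o3", "temp"]}, dims=["variables"])
std = xr.DataArray([18.0, 9.0], coords={"variables": ["o3", "temp"]}, dims=["variables"])

transformation = {"scope": "data", "method": "standardise", "mean": mean, "std": std}
# ExperimentSetup(..., transformation=transformation, ...)  # as in the docstring example above
```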
diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py
index 56e7b4c347a69781854a9cf8ad9a719f7d6ac8b9..0e69d22012a592b30c6ffdf9ed6082c47a291f90 100644
--- a/mlair/model_modules/model_class.py
+++ b/mlair/model_modules/model_class.py
@@ -23,13 +23,13 @@ How to create a customised model?
 
         class MyCustomisedModel(AbstractModelClass):
 
-            def __init__(self, window_history_size, window_lead_time, channels):
-                super.__init__()
+            def __init__(self, shape_inputs: list, shape_outputs: list):
+
+                super().__init__(shape_inputs[0], shape_outputs[0])
+
                 # settings
-                self.window_history_size = window_history_size
-                self.window_lead_time = window_lead_time
-                self.channels = channels
                 self.dropout_rate = 0.1
+                self.activation = keras.layers.PReLU
 
                 # apply to model
                 self.set_model()
@@ -49,14 +49,14 @@ How to create a customised model?
         class MyCustomisedModel(AbstractModelClass):
 
             def set_model(self):
-                x_input = keras.layers.Input(shape=(self.window_history_size + 1, 1, self.channels))
+                x_input = keras.layers.Input(shape=self.shape_inputs)
                 x_in = keras.layers.Conv2D(32, (1, 1), padding='same', name='{}_Conv_1x1'.format("major"))(x_input)
                 x_in = self.activation(name='{}_conv_act'.format("major"))(x_in)
                 x_in = keras.layers.Flatten(name='{}'.format("major"))(x_in)
                 x_in = keras.layers.Dropout(self.dropout_rate, name='{}_Dropout_1'.format("major"))(x_in)
                 x_in = keras.layers.Dense(16, name='{}_Dense_16'.format("major"))(x_in)
                 x_in = self.activation()(x_in)
-                x_in = keras.layers.Dense(self.window_lead_time, name='{}_Dense'.format("major"))(x_in)
+                x_in = keras.layers.Dense(self.shape_outputs, name='{}_Dense'.format("major"))(x_in)
                 out_main = self.activation()(x_in)
                 self.model = keras.Model(inputs=x_input, outputs=[out_main])
 
@@ -153,6 +153,7 @@ class AbstractModelClass(ABC):
                                           'target_tensors': None
                                           }
         self.__compile_options = self.__allowed_compile_options
+        self.__compile_options_is_set = False
         self.shape_inputs = shape_inputs
         self.shape_outputs = self.__extract_from_tuple(shape_outputs)
 
@@ -204,7 +205,8 @@ class AbstractModelClass(ABC):
     def compile_options(self) -> Callable:
         """
         The compile options property allows the user to use all keras.compile() arguments. They can either be passed as
-        dictionary (1), as attribute, with compile_options=None (2) or as mixture of both of them (3).
+        a dictionary (1), as instance attributes without setting compile_options (2), or as a mixture of both, i.e.
+        partly defined as instance attributes and partly passed as a dictionary (3).
         The method will raise an Error when the same parameter is set differently.
 
         Example (1) Recommended (includes check for valid keywords which are used as args in keras.compile)
@@ -220,7 +222,6 @@ class AbstractModelClass(ABC):
                 self.optimizer = keras.optimizers.SGD()
                 self.loss = keras.losses.mean_squared_error
                 self.metrics = ["mse", "mae"]
-                self.compile_options = None # make sure to use this line
 
         Example (3)
         Correct:
@@ -245,6 +246,8 @@ class AbstractModelClass(ABC):
 
         :return:
         """
+        if self.__compile_options_is_set is False:
+            self.compile_options = None
         return self.__compile_options
 
     @compile_options.setter
@@ -274,6 +277,7 @@ class AbstractModelClass(ABC):
             else:
                 raise ValueError(
                     f"Got different values or arguments for same argument: self.{allow_k}={new_v_attr.__class__} and '{allow_k}': {new_v_dic.__class__}")
+        self.__compile_options_is_set = True
 
     @staticmethod
     def __extract_from_tuple(tup):
@@ -282,6 +286,11 @@ class AbstractModelClass(ABC):
 
     @staticmethod
     def __compare_keras_optimizers(first, second):
+        """
+        Compare whether the optimisers and all of their settings are exactly equal.
+
+        :return: True if the optimisers are interchangeable, False if they are distinguishable.
+        """
         if first.__class__ == second.__class__ and first.__module__ == 'keras.optimizers':
             res = True
             init = tf.global_variables_initializer()
@@ -342,9 +351,8 @@ class AbstractModelClass(ABC):
 
 class MyLittleModel(AbstractModelClass):
     """
-    A customised model with a 1x1 Conv, and 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the
-    output layer depending on the window_lead_time parameter. Dropout is used between the Convolution and the first
-    Dense layer.
+    A customised model with 4 Dense layers (64, 32, 16, output size), where the last layer is the output layer and its
+    width is given by the ``shape_outputs`` parameter.
     """
 
     def __init__(self, shape_inputs: list, shape_outputs: list):
@@ -373,13 +381,8 @@ class MyLittleModel(AbstractModelClass):
         """
         Build the model.
         """
-
-        # add 1 to window_size to include current time step t0
         x_input = keras.layers.Input(shape=self.shape_inputs)
-        x_in = keras.layers.Conv2D(32, (1, 1), padding='same', name='{}_Conv_1x1'.format("major"))(x_input)
-        x_in = self.activation(name='{}_conv_act'.format("major"))(x_in)
-        x_in = keras.layers.Flatten(name='{}'.format("major"))(x_in)
-        x_in = keras.layers.Dropout(self.dropout_rate, name='{}_Dropout_1'.format("major"))(x_in)
+        x_in = keras.layers.Flatten(name='{}'.format("major"))(x_input)
         x_in = keras.layers.Dense(64, name='{}_Dense_64'.format("major"))(x_in)
         x_in = self.activation()(x_in)
         x_in = keras.layers.Dense(32, name='{}_Dense_32'.format("major"))(x_in)
@@ -688,3 +691,8 @@ class MyPaperModel(AbstractModelClass):
         self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9)
         self.compile_options = {"loss": [keras.losses.mean_squared_error, keras.losses.mean_squared_error],
                                 "metrics": ['mse', 'mea']}
+
+
+if __name__ == "__main__":
+    model = MyLittleModel([(1, 3, 10)], [2])
+    print(model.compile_options)
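As a companion to the compile-options changes above, a hedged sketch of a custom model that mixes both styles (Example (3)); the class, its single layer, and the hook names follow the pattern of the existing model classes in this module and are illustrative only:

```python
import keras

from mlair.model_modules.model_class import AbstractModelClass


class TinyModel(AbstractModelClass):
    """Hypothetical example: flatten the input and map it directly to the output width."""

    def __init__(self, shape_inputs: list, shape_outputs: list):
        super().__init__(shape_inputs[0], shape_outputs[0])
        self.set_model()
        self.set_compile_options()

    def set_model(self):
        x_input = keras.layers.Input(shape=self.shape_inputs)
        x_in = keras.layers.Flatten()(x_input)
        out_main = keras.layers.Dense(self.shape_outputs)(x_in)
        self.model = keras.Model(inputs=x_input, outputs=[out_main])

    def set_compile_options(self):
        # mixture: optimiser as attribute, loss and metrics via the dictionary
        self.optimizer = keras.optimizers.SGD(lr=0.01)
        self.compile_options = {"loss": keras.losses.mean_squared_error, "metrics": ["mse"]}
```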
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 5cc449aac88ebab58689656820769fe7751f6098..675e5ade587011a9ac835e9afb45f89173bc7653 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -786,8 +786,8 @@ class PlotTimeSeries:
     def _plot(self, plot_folder):
         pdf_pages = self._create_pdf_pages(plot_folder)
         for pos, station in enumerate(self._stations):
-            start, end = self._get_time_range(self._load_data(self._stations[0]))
             data = self._load_data(station)
+            start, end = self._get_time_range(data)
             fig, axes, factor = self._create_subplots(start, end)
             nan_list = []
             for i_year in range(end - start + 1):
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 407465ad4cd99b85c3c5b37eb2aef6e9e71c6424..f5d7d80f01de9e04a4e1e2d41901b402a17816df 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -10,7 +10,7 @@ from mlair.configuration import path_config
 from mlair import helpers
 from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_NETWORK, DEFAULT_STATION_TYPE, \
     DEFAULT_START, DEFAULT_END, DEFAULT_WINDOW_HISTORY_SIZE, DEFAULT_OVERWRITE_LOCAL_DATA, DEFAULT_TRANSFORMATION, \
-    DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \
+    DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAIN_MODEL, \
     DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \
     DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \
     DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_INTERPOLATION_LIMIT, \
@@ -18,7 +18,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST
-from mlair.data_handler.advanced_data_handler import DefaultDataPreparation
+from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.model_class import MyLittleModel as VanillaModel
 
@@ -39,7 +39,7 @@ class ExperimentSetup(RunEnvironment):
         * `data_path` [.]
         * `create_new_model` [.]
         * `bootstrap_path` [.]
-        * `trainable` [.]
+        * `train_model` [.]
         * `fraction_of_training` [.]
         * `extreme_values` [train]
         * `extremes_on_right_tail_only` [train]
@@ -94,7 +94,7 @@ class ExperimentSetup(RunEnvironment):
 
         # set post-processing instructions
         self._set_param("evaluate_bootstraps", evaluate_bootstraps, scope="general.postprocessing")
-        create_new_bootstraps = max([self.data_store.get("trainable", "general"), create_new_bootstraps or False])
+        create_new_bootstraps = max([self.data_store.get("train_model", "general"), create_new_bootstraps or False])
         self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing")
         self._set_param("number_of_bootstraps", number_of_bootstraps, default=20, scope="general.postprocessing")
         self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
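A hedged sketch of the renamed flag from the user side; only keywords visible in this diff are used and all values are placeholders. Note the coupling shown in the hunk above: a truthy ``train_model`` (or ``create_new_model``) also forces fresh bootstraps in the postprocessing scope.

```python
from mlair.run_modules.experiment_setup import ExperimentSetup

# formerly `trainable=...`; create_new_model=True implies train_model=True,
# which in turn implies create_new_bootstraps=True for postprocessing
ExperimentSetup(train_model=None, create_new_model=True,
                evaluate_bootstraps=True, number_of_bootstraps=20)
```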
@@ -144,8 +144,8 @@ class ExperimentSetup(RunEnvironment):
     :param test_start:
     :param test_end:
     :param use_all_stations_on_all_data_sets:
-    :param trainable: train a new model from scratch or resume training with existing model if `True` (default) or
-        freeze loaded model and do not perform any modification on it. ``trainable`` is set to `True` if
+    :param train_model: if `True` (default), train a new model from scratch or resume training with an existing model;
+        if `False`, freeze the loaded model and do not perform any modification on it. ``train_model`` is set to `True` if
         ``create_new_model`` is `True`.
     :param fraction_of_train: given value is used to split between test data and train data (including validation data).
         The value of ``fraction_of_train`` must be in `(0, 1)` but is recommended to be in the interval `[0.6, 0.9]`.
@@ -166,7 +166,7 @@ class ExperimentSetup(RunEnvironment):
         parameter is set to `False`, make sure, that a suitable model already exists in the experiment path. This model
         must fit in terms of input and output dimensions as well as ``window_history_size`` and ``window_lead_time`` and
         must be implemented as a :py:mod:`model class <src.model_modules.model_class>` and imported in
-        :py:mod:`model setup <src.run_modules.model_setup>`. If ``create_new_model`` is `True`, parameter ``trainable``
+        :py:mod:`model setup <src.run_modules.model_setup>`. If ``create_new_model`` is `True`, parameter ``train_model``
         is automatically set to `True` too.
     :param bootstrap_path:
     :param permute_data_on_training: shuffle train data individually for each station if `True`. This is performed each
@@ -215,13 +215,13 @@ class ExperimentSetup(RunEnvironment):
                  time_dim=None,
                  interpolation_method=None,
                  interpolation_limit=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None,
-                 test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None,
+                 test_end=None, use_all_stations_on_all_data_sets=None, train_model: bool = None, fraction_of_train: float = None,
                  experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily",
                  create_new_model = None, bootstrap_path=None, permute_data_on_training = None, transformation=None,
                  train_min_length=None, val_min_length=None, test_min_length=None, extreme_values: list = None,
                  extremes_on_right_tail_only: bool = None, evaluate_bootstraps=None, plot_list=None, number_of_bootstraps=None,
                  create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
-                 hpc_hosts=None, model=None, batch_size=None, epochs=None, data_preparation=None, **kwargs):
+                 hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -233,11 +233,11 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("login_nodes", login_nodes, default=DEFAULT_HPC_LOGIN_LIST)
         self._set_param("create_new_model", create_new_model, default=DEFAULT_CREATE_NEW_MODEL)
         if self.data_store.get("create_new_model"):
-            trainable = True
+            train_model = True
         data_path = self.data_store.get("data_path")
         bootstrap_path = path_config.set_bootstrap_path(bootstrap_path, data_path, sampling)
         self._set_param("bootstrap_path", bootstrap_path)
-        self._set_param("trainable", trainable, default=DEFAULT_TRAINABLE)
+        self._set_param("train_model", train_model, default=DEFAULT_TRAIN_MODEL)
         self._set_param("fraction_of_training", fraction_of_train, default=DEFAULT_FRACTION_OF_TRAINING)
         self._set_param("extreme_values", extreme_values, default=DEFAULT_EXTREME_VALUES, scope="train")
         self._set_param("extremes_on_right_tail_only", extremes_on_right_tail_only,
@@ -290,7 +290,7 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("sampling", sampling)
         self._set_param("transformation", transformation, default=DEFAULT_TRANSFORMATION)
         self._set_param("transformation", None, scope="preprocessing")
-        self._set_param("data_preparation", data_preparation, default=DefaultDataPreparation)
+        self._set_param("data_handler", data_handler, default=DefaultDataHandler)
 
         # target
         self._set_param("target_var", target_var, default=DEFAULT_TARGET_VAR)
@@ -331,7 +331,7 @@ class ExperimentSetup(RunEnvironment):
         # set post-processing instructions
         self._set_param("evaluate_bootstraps", evaluate_bootstraps, default=DEFAULT_EVALUATE_BOOTSTRAPS,
                         scope="general.postprocessing")
-        create_new_bootstraps = max([self.data_store.get("trainable", "general"),
+        create_new_bootstraps = max([self.data_store.get("train_model", "general"),
                                      create_new_bootstraps or DEFAULT_CREATE_NEW_BOOTSTRAPS])
         self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing")
         self._set_param("number_of_bootstraps", number_of_bootstraps, default=DEFAULT_NUMBER_OF_BOOTSTRAPS,
diff --git a/mlair/run_modules/model_setup.py b/mlair/run_modules/model_setup.py
index 3dc56f01c4f37ce9fc53086d837386af81e5f53d..e4bff5a621619e6d806fc7ae58ed093331463187 100644
--- a/mlair/run_modules/model_setup.py
+++ b/mlair/run_modules/model_setup.py
@@ -31,7 +31,7 @@ class ModelSetup(RunEnvironment):
     Required objects [scope] from data store:
         * `experiment_path` [.]
         * `experiment_name` [.]
-        * `trainable` [.]
+        * `train_model` [.]
         * `create_new_model` [.]
         * `generator` [train]
         * `model_class` [.]
@@ -64,7 +64,7 @@ class ModelSetup(RunEnvironment):
         self.model_name = self.path % "%s.h5"
         self.checkpoint_name = self.path % "model-best.h5"
         self.callbacks_name = self.path % "model-best-callbacks-%s.pickle"
-        self._trainable = self.data_store.get("trainable")
+        self._train_model = self.data_store.get("train_model")
         self._create_new_model = self.data_store.get("create_new_model")
         self._run()
 
@@ -80,7 +80,7 @@ class ModelSetup(RunEnvironment):
         self.plot_model()
 
         # load weights if no training shall be performed
-        if not self._trainable and not self._create_new_model:
+        if not self._train_model and not self._create_new_model:
             self.load_weights()
 
         # create checkpoint
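
The hunk above also pins down when stored weights are restored: only if the run neither trains nor builds a new model. A minimal sketch of that condition (the helper name is hypothetical, the logic is taken from ``ModelSetup._run`` above):

    def should_load_weights(train_model: bool, create_new_model: bool) -> bool:
        # pure post-processing runs (train_model=False, create_new_model=False) reuse weights from disk
        return not train_model and not create_new_model
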
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index d4f409ec503ba0ae37bdd1d1bec4b0207eec453c..b4af7a754335e8da6d29870b1a0c4152d7dc9af5 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -81,16 +81,12 @@ class PostProcessing(RunEnvironment):
 
     def _run(self):
         # ols model
-        with TimeTracking():
-            self.train_ols_model()
-            logging.info("take a look on the next reported time measure. If this increases a lot, one should think to "
-                         "skip train_ols_model() whenever it is possible to save time.")
+        self.train_ols_model()
 
         # forecasts
-        with TimeTracking():
-            self.make_prediction()
-            logging.info("take a look on the next reported time measure. If this increases a lot, one should think to "
-                         "skip make_prediction() whenever it is possible to save time.")
+        self.make_prediction()
+
+        # skill scores on test data
         self.calculate_test_score()
 
         # bootstraps
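
The removed ``TimeTracking`` wrappers bracketed ``train_ols_model`` and ``make_prediction`` together with a reminder log message. If an individual step still needs to be timed, the same context manager can be applied locally; a sketch, assuming only the plain ``with TimeTracking():`` usage visible in the removed lines:

    from mlair.helpers import TimeTracking

    def timed_prediction(post_processing):
        # wrap a single expensive step instead of the whole _run method
        with TimeTracking():
            post_processing.make_prediction()
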
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index b4185df2f6699cb20ac96e32661433e7a6164abc..ed972896e7a39b0b56df23dbc8a8d1ae64fb4183 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -10,7 +10,7 @@ from typing import Tuple
 import numpy as np
 import pandas as pd
 
-from mlair.data_handler import DataCollection
+from mlair.data_handler import DataCollection, AbstractDataHandler
 from mlair.helpers import TimeTracking
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
@@ -55,8 +55,8 @@ class PreProcessing(RunEnvironment):
 
     def _run(self):
         stations = self.data_store.get("stations")
-        data_preparation = self.data_store.get("data_preparation")
-        _, valid_stations = self.validate_station(data_preparation, stations, "preprocessing", overwrite_local_data=True)
+        data_handler = self.data_store.get("data_handler")
+        _, valid_stations = self.validate_station(data_handler, stations, "preprocessing", overwrite_local_data=True)
         if len(valid_stations) == 0:
             raise ValueError("Couldn't find any valid data according to given parameters. Abort experiment run.")
         self.data_store.set("stations", valid_stations)
@@ -187,12 +187,12 @@ class PreProcessing(RunEnvironment):
             set_stations = stations[index_list]
         logging.debug(f"{set_name.capitalize()} stations (len={len(set_stations)}): {set_stations}")
         # create set data_collection and store
-        data_preparation = self.data_store.get("data_preparation")
-        collection, valid_stations = self.validate_station(data_preparation, set_stations, set_name)
+        data_handler = self.data_store.get("data_handler")
+        collection, valid_stations = self.validate_station(data_handler, set_stations, set_name)
         self.data_store.set("stations", valid_stations, scope=set_name)
         self.data_store.set("data_collection", collection, scope=set_name)
 
-    def validate_station(self, data_preparation, set_stations, set_name=None, overwrite_local_data=False):
+    def validate_station(self, data_handler: AbstractDataHandler, set_stations, set_name=None, overwrite_local_data=False):
         """
         Check if all given stations in `all_stations` are valid.
 
@@ -212,14 +212,14 @@ class PreProcessing(RunEnvironment):
         logging.info(f"check valid stations started{' (%s)' % (set_name if set_name is not None else 'all')}")
         # calculate transformation using train data
         if set_name == "train":
-            self.transformation(data_preparation, set_stations)
+            self.transformation(data_handler, set_stations)
         # start station check
         collection = DataCollection()
         valid_stations = []
-        kwargs = self.data_store.create_args_dict(data_preparation.requirements(), scope=set_name)
+        kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope=set_name)
         for station in set_stations:
             try:
-                dp = data_preparation.build(station, name_affix=set_name, **kwargs)
+                dp = data_handler.build(station, name_affix=set_name, **kwargs)
                 collection.add(dp)
                 valid_stations.append(station)
             except (AttributeError, EmptyQueryResult):
@@ -228,10 +228,10 @@ class PreProcessing(RunEnvironment):
                      f"{len(set_stations)} valid stations.")
         return collection, valid_stations
 
-    def transformation(self, data_preparation, stations):
-        if hasattr(data_preparation, "transformation"):
-            kwargs = self.data_store.create_args_dict(data_preparation.requirements(), scope="train")
-            transformation_dict = data_preparation.transformation(stations, **kwargs)
+    def transformation(self, data_handler: AbstractDataHandler, stations):
+        if hasattr(data_handler, "transformation"):
+            kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope="train")
+            transformation_dict = data_handler.transformation(stations, **kwargs)
             if transformation_dict is not None:
                 self.data_store.set("transformation", transformation_dict)
 
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index f8909e15341f959455b1e8da0b0cb7502bdfa81b..113765e0d295bb0b1d756cd1cefba85093b20089 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -23,7 +23,7 @@ class Training(RunEnvironment):
     Train your model with this module.
 
     This module isn't required if only a fresh post-processing is performed. Either remove the training call from
-    your run script or set create_new_model and trainable both to false.
+    your run script or set create_new_model and train_model both to false.
 
     Schedule of training:
         #. set_generators(): set generators for training, validation and testing and distribute according to batch size
@@ -40,7 +40,7 @@ class Training(RunEnvironment):
         * `model_name` [model]
         * `experiment_name` [.]
         * `experiment_path` [.]
-        * `trainable` [.]
+        * `train_model` [.]
         * `create_new_model` [.]
         * `generator` [train, val, test]
         * `plot_path` [.]
@@ -72,7 +72,7 @@ class Training(RunEnvironment):
         self.epochs = self.data_store.get("epochs")
         self.callbacks: CallbackHandler = self.data_store.get("callbacks", "model")
         self.experiment_name = self.data_store.get("experiment_name")
-        self._trainable = self.data_store.get("trainable")
+        self._train_model = self.data_store.get("train_model")
         self._create_new_model = self.data_store.get("create_new_model")
         self._run()
 
@@ -80,12 +80,12 @@ class Training(RunEnvironment):
         """Run training. Details in class description."""
         self.set_generators()
         self.make_predict_function()
-        if self._trainable:
+        if self._train_model:
             self.train()
             self.save_model()
             self.report_training()
         else:
-            logging.info("No training has started, because trainable parameter was false.")
+            logging.info("No training has started, because train_model parameter was false.")
 
     def make_predict_function(self) -> None:
         """
diff --git a/mlair/run_script.py b/mlair/run_script.py
index 00a28f686bf392f76787b56a48790999e9fa5c05..10851da1e3ef77b3d9a9c325ab018fdea5f31fb7 100644
--- a/mlair/run_script.py
+++ b/mlair/run_script.py
@@ -6,7 +6,7 @@ import inspect
 
 
 def run(stations=None,
-        trainable=None, create_new_model=None,
+        train_model=None, create_new_model=None,
         window_history_size=None,
         experiment_date="testrun",
         variables=None, statistics_per_var=None,
@@ -27,7 +27,7 @@ def run(stations=None,
         model=None,
         batch_size=None,
         epochs=None,
-        data_preparation=None,
+        data_handler=None,
         **kwargs):
 
     params = inspect.getfullargspec(DefaultWorkflow).args
@@ -39,5 +39,5 @@ def run(stations=None,
 
 if __name__ == "__main__":
     from mlair.model_modules.model_class import MyBranchedModel
-    run(statistics_per_var={'o3': 'dma8eu', "temp": "maximum"}, trainable=True,
+    run(statistics_per_var={'o3': 'dma8eu', "temp": "maximum"}, train_model=True,
         create_new_model=True, model=MyBranchedModel, station_type="background")
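
Under the new names, the ``__main__`` example above configures a training run. For completeness, a hedged counterpart for a post-processing-only call: the statistics dictionary and model are copied from the example above, the keyword arguments mirror the renamed signature, and, as the ``create_new_model`` docstring notes, a matching model must already exist in the experiment path.

    from mlair.run_script import run
    from mlair.model_modules.model_class import MyBranchedModel

    # reuse an existing model: no new model is created and its weights stay frozen
    run(statistics_per_var={'o3': 'dma8eu', "temp": "maximum"},
        train_model=False, create_new_model=False,
        model=MyBranchedModel, station_type="background")
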
diff --git a/mlair/workflows/default_workflow.py b/mlair/workflows/default_workflow.py
index 3dba7e6c5c5773fa4d74860b2cba67a5804123b7..85d6726b70b699968933bf9af7580895490b8a6d 100644
--- a/mlair/workflows/default_workflow.py
+++ b/mlair/workflows/default_workflow.py
@@ -14,7 +14,7 @@ class DefaultWorkflow(Workflow):
     the mentioned ordering."""
 
     def __init__(self, stations=None,
-        trainable=None, create_new_model=None,
+        train_model=None, create_new_model=None,
         window_history_size=None,
         experiment_date="testrun",
         variables=None, statistics_per_var=None,
@@ -58,7 +58,7 @@ class DefaultWorkflowHPC(Workflow):
     Training and PostProcessing in exactly the mentioned order."""
 
     def __init__(self, stations=None,
-        trainable=None, create_new_model=None,
+        train_model=None, create_new_model=None,
         window_history_size=None,
         experiment_date="testrun",
         variables=None, statistics_per_var=None,
diff --git a/requirements.txt b/requirements.txt
index 7da29a05b748531fd4ec327ff17f432ff1ecaabb..e2d8f5bc47e3ab1365814259b313b1a07e54fff3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -65,3 +65,5 @@ wcwidth==0.1.8
 Werkzeug==1.0.0
 xarray==0.15.0
 zipp==3.1.0
+
+setuptools~=49.6.0
\ No newline at end of file
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
index 5ddb56acc71e0a51abb99b9447f871ddcb715a5d..38598ff6b902da6d96eb870124f60c72737000c2 100644
--- a/requirements_gpu.txt
+++ b/requirements_gpu.txt
@@ -2,7 +2,7 @@ absl-py==0.9.0
 astor==0.8.1
 atomicwrites==1.3.0
 attrs==19.3.0
-Cartopy==0.17.0
+#Cartopy==0.17.0
 certifi==2019.11.28
 chardet==3.0.4
 cloudpickle==1.3.0
@@ -16,9 +16,9 @@ grpcio==1.27.2
 h5py==2.10.0
 idna==2.8
 importlib-metadata==1.5.0
-Keras==2.2.4
-Keras-Applications==1.0.8
-Keras-Preprocessing==1.1.0
+#Keras==2.2.4
+#Keras-Applications==1.0.8
+#Keras-Preprocessing==1.1.0
 kiwisolver==1.1.0
 locket==0.2.0
 Markdown==3.2.1
@@ -54,10 +54,10 @@ seaborn==0.10.0
 six==1.11.0
 statsmodels==0.11.1
 tabulate
-tensorboard==1.13.1
-tensorflow-estimator==1.13.0
-tensorflow-gpu==1.13.1
-termcolor==1.1.0
+#tensorboard==1.13.1
+#tensorflow-estimator==1.13.0
+#tensorflow-gpu==1.13.1
+#termcolor==1.1.0
 toolz==0.10.0
 typing-extensions
 urllib3==1.25.8
diff --git a/setupHPC.sh b/setupHPC.sh
index 0248fdc09e658bac1ba6f9742426ce41996e1ade..a50f2ddea62cc298eaf6ed9a1b443d0f9769c5b6 100644
--- a/setupHPC.sh
+++ b/setupHPC.sh
@@ -1,3 +1,7 @@
+# This is the main installation script for the HPC systems JUWELS and HDFML operated by JSC.
+# It loads all preinstalled HPC modules, installs additional packages via pip3, and finally creates the sbatch runscripts.
+# If you use other HPC systems, you can use this script and the related scripts in HPC_setup/ as a blueprint.
+# __author__ = Felix Kleinert
 
 basepath=${PWD}/
 settingpath=HPC_setup/
diff --git a/test/test_run_modules/test_experiment_setup.py b/test/test_run_modules/test_experiment_setup.py
index abd265f5815d974d6edb474e5a03ed08dc5843cc..ff35508542b694eb1def0ba791d9a5f70043f19c 100644
--- a/test/test_run_modules/test_experiment_setup.py
+++ b/test/test_run_modules/test_experiment_setup.py
@@ -38,7 +38,7 @@ class TestExperimentSetup:
         data_store = exp_setup.data_store
         # experiment setup
         assert data_store.get("data_path", "general") == prepare_host()
-        assert data_store.get("trainable", "general") is True
+        assert data_store.get("train_model", "general") is True
         assert data_store.get("create_new_model", "general") is True
         assert data_store.get("fraction_of_training", "general") == 0.8
         # set experiment name
@@ -99,7 +99,7 @@ class TestExperimentSetup:
         data_store = exp_setup.data_store
         # experiment setup
         assert data_store.get("data_path", "general") == prepare_host()
-        assert data_store.get("trainable", "general") is True
+        assert data_store.get("train_model", "general") is True
         assert data_store.get("create_new_model", "general") is True
         assert data_store.get("fraction_of_training", "general") == 0.5
         # set experiment name
@@ -145,22 +145,22 @@ class TestExperimentSetup:
         # use all stations on all data sets (train, val, test)
         assert data_store.get("use_all_stations_on_all_data_sets", "general.test") is False
 
-    def test_init_trainable_behaviour(self):
-        exp_setup = ExperimentSetup(trainable=False, create_new_model=True)
+    def test_init_train_model_behaviour(self):
+        exp_setup = ExperimentSetup(train_model=False, create_new_model=True)
         data_store = exp_setup.data_store
-        assert data_store.get("trainable", "general") is True
+        assert data_store.get("train_model", "general") is True
         assert data_store.get("create_new_model", "general") is True
-        exp_setup = ExperimentSetup(trainable=False, create_new_model=False)
+        exp_setup = ExperimentSetup(train_model=False, create_new_model=False)
         data_store = exp_setup.data_store
-        assert data_store.get("trainable", "general") is False
+        assert data_store.get("train_model", "general") is False
         assert data_store.get("create_new_model", "general") is False
-        exp_setup = ExperimentSetup(trainable=True, create_new_model=True)
+        exp_setup = ExperimentSetup(train_model=True, create_new_model=True)
         data_store = exp_setup.data_store
-        assert data_store.get("trainable", "general") is True
+        assert data_store.get("train_model", "general") is True
         assert data_store.get("create_new_model", "general") is True
-        exp_setup = ExperimentSetup(trainable=True, create_new_model=False)
+        exp_setup = ExperimentSetup(train_model=True, create_new_model=False)
         data_store = exp_setup.data_store
-        assert data_store.get("trainable", "general") is True
+        assert data_store.get("train_model", "general") is True
         assert data_store.get("create_new_model", "general") is False
 
     def test_compare_variables_and_statistics(self):
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index 97e73204068d334590ee98271080acddf29dfc5f..bdb8fdabff67ad894275c805522b9df4cf167011 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -2,7 +2,7 @@ import logging
 
 import pytest
 
-from mlair.data_handler import DefaultDataPreparation, DataCollection, AbstractDataPreparation
+from mlair.data_handler import DefaultDataHandler, DataCollection, AbstractDataHandler
 from mlair.helpers.datastore import NameNotFoundInScope
 from mlair.helpers import PyTestRegex
 from mlair.run_modules.experiment_setup import ExperimentSetup
@@ -28,7 +28,7 @@ class TestPreProcessing:
     def obj_with_exp_setup(self):
         ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
                         statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background",
-                        data_preparation=DefaultDataPreparation)
+                        data_handler=DefaultDataHandler)
         pre = object.__new__(PreProcessing)
         super(PreProcessing, pre).__init__()
         yield pre
@@ -90,7 +90,7 @@ class TestPreProcessing:
         pre = obj_with_exp_setup
         caplog.set_level(logging.INFO)
         stations = pre.data_store.get("stations", "general")
-        data_preparation = pre.data_store.get("data_preparation")
+        data_preparation = pre.data_store.get("data_handler")
         collection, valid_stations = pre.validate_station(data_preparation, stations, set_name=name)
         assert isinstance(collection, DataCollection)
         assert len(valid_stations) < len(stations)
@@ -110,7 +110,7 @@ class TestPreProcessing:
 
     def test_transformation(self):
         pre = object.__new__(PreProcessing)
-        data_preparation = AbstractDataPreparation
+        data_preparation = AbstractDataHandler
         stations = ['DEBW107', 'DEBY081']
         assert pre.transformation(data_preparation, stations) is None
         class data_preparation_no_trans: pass
diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py
index 1fec8f4e56e2925bff0bc4af859dac1fe5fbb2b6..c0b625ef70deeb0686b236275e6bd1182ad48d41 100644
--- a/test/test_run_modules/test_training.py
+++ b/test/test_run_modules/test_training.py
@@ -9,7 +9,7 @@ import mock
 import pytest
 from keras.callbacks import History
 
-from mlair.data_handler import DataCollection, KerasIterator, DefaultDataPreparation
+from mlair.data_handler import DataCollection, KerasIterator, DefaultDataHandler
 from mlair.helpers import PyTestRegex
 from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.inception_model import InceptionModelBase
@@ -73,7 +73,7 @@ class TestTraining:
         path_plot = os.path.join(path, "plots")
         os.makedirs(path_plot)
         obj.data_store.set("plot_path", path_plot, "general")
-        obj._trainable = True
+        obj._train_model = True
         obj._create_new_model = False
         yield obj
         if os.path.exists(path):
@@ -125,12 +125,12 @@ class TestTraining:
 
     @pytest.fixture
     def data_collection(self, path, window_history_size, window_lead_time, statistics_per_var):
-        data_prep = DefaultDataPreparation.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'),
-                                                 statistics_per_var=statistics_per_var, station_type="background",
-                                                 network="AIRBASE", sampling="daily", target_dim="variables",
-                                                 target_var="o3", time_dim="datetime",
-                                                 window_history_size=window_history_size,
-                                                 window_lead_time=window_lead_time, name_affix="train")
+        data_prep = DefaultDataHandler.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'),
+                                             statistics_per_var=statistics_per_var, station_type="background",
+                                             network="AIRBASE", sampling="daily", target_dim="variables",
+                                             target_var="o3", time_dim="datetime",
+                                             window_history_size=window_history_size,
+                                             window_lead_time=window_lead_time, name_affix="train")
         return DataCollection([data_prep])
 
     @pytest.fixture
@@ -187,7 +187,7 @@ class TestTraining:
         obj.data_store.set("hist", hist, "general.model")
         obj.data_store.set("experiment_name", "TestExperiment", "general")
         obj.data_store.set("experiment_path", path, "general")
-        obj.data_store.set("trainable", True, "general")
+        obj.data_store.set("train_model", True, "general")
         obj.data_store.set("create_new_model", True, "general")
         os.makedirs(batch_path)
         obj.data_store.set("batch_path", batch_path, "general")
@@ -203,9 +203,9 @@ class TestTraining:
 
     def test_no_training(self, ready_to_init, caplog):
         caplog.set_level(logging.INFO)
-        ready_to_init.data_store.set("trainable", False)
+        ready_to_init.data_store.set("train_model", False)
         Training()
-        message = "No training has started, because trainable parameter was false."
+        message = "No training has started, because train_model parameter was false."
         assert caplog.record_tuples[-2] == ("root", 20, message)
 
     def test_run(self, ready_to_run):