diff --git a/README.md b/README.md
index 329b1f16ffc1f38ddc78408a5d04199b69bf7dc8..e49362e95e9a69c159a5b8d857ccb336cf58d3c6 100644
--- a/README.md
+++ b/README.md
@@ -6,4 +6,10 @@ This is a collection of all relevant functions used for ML stuff in the ESDE gro
 
 See a description [here](https://towardsdatascience.com/a-simple-guide-to-the-versions-of-the-inception-network-7fc52b863202)
 or take a look on the papers [Going Deeper with Convolutions (Szegedy et al., 2014)](https://arxiv.org/abs/1409.4842)
-and [Network In Network (Lin et al., 2014)](https://arxiv.org/abs/1312.4400).
\ No newline at end of file
+and [Network In Network (Lin et al., 2014)](https://arxiv.org/abs/1312.4400).
+
+
+# Installation
+
+* Install __proj__ on your machine using the console. E.g. for opensuse / leap `zypper install proj`
+* c++ compiler required for cartopy installation
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index e8ac7eb75d866b27d72af51fae31f417c471dd05..9ccd09ef9234bafff4559aa9fc325bef7d8bf3ea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,16 @@ pytest-html
 pydot
 mock
 statsmodels
+seaborn
+dask==0.20.2
+toolz  # for dask
+cloudpickle  # for dask
+cython==0.29.14
+pyshp
+six
+pyproj
+shapely
+cartopy==0.16.0
 matplotlib
+pillow
+scipy
\ No newline at end of file
diff --git a/run.py b/run.py
index 03eda04280a19e5c2bb9f1743c40f07e9e3fd2cc..71244fb9d15f594ac3ffbce60341d5c8dcb15f03 100644
--- a/run.py
+++ b/run.py
@@ -30,7 +30,8 @@ def main(parser_args):
 if __name__ == "__main__":
 
     formatter = '%(asctime)s - %(levelname)s: %(message)s  [%(filename)s:%(funcName)s:%(lineno)s]'
-    logging.basicConfig(format=formatter, level=logging.INFO)
+    # logging.basicConfig(format=formatter, level=logging.INFO)
+    logging.basicConfig(format=formatter, level=logging.DEBUG)
 
     parser = argparse.ArgumentParser()
     parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None,
diff --git a/src/data_handling/data_preparation.py b/src/data_handling/data_preparation.py
index db800f5e50146c4f73cde643f154bcb1a5047437..d0d89438c14c4f2cfbbc3e76504b82f310dc1a8a 100644
--- a/src/data_handling/data_preparation.py
+++ b/src/data_handling/data_preparation.py
@@ -1,7 +1,6 @@
 __author__ = 'Felix Kleinert, Lukas Leufen'
 __date__ = '2019-10-16'
 
-
 import xarray as xr
 import pandas as pd
 import numpy as np
@@ -12,7 +11,6 @@ from src import statistics
 from typing import Union, List, Iterable
 import datetime as dt
 
-
 # define a more general date type for type hinting
 date = Union[dt.date, dt.datetime]
 
@@ -72,35 +70,42 @@ class DataPrep(object):
         Load data and meta data either from local disk (preferred) or download new data from TOAR database if no local
         data is  available. The latter case, store downloaded data locally if wished (default yes).
         """
-
         helpers.check_path_and_create(self.path)
         file_name = self._set_file_name()
         meta_file = self._set_meta_file_name()
-        try:
-
-            logging.debug(f"try to load local data from: {file_name}")
-            data = self._slice_prep(xr.open_dataarray(file_name))
-            self.data = self.check_for_negative_concentrations(data)
-            self.meta = pd.read_csv(meta_file, index_col=0)
-            if self.station_type is not None:
-                self.check_station_meta()
-            logging.debug("loading finished")
-        except FileNotFoundError as e:
-            logging.warning(e)
-            data, self.meta = self.download_data_from_join(file_name, meta_file)
-            data = self._slice_prep(data)
-            self.data = self.check_for_negative_concentrations(data)
+        if self.kwargs.get('overwrite_local_data', False):
+            logging.debug(f"overwrite_local_data is true, therefore reload {file_name} from JOIN")
+            if os.path.exists(file_name):
+                os.remove(file_name)
+            if os.path.exists(meta_file):
+                os.remove(meta_file)
+            self.download_data(file_name, meta_file)
             logging.debug("loaded new data from JOIN")
+        else:
+            try:
+                logging.debug(f"try to load local data from: {file_name}")
+                data = self._slice_prep(xr.open_dataarray(file_name))
+                self.data = self.check_for_negative_concentrations(data)
+                self.meta = pd.read_csv(meta_file, index_col=0)
+                if self.station_type is not None:
+                    self.check_station_meta()
+                logging.debug("loading finished")
+            except FileNotFoundError as e:
+                logging.warning(e)
+                self.download_data(file_name, meta_file)
+                logging.debug("loaded new data from JOIN")
+
+    def download_data(self, file_name, meta_file):
+        data, self.meta = self.download_data_from_join(file_name, meta_file)
+        data = self._slice_prep(data)
+        self.data = self.check_for_negative_concentrations(data)
 
     def check_station_meta(self):
         """
         Search for the entries in meta data and compare the value with the requested values. Raise a FileNotFoundError
         if the values mismatch.
         """
-        check_dict = {
-            "station_type": self.station_type,
-            "network_name": self.network
-        }
+        check_dict = {"station_type": self.station_type, "network_name": self.network}
         for (k, v) in check_dict.items():
             if self.meta.at[k, self.station[0]] != v:
                 logging.debug(f"meta data does not agree which given request for {k}: {v} (requested) != "
@@ -138,8 +143,8 @@ class DataPrep(object):
         return f"Dataprep(path='{self.path}', network='{self.network}', station={self.station}, " \
                f"variables={self.variables}, station_type={self.station_type}, **{self.kwargs})"
 
-    def interpolate(self, dim: str, method: str = 'linear', limit: int = None,
-                    use_coordinate: Union[bool, str] = True, **kwargs):
+    def interpolate(self, dim: str, method: str = 'linear', limit: int = None, use_coordinate: Union[bool, str] = True,
+                    **kwargs):
         """
         (Copy paste from dataarray.interpolate_na)
         Interpolate values according to different methods.
@@ -193,6 +198,7 @@ class DataPrep(object):
         Perform inverse transformation
         :return:
         """
+
         def f_inverse(data, mean, std, method_inverse):
             if method_inverse == 'standardise':
                 return statistics.standardise_inverse(data, mean, std), None, None
@@ -319,8 +325,7 @@ class DataPrep(object):
         if (self.history is not None) and (self.label is not None):
             non_nan_history = self.history.dropna(dim=dim)
             non_nan_label = self.label.dropna(dim=dim)
-            intersect = np.intersect1d(non_nan_history.coords[dim].values,
-                                       non_nan_label.coords[dim].values)
+            intersect = np.intersect1d(non_nan_history.coords[dim].values, non_nan_label.coords[dim].values)
 
         if len(intersect) == 0:
             self.history = None
@@ -382,6 +387,5 @@ class DataPrep(object):
 
 
 if __name__ == "__main__":
-
     dp = DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
     print(dp)
diff --git a/src/helpers.py b/src/helpers.py
index 40a3f9762cd649651631e45d94b78c19562b9749..172a8dd3cf04a15e9069347dac7f06c6d2d8ed60 100644
--- a/src/helpers.py
+++ b/src/helpers.py
@@ -5,16 +5,19 @@ __date__ = '2019-10-21'
 
 
 import logging
-import keras
-import keras.backend as K
 import math
-from typing import Union
-import numpy as np
 import os
 import time
 import socket
 import datetime as dt
 
+import keras
+import keras.backend as K
+import numpy as np
+import xarray as xr
+
+from typing import Union, Dict, Callable
+
 
 def to_list(arg):
     if not isinstance(arg, list):
@@ -197,3 +200,35 @@ class PyTestRegex:
 
     def __repr__(self) -> str:
         return self._regex.pattern
+
+
+def dict_to_xarray(d: Dict, coordinate_name: str) -> xr.DataArray:
+    """
+    Convert a dictionary of 2D-xarrays to single 3D-xarray. The name of new coordinate axis follows <coordinate_name>.
+    :param d: dictionary with 2D-xarrays
+    :param coordinate_name: name of the new created axis (2D -> 3D)
+    :return: combined xarray
+    """
+    xarray = None
+    for k, v in d.items():
+        if xarray is None:
+            xarray = v
+            xarray.coords[coordinate_name] = k
+        else:
+            tmp_xarray = v
+            tmp_xarray.coords[coordinate_name] = k
+            xarray = xr.concat([xarray, tmp_xarray], coordinate_name)
+    return xarray
+
+
+def float_round(number: float, decimals: int = 0, round_type: Callable = math.ceil) -> float:
+    """
+    Perform given rounding operation on number with the precision of decimals.
+    :param number: the number to round
+    :param decimals: numbers of decimals of the rounding operations (default 0 -> round to next integer value)
+    :param round_type: the actual rounding operation. Can be any callable function like math.ceil, math.floor or python
+        built-in round operation.
+    :return: rounded number with desired precision
+    """
+    multiplier = 10. ** decimals
+    return round_type(number * multiplier) / multiplier
diff --git a/src/plotting/postprocessing_plotting.py b/src/plotting/postprocessing_plotting.py
new file mode 100644
index 0000000000000000000000000000000000000000..469b00d7da8954d1e96ab10f5372b338f9fa8b3e
--- /dev/null
+++ b/src/plotting/postprocessing_plotting.py
@@ -0,0 +1,324 @@
+__author__ = "Lukas Leufen, Felix Kleinert"
+__date__ = '2019-12-17'
+
+import os
+import logging
+import math
+import warnings
+from src import helpers
+from src.helpers import TimeTracking
+
+import numpy as np
+import xarray as xr
+import pandas as pd
+
+import matplotlib
+import seaborn as sns
+import matplotlib.pyplot as plt
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+from matplotlib.backends.backend_pdf import PdfPages
+
+from typing import Dict, List
+
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
+
+
+def plot_monthly_summary(stations: List, data_path: str, name: str, target_var: str, window_lead_time: int = None,
+                         plot_folder: str = "."):
+    """
+    Show a monthly summary over all stations for each lead time ("ahead") as box and whiskers plot. The plot is saved
+    in data_path with name monthly_summary_box_plot.pdf and 500dpi resolution.
+    :param stations: all stations to plot
+    :param data_path: path, where the data is located
+    :param name: full name of the local files with a % as placeholder for the station name
+    :param target_var: display name of the target variable on plot's axis
+    :param window_lead_time: lead time to plot, if window_lead_time is higher than the available lead time or not given
+        the maximum lead time from data is used. (default None -> use maximum lead time from data).
+    :param plot_folder: path to save the plot (default: current directory)
+    """
+    logging.debug("run plot_monthly_summary()")
+    forecasts = None
+
+    for station in stations:
+        logging.debug(f"... preprocess station {station}")
+        file_name = os.path.join(data_path, name % station)
+        data = xr.open_dataarray(file_name)
+
+        data_cnn = data.sel(type="CNN").squeeze()
+        data_cnn.coords["ahead"].values = [f"{days}d" for days in data_cnn.coords["ahead"].values]
+
+        data_orig = data.sel(type="orig", ahead=1).squeeze()
+        data_orig.coords["ahead"] = "orig"
+
+        data_concat = xr.concat([data_orig, data_cnn], dim="ahead")
+        data_concat = data_concat.drop("type")
+
+        data_concat.index.values = data_concat.index.values.astype("datetime64[M]").astype(int) % 12 + 1
+        data_concat = data_concat.clip(min=0)
+
+        forecasts = xr.concat([forecasts, data_concat], 'index') if forecasts is not None else data_concat
+
+    ahead_steps = len(forecasts.ahead)
+    if window_lead_time is None:
+        window_lead_time = ahead_steps
+    window_lead_time = min(ahead_steps, window_lead_time)
+
+    forecasts = forecasts.to_dataset(name='values').to_dask_dataframe()
+    logging.debug("... start plotting")
+    ax = sns.boxplot(x='index', y='values', hue='ahead', data=forecasts.compute(), whis=1.,
+                     palette=[matplotlib.colors.cnames["green"]] + sns.color_palette("Blues_d",
+                                                                                     window_lead_time).as_hex(),
+                     flierprops={'marker': '.', 'markersize': 1}, showmeans=True,
+                     meanprops={'markersize': 1, 'markeredgecolor': 'k'})
+    ax.set(xlabel='month', ylabel=f'{target_var}')
+    plt.tight_layout()
+    plot_name = os.path.join(os.path.abspath(plot_folder), 'monthly_summary_box_plot.pdf')
+    logging.debug(f"... save plot to {plot_name}")
+    plt.savefig(plot_name, dpi=500)
+    plt.close('all')
+
+
+def plot_station_map(generators: Dict, plot_folder: str = "."):
+    """
+    Plot geographical overview of all used stations. Different data sets can be colorised by its key in the input
+    dictionary generators. The key represents the color to plot on the map. Currently, there is only a white background,
+    but this can be adjusted by loading locally stored topography data (not implemented yet). The plot is saved under
+    plot_path with the name station_map.pdf
+    :param generators: dictionary with the plot color of each data set as key and the generator containing all stations
+        as value.
+    :param plot_folder: path to save the plot (default: current directory)
+    """
+    logging.debug("run station_map()")
+    fig = plt.figure(figsize=(10, 5))
+    ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
+    ax.set_extent([0, 20, 42, 58], crs=ccrs.PlateCarree())
+    ax.add_feature(cfeature.COASTLINE.with_scale("10m"), edgecolor='black')
+    ax.add_feature(cfeature.LAKES.with_scale("50m"))
+    ax.add_feature(cfeature.OCEAN.with_scale("50m"))
+    ax.add_feature(cfeature.RIVERS.with_scale("10m"))
+    ax.add_feature(cfeature.BORDERS.with_scale("10m"), facecolor='none', edgecolor='black')
+
+    if generators is not None:
+        for color, gen in generators.items():
+            for k, v in enumerate(gen):
+                station_coords = gen.get_data_generator(k).meta.loc[['station_lon', 'station_lat']]
+                # station_names = gen.get_data_generator(k).meta.loc[['station_id']]
+                IDx, IDy = float(station_coords.loc['station_lon'].values), float(
+                    station_coords.loc['station_lat'].values)
+                ax.plot(IDx, IDy, mfc=color, mec='k', marker='s', markersize=6, transform=ccrs.PlateCarree())
+
+    plot_name = os.path.join(os.path.abspath(plot_folder), 'station_map.pdf')
+    logging.debug(f"... save plot to {plot_name}")
+    plt.savefig(plot_name, dpi=500)
+    plt.close('all')
+
+
+def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_window: int = 3, ref_name: str = 'orig',
+                               pred_name: str = 'CNN', season: str = "", forecast_path: str = None,
+                               plot_name_affix: str = "", units: str = "ppb"):
+    """
+    This plot was originally taken from Murphy, Brown and Chen (1989):
+    https://journals.ametsoc.org/doi/pdf/10.1175/1520-0434%281989%29004%3C0485%3ADVOTF%3E2.0.CO%3B2
+
+    :param stations: stations to include in the plot (forecast data needs to be available already)
+    :param plot_folder: path to save the plot (default: current directory)
+    :param rolling_window: the rolling window mean will smooth the plot appearance (no smoothing in bin calculation,
+        this is only a cosmetic step, default: 3)
+    :param ref_name: name of the reference data series
+    :param pred_name: name of the investigated data series
+    :param season: season name to highlight if not empty
+    :param forecast_path: path to save the plot file
+    :param plot_name_affix: name to specify this plot (e.g. 'cali-ref', default: '')
+    :param units: units of the forecasted values (default: ppb)
+    """
+    time = TimeTracking()
+    logging.debug(f"started plot_conditional_quantiles()")
+    # ignore warnings if nans appear in quantile grouping
+    warnings.filterwarnings("ignore", message="All-NaN slice encountered")
+    # ignore warnings if mean is calculated on nans
+    warnings.filterwarnings("ignore", message="Mean of empty slice")
+    # ignore warnings for y tick = 0 on log scale (instead of 0.00001 or similar)
+    warnings.filterwarnings("ignore", message="Attempted to set non-positive bottom ylim on a log-scaled axis.")
+
+    def load_data():
+        logging.debug("... load data")
+        data_collector = []
+        for station in stations:
+            file = os.path.join(forecast_path, f"forecasts_{station}_test.nc")
+            data_tmp = xr.open_dataarray(file)
+            data_collector.append(data_tmp.loc[:, :, ['CNN', 'orig', 'OLS']].assign_coords(station=station))
+        return xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
+
+    def segment_data(data):
+        logging.debug("... segment data")
+        # combine index and station to multi index
+        data = data.stack(z=['index', 'station'])
+        # replace multi index by simple position index (order is not relevant anymore)
+        data.coords['z'] = range(len(data.coords['z']))
+        # segment data of pred_name into bins
+        data.loc[pred_name, ...] = data.loc[pred_name, ...].to_pandas().T.apply(pd.cut, bins=bins,
+                                                                                labels=bins[1:]).T.values
+        return data
+
+    def create_quantile_panel(data, q):
+        logging.debug("... create quantile panel")
+        # create empty xarray with dims: time steps ahead, quantiles, bin index (numbers create in previous step)
+        quantile_panel = xr.DataArray(np.full([data.ahead.shape[0], len(q), bins[1:].shape[0]], np.nan),
+                                      coords=[data.ahead, q, bins[1:]], dims=['ahead', 'quantiles', 'categories'])
+        # ensure that the coordinates are in the right order
+        quantile_panel = quantile_panel.transpose('ahead', 'quantiles', 'categories')
+        # calculate for each bin of the pred_name data the quantiles of the ref_name data
+        for bin in bins[1:]:
+            mask = (data.loc[pred_name, ...] == bin)
+            quantile_panel.loc[..., bin] = data.loc[ref_name, ...].where(mask).quantile(q, dim=['z']).T
+
+        return quantile_panel
+
+    def labels(plot_type, data_unit="ppb"):
+        names = (f"forecast concentration (in {data_unit})", f"observed concentration (in {data_unit})")
+        if plot_type == "orig":
+            return names
+        else:
+            return names[::-1]
+
+    xlabel, ylabel = labels(ref_name, units)
+
+    opts = {"q": [.1, .25, .5, .75, .9], "linetype": [':', '-.', '--', '-.', ':'],
+            "legend": ['.10th and .90th quantile', '.25th and .75th quantile', '.50th quantile', 'reference 1:1'],
+            "xlabel": xlabel, "ylabel": ylabel}
+
+    # set name and path of the plot
+    base_name = "conditional_quantiles"
+    def add_affix(x): return f"_{x}" if len(x) > 0 else ""
+    plot_name = f"{base_name}{add_affix(season)}{add_affix(plot_name_affix)}_plot.pdf"
+    plot_path = os.path.join(os.path.abspath(plot_folder), plot_name)
+
+    # check forecast path
+    if forecast_path is None:
+        raise ValueError("Forecast path is not given but required.")
+
+    # load data and set data bins
+    orig_data = load_data()
+    bins = np.arange(0, math.ceil(orig_data.max().max()) + 1, 1).astype(int)
+    segmented_data = segment_data(orig_data)
+    quantile_panel = create_quantile_panel(segmented_data, q=opts["q"])
+
+    # init pdf output
+    pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+    logging.debug(f"... plot path is {plot_path}")
+
+    # create plot for each time step ahead
+    y2_max = 0
+    for iteration, d in enumerate(segmented_data.ahead):
+        logging.debug(f"... plotting {d.values} time step(s) ahead")
+        # plot smoothed lines with rolling mean
+        smooth_data = quantile_panel.loc[d, ...].rolling(categories=rolling_window, center=True).mean().to_pandas().T
+        ax = smooth_data.plot(style=opts["linetype"], color='black', legend=False)
+        ax2 = ax.twinx()
+        # add reference line
+        ax.plot([0, bins.max()], [0, bins.max()], color='k', label='reference 1:1', linewidth=.8)
+        # add histogram of the segmented data (pred_name)
+        handles, labels = ax.get_legend_handles_labels()
+        segmented_data.loc[pred_name, d, :].to_pandas().hist(bins=bins, ax=ax2, color='k', alpha=.3, grid=False,
+                                                             rwidth=1)
+        # add legend
+        plt.legend(handles[:3] + [handles[-1]], opts["legend"], loc='upper left', fontsize='large')
+        # adjust limits and set labels
+        ax.set(xlim=(0, bins.max()), ylim=(0, bins.max()))
+        ax.set_xlabel(opts["xlabel"], fontsize='x-large')
+        ax.tick_params(axis='x', which='major', labelsize=15)
+        ax.set_ylabel(opts["ylabel"], fontsize='x-large')
+        ax.tick_params(axis='y', which='major', labelsize=15)
+        ax2.yaxis.label.set_color('gray')
+        ax2.tick_params(axis='y', colors='gray')
+        ax2.yaxis.labelpad = -15
+        ax2.set_yscale('log')
+        if iteration == 0:
+            y2_max = ax2.get_ylim()[1] + 100
+        ax2.set(ylim=(0, y2_max * 10 ** 8), yticks=np.logspace(0, 4, 5))
+        ax2.set_ylabel('              sample size', fontsize='x-large')
+        ax2.tick_params(axis='y', which='major', labelsize=15)
+        # set title and save current figure
+        title = f"{d.values} time step(s) ahead{f' ({season})' if len(season) > 0 else ''}"
+        plt.title(title)
+        pdf_pages.savefig()
+    # close all open figures / plots
+    pdf_pages.close()
+    plt.close('all')
+    logging.info(f"plot_conditional_quantiles() finished after {time}")
+
+
+def plot_climatological_skill_score(data: Dict, plot_folder: str = ".", score_only: bool = True,
+                                    extra_name_tag: str = "", model_setup: str = ""):
+    """
+    Create plot of climatological skill score after Murphy (1988) as box plot over all stations. A forecast time step
+    (called "ahead") is separately shown to highlight the differences for each prediction time step. Either each single
+    term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed (score_only=True,
+    default). Y-axis is adjusted following the data and not hard coded. The plot is saved under plot_folder path with
+    name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi.
+    :param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms.
+    :param plot_folder: path to save the plot (default: current directory)
+    :param score_only: if true plot only scores of CASE I to IV, otherwise plot all single terms (default True)
+    :param extra_name_tag: additional tag that can be included in the plot name (default "")
+    :param model_setup: architecture type (default "CNN")
+    """
+    logging.debug("run plot_climatological_skill_score()")
+    data = helpers.dict_to_xarray(data, "station")
+    labels = [str(i) + "d" for i in data.coords["ahead"].values]
+    fig, ax = plt.subplots()
+    if score_only:
+        data = data.loc[:, ["CASE I", "CASE II", "CASE III", "CASE IV"], :]
+        lab_add = ''
+    else:
+        fig.set_size_inches(11.7, 8.27)
+        lab_add = "terms and "
+    data = data.to_dataframe("data").reset_index(level=[0, 1, 2])
+    sns.boxplot(x="terms", y="data", hue="ahead", data=data, ax=ax, whis=1., palette="Blues_d", showmeans=True,
+                meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
+    ax.axhline(y=0, color="grey", linewidth=.5)
+    ax.set(ylabel=f"{lab_add}skill score", xlabel="", title="summary of all stations")
+    handles, _ = ax.get_legend_handles_labels()
+    ax.legend(handles, labels)
+    plt.tight_layout()
+    plot_name = os.path.join(plot_folder, f"skill_score_clim_{extra_name_tag}{model_setup}.pdf")
+    logging.debug(f"... save plot to {plot_name}")
+    plt.savefig(plot_name, dpi=500)
+    plt.close('all')
+
+
+def plot_competitive_skill_score(data: pd.DataFrame, plot_folder=".", model_setup="CNN"):
+    """
+    Create competitive skill score for the given model setup and the reference models ordinary least squared ("ols") and
+    the persistence forecast ("persi") for all lead times ("ahead"). The plot is saved under plot_folder with the name
+    skill_score_competitive_{model_setup}.pdf and resolution of 500dpi.
+    :param data: data frame with index=['cnn-persi', 'ols-persi', 'cnn-ols'] and columns "ahead" containing the pre-
+        calculated comparisons for cnn, persistence and ols.
+    :param plot_folder: path to save the plot (default: current directory)
+    :param model_setup: architecture type (default "CNN")
+    """
+    logging.debug("run plot_general_skill_score()")
+
+    data = pd.concat(data, axis=0)
+    data = xr.DataArray(data, dims=["stations", "ahead"]).unstack("stations")
+    data = data.rename({"stations_level_0": "stations", "stations_level_1": "comparison"})
+    data = data.to_dataframe("data").unstack(level=1).swaplevel()
+    data.columns = data.columns.levels[1]
+
+    labels = [str(i) + "d" for i in data.index.levels[1].values]
+    data = data.stack(level=0).reset_index(level=2, drop=True).reset_index(name="data")
+
+    fig, ax = plt.subplots()
+    sns.boxplot(x="comparison", y="data", hue="ahead", data=data, whis=1., ax=ax, palette="Blues_d", showmeans=True,
+                meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."},
+                order=["cnn-persi", "ols-persi", "cnn-ols"])
+    ax.axhline(y=0, color="grey", linewidth=.5)
+    ax.set(ylabel="skill score", xlabel="competing models", title="summary of all stations",
+           ylim=(np.min([0, helpers.float_round(data.min()[2], 2) - 0.1]), helpers.float_round(data.max()[2], 2) + 0.1))
+    handles, _ = ax.get_legend_handles_labels()
+    ax.legend(handles, labels)
+    plt.tight_layout()
+    plot_name = os.path.join(plot_folder, f"skill_score_competitive_{model_setup}.pdf")
+    logging.debug(f"... save plot to {plot_name}")
+    plt.savefig(plot_name, dpi=500)
+    plt.close()
diff --git a/src/run_modules/README.md b/src/run_modules/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..331492209136893934038d6049f274ae19495aec
--- /dev/null
+++ b/src/run_modules/README.md
@@ -0,0 +1,96 @@
+# On data handling
+
+This readme declares which function loads which data and where it is stored.
+
+## experiment setup
+
+**data_path** is the destination where all downloaded data is locally stored. Data is downloaded from TOARDB either using 
+the JOIN interface or a direct connection to the underlying PostgreSQL DB. If data was already downloaded, no new 
+download will be started. Missing data will be downloaded on the fly and saved in data_path. 
+
+`data_path = src.helpers.prepare_host()`
+ 
+ Current implementation leads to following paths:
+ 
+ | hostname | path | comment |
+ | --- | --- | --- |
+ | ZAM144 | `/home/{user}/Data/toar_daily/` | notebook Felix |
+ | zam347 | `/home/{user}/Data/toar_daily/` | ESDE server |
+ | linux-gzsx | `/home/{user}/machinelearningtools/data/toar_daily/` | notebook Lukas |
+ | jureca | `/p/project/cjjsc42/{user}/DATA/toar_daily/` | JURECA |
+ | juwels | `/p/home/jusers/{user}/juwels/intelliaq/DATA/toar_daily/` | JUWELS |
+ | runner-6HmDp9Qd-project-2411-concurrent | `/home/{user}/machinelearningtools/data/toar_daily/` | gitlab-runner |
+
+**experiment_path** is the root folder in that all results from the experiment are saved. For each experiment there should
+be distinct folder. Experiment path is can be set in ExperimentSetup. `experiment_date` can be set by parser_args and 
+`experiment_path` (this argument is not the same as the internal stored experiment_path!) as args. The *experiment_path*
+is the combination of both given arguments `os.path.join(experiment_path, f"{experiment_date}_network")`. Inside this
+folder, several subfolders are created in the course of the program.
+
+```bash
+data_path
+    <station1>_<var1>_<var2>_..._<varx>.nc
+    <station1>_<var1>_<var2>_..._<varx>_meta.csv
+    <station2>_<var1>_<var2>_..._<varx>.nc
+    <station2>_<var1>_<var2>_..._<varx>_meta.csv
+------
+experiment_path
+|   history.json
+|   history_lr.json
+|   <experiment_name>_model.pdf
+|   <experiment_name>_model-best.h5
+|   <experiment_name>_my_model.h5
+├─── forecasts 
+|       forecasts_<station1>_test.nc
+|       forecasts_<station2>_test.nc
+|       ...
+└─── plots
+        conditional_quantiles_cali-ref_plot.pdf
+        conditional_quantiles_like-bas_plot.pdf
+        test_monthly_box.pdf
+        test_map_plot.pdf
+        <experiment_name>_history_learning_rate.pdf
+        <experiment_name>_history_loss.pdf
+        <experiment_name>_history_main_loss.pdf
+        <experiment_name>_history_main_mse.pdf
+        ...
+
+```
+
+**plot_path** includes all created plots. If not given, this is create into the experiment_path by default (as shown in 
+the folder structure above). Can be customised by `ExperimentSetup(plot_path=<path>)`.
+
+**forecast_path** is the place, where all forecasts are stored as netcdf file. Each file consists exactly one single
+station. If not given, this is create into the experiment_path by default (as shown in the folder structure above). Can 
+be customised by `ExperimentSetup(forecast_path=<path>)`.
+
+## pre-processing
+
+Each requested station is check whether it is already included in *data_path*. The files all following the naming 
+convention `<station_name>_<sorted_list_of_all_variables_split_by_underscore>.nc`. E.g. the station *DEBW013* with the
+variables cloudcover, NO, NO2, O3 and temp (all TOARDB short names) is saved as `DEBW013_cloudcover_no_no2_o3_temp.nc`, 
+whereas the same station with only O3 and temperature becomes `DEBW013_o3_temp.nc`. Although all data of the latter file
+is potentially also included in the former file, the program will always download the data specification for new and 
+save this data into a new file. Only if the exactly fitting file is available locally, no data is downloaded. **NOTE**:
+There is no check on data time range, only the name is compared. Set `overwrite_local_data=True` 
+in `experiment_setup.py` to overwrite local data by downloading new data.
+
+## model setup
+
+**checkpoint** is created inside *experiment_path* as `<experiment_name>_model-best.h5`.
+
+The architecture of the model is plotted into *experiment_path* as `<experiment_name>_model.pdf` 
+
+## training
+
+Training metrics are saved in `history.json` and `history_lr.json`.
+
+Best model is saved in `<experiment_name>_my_model.h5`.
+
+## post-processing
+
+During the *make_forecast* method, all calculated forecasts of the neural network, persistence, ordinary least squared
+and the target values with the regarding lead time are saved locally inside *forecast_path* as 
+`forecasts_<station>_test.nc`.
+
+All plots are created inside *plot_path*.
\ No newline at end of file
diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py
index b0c628b8966aa7cfedbf389552a9893acca297fa..8542f57a49e7d197d4118a23e6dbcefbb95e793e 100644
--- a/src/run_modules/experiment_setup.py
+++ b/src/run_modules/experiment_setup.py
@@ -33,7 +33,7 @@ class ExperimentSetup(RunEnvironment):
                  window_lead_time=None, dimensions=None, interpolate_dim=None, interpolate_method=None,
                  limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None,
                  test_end=None, use_all_stations_on_all_data_sets=True, trainable=False, fraction_of_train=None,
-                 experiment_path=None, plot_path=None, forecast_path=None):
+                 experiment_path=None, plot_path=None, forecast_path=None, overwrite_local_data=None):
 
         # create run framework
         super().__init__()
@@ -70,6 +70,7 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("start", start, default="1997-01-01", scope="general")
         self._set_param("end", end, default="2017-12-31", scope="general")
         self._set_param("window_history_size", window_history_size, default=13)
+        self._set_param("overwrite_local_data", overwrite_local_data, default=False, scope="general.preprocessing")
 
         # target
         self._set_param("target_var", target_var, default="o3")
@@ -94,6 +95,10 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("start", test_start, default="2010-01-01", scope="general.test")
         self._set_param("end", test_end, default="2017-12-31", scope="general.test")
 
+        # train_val parameters
+        self._set_param("start", self.data_store.get("start", "general.train"), scope="general.train_val")
+        self._set_param("end", self.data_store.get("end", "general.val"), scope="general.train_val")
+
         # use all stations on all data sets (train, val, test)
         self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True)
 
diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py
index e5739e5f15e1c2f20758e388b3493c28f577bb9a..ecbc415f8b463690dcc75d7731d8ec859d74da33 100644
--- a/src/run_modules/post_processing.py
+++ b/src/run_modules/post_processing.py
@@ -8,35 +8,71 @@ import os
 import numpy as np
 import pandas as pd
 import xarray as xr
-import statsmodels.api as sm
+import keras
 
 from src.run_modules.run_environment import RunEnvironment
 from src.data_handling.data_distributor import Distributor
+from src.data_handling.data_generator import DataGenerator
 from src.model_modules.linear_model import OrdinaryLeastSquaredModel
 from src import statistics
-from src import helpers
-from src.helpers import TimeTracking
+from src.plotting.postprocessing_plotting import plot_monthly_summary, plot_station_map, plot_conditional_quantiles, \
+    plot_climatological_skill_score, plot_competitive_skill_score
+from src.datastore import NameNotFoundInDataStore
 
 
 class PostProcessing(RunEnvironment):
 
     def __init__(self):
         super().__init__()
-        self.model = self.data_store.get("best_model", "general")
+        self.model: keras.Model = self._load_model()
         self.ols_model = None
-        self.batch_size = self.data_store.get("batch_size", "general.model")
-        self.test_data = self.data_store.get("generator", "general.test")
+        self.batch_size: int = self.data_store.get("batch_size", "general.model")
+        self.test_data: DataGenerator = self.data_store.get("generator", "general.test")
         self.test_data_distributed = Distributor(self.test_data, self.model, self.batch_size)
-        self.train_data = self.data_store.get("generator", "general.train")
+        self.train_data: DataGenerator = self.data_store.get("generator", "general.train")
+        self.train_val_data: DataGenerator = self.data_store.get("generator", "general.train_val")
+        self.plot_path: str = self.data_store.get("plot_path", "general")
+        self.skill_scores = None
         self._run()
 
     def _run(self):
         self.train_ols_model()
         preds_for_all_stations = self.make_prediction()
+        self.skill_scores = self.calculate_skill_scores()
+        self.plot()
+
+    def _load_model(self):
+        try:
+            model = self.data_store.get("best_model", "general")
+        except NameNotFoundInDataStore:
+            logging.info("no model saved in data store. trying to load model from experiment")
+            path = self.data_store.get("experiment_path", "general")
+            name = f"{self.data_store.get('experiment_name', 'general')}_my_model.h5"
+            model_name = os.path.join(path, name)
+            model = keras.models.load_model(model_name)
+        return model
+
+    def plot(self):
+        logging.debug("Run plotting routines...")
+        path = self.data_store.get("forecast_path", "general")
+        target_var = self.data_store.get("target_var", "general")
+
+        plot_conditional_quantiles(self.test_data.stations, pred_name="CNN", ref_name="orig",
+                                   forecast_path=path, plot_name_affix="cali-ref", plot_folder=self.plot_path)
+        plot_conditional_quantiles(self.test_data.stations, pred_name="orig", ref_name="CNN",
+                                   forecast_path=path, plot_name_affix="like-bas", plot_folder=self.plot_path)
+        plot_station_map(generators={'b': self.test_data}, plot_folder=self.plot_path)
+        plot_monthly_summary(self.test_data.stations, path, r"forecasts_%s_test.nc", target_var,
+                             plot_folder=self.plot_path)
+        #
+        plot_climatological_skill_score(self.skill_scores[1], plot_folder=self.plot_path, model_setup="CNN")
+        plot_climatological_skill_score(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
+                                        extra_name_tag="all_terms_", model_setup="CNN")
+        plot_competitive_skill_score(self.skill_scores[0], plot_folder=self.plot_path, model_setup="CNN")
 
     def calculate_test_score(self):
-        test_score = self.model.evaluate(generator=self.test_data_distributed.distribute_on_batches(),
-                                         use_multiprocessing=False, verbose=0, steps=1)
+        test_score = self.model.evaluate_generator(generator=self.test_data_distributed.distribute_on_batches(),
+                                                   use_multiprocessing=False, verbose=0, steps=1)
         logging.info(f"test score = {test_score}")
         self._save_test_score(test_score)
 
@@ -50,6 +86,7 @@ class PostProcessing(RunEnvironment):
         self.ols_model = OrdinaryLeastSquaredModel(self.train_data)
 
     def make_prediction(self, freq="1D"):
+        logging.debug("start make_prediction")
         nn_prediction_all_stations = []
         for i, v in enumerate(self.test_data):
             data = self.test_data.get_data_generator(i)
@@ -91,8 +128,8 @@ class PostProcessing(RunEnvironment):
         return nn_prediction_all_stations
 
     @staticmethod
-    def _create_orig_forecast(data, placeholder, mean, std, transformation_method):
-        return statistics.apply_inverse_transformation(data.label, mean, std, transformation_method)
+    def _create_orig_forecast(data, _, mean, std, transformation_method):
+        return statistics.apply_inverse_transformation(data.label.copy(), mean, std, transformation_method)
 
     def _create_ols_forecast(self, input_data, ols_prediction, mean, std, transformation_method):
         tmp_ols = self.ols_model.predict(input_data)
@@ -132,7 +169,7 @@ class PostProcessing(RunEnvironment):
 
     @staticmethod
     def _create_empty_prediction_arrays(generator, count=1):
-        return [generator.label.copy()] * count
+        return [generator.label.copy() for _ in range(count)]
 
     @staticmethod
     def create_fullindex(df, freq):
@@ -146,7 +183,7 @@ class PostProcessing(RunEnvironment):
         elif isinstance(df, xr.DataArray):
             earliest = df.index[0].values
             latest = df.index[-1].values
-        elif isinstance(df, pd.core.indexes.datetimes.DatetimeIndex):
+        elif isinstance(df, pd.DatetimeIndex):
             earliest = df[0]
             latest = df[-1]
         else:
@@ -178,4 +215,27 @@ class PostProcessing(RunEnvironment):
                 res.loc[match_index, :, k] = v.sel({'datetime': match_index}).squeeze('Stations').transpose()
         return res
 
-
+    def _get_external_data(self, station):
+        try:
+            data = self.train_val_data.get_data_generator(station)
+            mean, std, transformation_method = data.get_transformation_information(variable='o3')
+            external_data = self._create_orig_forecast(data, None, mean, std, transformation_method)
+            external_data = external_data.squeeze("Stations").sel(window=1).drop(["window", "Stations", "variables"])
+            return external_data.rename({'datetime': 'index'})
+        except KeyError:
+            return None
+
+    def calculate_skill_scores(self):
+        path = self.data_store.get("forecast_path", "general")
+        window_lead_time = self.data_store.get("window_lead_time", "general")
+        skill_score_general = {}
+        skill_score_climatological = {}
+        for station in self.test_data.stations:
+            file = os.path.join(path, f"forecasts_{station}_test.nc")
+            data = xr.open_dataarray(file)
+            skill_score = statistics.SkillScores(data)
+            external_data = self._get_external_data(station)
+            skill_score_general[station] = skill_score.skill_scores(window_lead_time)
+            skill_score_climatological[station] = skill_score.climatological_skill_scores(external_data,
+                                                                                          window_lead_time)
+        return skill_score_general, skill_score_climatological
diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py
index 9faae7943eb4cd4adb6572c524f5d1795aaa65fe..dd0def055afc00202f9139638586807b0d2b832b 100644
--- a/src/run_modules/pre_processing.py
+++ b/src/run_modules/pre_processing.py
@@ -12,7 +12,8 @@ from src.join import EmptyQueryResult
 
 
 DEFAULT_ARGS_LIST = ["data_path", "network", "stations", "variables", "interpolate_dim", "target_dim", "target_var"]
-DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "station_type"]
+DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var",
+                       "station_type", "overwrite_local_data", "start", "end"]
 
 
 class PreProcessing(RunEnvironment):
@@ -33,11 +34,12 @@ class PreProcessing(RunEnvironment):
         self._run()
 
     def _run(self):
-        args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST)
-        kwargs = self.data_store.create_args_dict(DEFAULT_KWARGS_LIST)
+        args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope="general.preprocessing")
+        kwargs = self.data_store.create_args_dict(DEFAULT_KWARGS_LIST, scope="general.preprocessing")
         valid_stations = self.check_valid_stations(args, kwargs, self.data_store.get("stations", "general"))
         self.data_store.set("stations", valid_stations, "general")
         self.split_train_val_test()
+        self.report_pre_processing()
 
     def report_pre_processing(self):
         logging.info(20 * '##')
@@ -55,26 +57,28 @@ class PreProcessing(RunEnvironment):
     def split_train_val_test(self):
         fraction_of_training = self.data_store.get("fraction_of_training", "general")
         stations = self.data_store.get("stations", "general")
-        train_index, val_index, test_index = self.split_set_indices(len(stations), fraction_of_training)
-        for (ind, scope) in zip([train_index, val_index, test_index], ["train", "val", "test"]):
+        train_index, val_index, test_index, train_val_index = self.split_set_indices(len(stations), fraction_of_training)
+        subset_names = ["train", "val", "test", "train_val"]
+        for (ind, scope) in zip([train_index, val_index, test_index, train_val_index], subset_names):
             self.create_set_split(ind, scope)
 
     @staticmethod
-    def split_set_indices(total_length: int, fraction: float) -> Tuple[slice, slice, slice]:
+    def split_set_indices(total_length: int, fraction: float) -> Tuple[slice, slice, slice, slice]:
         """
         create the training, validation and test subset slice indices for given total_length. The test data consists on
         (1-fraction) of total_length (fraction*len:end). Train and validation data therefore are made from fraction of
         total_length (0:fraction*len). Train and validation data is split by the factor 0.8 for train and 0.2 for
-        validation.
+        validation. In addition, split_set_indices returns also the combination of training and validation subset.
         :param total_length: list with all objects to split
         :param fraction: ratio between test and union of train/val data
-        :return: slices for each subset in the order: train, val, test
+        :return: slices for each subset in the order: train, val, test, train_val
         """
         pos_test_split = int(total_length * fraction)
         train_index = slice(0, int(pos_test_split * 0.8))
         val_index = slice(int(pos_test_split * 0.8), pos_test_split)
         test_index = slice(pos_test_split, total_length)
-        return train_index, val_index, test_index
+        train_val_index = slice(0, pos_test_split)
+        return train_index, val_index, test_index, train_val_index
 
     def create_set_split(self, index_list, set_name):
         scope = f"general.{set_name}"
diff --git a/src/run_modules/training.py b/src/run_modules/training.py
index fee1b38b97d8f4649730f0f7110cd3801ba7db33..96936ce124e05251af483e758401d833a44531f4 100644
--- a/src/run_modules/training.py
+++ b/src/run_modules/training.py
@@ -16,7 +16,7 @@ class Training(RunEnvironment):
 
     def __init__(self):
         super().__init__()
-        self.model = self.data_store.get("model", "general.model")
+        self.model: keras.Model = self.data_store.get("model", "general.model")
         self.train_set = None
         self.val_set = None
         self.test_set = None
diff --git a/src/statistics.py b/src/statistics.py
index 6f34187e32949910df5762a45d868701920b610f..fd8491748f7ebd30d7056af9cc1ce162c5743881 100644
--- a/src/statistics.py
+++ b/src/statistics.py
@@ -1,6 +1,11 @@
-__author__ = 'Lukas Leufen'
+from scipy import stats
+
+from src.run_modules.run_environment import RunEnvironment
+
+__author__ = 'Lukas Leufen, Felix Kleinert'
 __date__ = '2019-10-23'
 
+import numpy as np
 import xarray as xr
 import pandas as pd
 from typing import Union, Tuple
@@ -70,3 +75,145 @@ def centre_inverse(data: Data, mean: Data) -> Data:
     :return:
     """
     return data + mean
+
+
+def mean_squared_error(a, b):
+    return np.square(a - b).mean()
+
+
+class SkillScores(RunEnvironment):
+
+    def __init__(self, internal_data):
+        super().__init__()
+        self.internal_data = internal_data
+
+    def skill_scores(self, window_lead_time):
+        ahead_names = list(range(1, window_lead_time + 1))
+        skill_score = pd.DataFrame(index=['cnn-persi', 'ols-persi', 'cnn-ols'])
+        for iahead in ahead_names:
+            data = self.internal_data.sel(ahead=iahead)
+            skill_score[iahead] = [self.general_skill_score(data, forecast_name="CNN", reference_name="persi"),
+                                   self.general_skill_score(data, forecast_name="OLS", reference_name="persi"),
+                                   self.general_skill_score(data, forecast_name="CNN", reference_name="OLS")]
+        return skill_score
+
+    def climatological_skill_scores(self, external_data, window_lead_time):
+        ahead_names = list(range(1, window_lead_time + 1))
+
+        all_terms = ['AI', 'AII', 'AIII', 'AIV', 'BI', 'BII', 'BIV', 'CI', 'CIV', 'CASE I', 'CASE II', 'CASE III',
+                     'CASE IV']
+        skill_score = xr.DataArray(np.full((len(all_terms), len(ahead_names)), np.nan), coords=[all_terms, ahead_names],
+                                   dims=['terms', 'ahead'])
+
+        for iahead in ahead_names:
+
+            data = self.internal_data.sel(ahead=iahead)
+
+            skill_score.loc[["CASE I", "AI", "BI", "CI"], iahead] = np.stack(self._climatological_skill_score(
+                data, mu_type=1, forecast_name="CNN").values.flatten())
+
+            skill_score.loc[["CASE II", "AII", "BII"], iahead] = np.stack(self._climatological_skill_score(
+                data, mu_type=2, forecast_name="CNN").values.flatten())
+
+            if external_data is not None:
+                skill_score.loc[["CASE III", "AIII"], iahead] = np.stack(self._climatological_skill_score(
+                    data, mu_type=3, forecast_name="CNN",
+                    external_data=external_data).values.flatten())
+
+                skill_score.loc[["CASE IV", "AIV", "BIV", "CIV"], iahead] = np.stack(self._climatological_skill_score(
+                    data, mu_type=4, forecast_name="CNN",
+                    external_data=external_data).values.flatten())
+
+        return skill_score
+
+    def _climatological_skill_score(self, data, mu_type=1, observation_name="orig", forecast_name="CNN", external_data=None):
+        kwargs = {"external_data": external_data} if external_data is not None else {}
+        return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs)
+
+    @staticmethod
+    def general_skill_score(data, observation_name="orig", forecast_name="CNN", reference_name="persi"):
+        data = data.dropna("index")
+        observation = data.sel(type=observation_name)
+        forecast = data.sel(type=forecast_name)
+        reference = data.sel(type=reference_name)
+        mse = statistics.mean_squared_error
+        skill_score = 1 - mse(observation, forecast) / mse(observation, reference)
+        return skill_score.values
+
+    @staticmethod
+    def skill_score_pre_calculations(data, observation_name, forecast_name):
+
+        data = data.loc[..., [observation_name, forecast_name]].drop("ahead")
+        data = data.dropna("index")
+
+        mean = data.mean("index")
+        sigma = np.sqrt(data.var("index"))
+        # r, p = stats.spearmanr(data.loc[..., [forecast_name, observation_name]])
+        r, p = stats.pearsonr(data.loc[..., forecast_name], data.loc[..., observation_name])
+
+        AI = np.array(r ** 2)
+        BI = ((r - (sigma.loc[..., forecast_name] / sigma.loc[..., observation_name])) ** 2).values
+        CI = (((mean.loc[..., forecast_name] - mean.loc[..., observation_name]) / sigma.loc[
+            ..., observation_name]) ** 2).values
+
+        suffix = {"mean": mean, "sigma": sigma, "r": r, "p": p}
+        return AI, BI, CI, data, suffix
+
+    def skill_score_mu_case_1(self, data, observation_name="orig", forecast_name="CNN"):
+        AI, BI, CI, data, _ = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        skill_score = np.array(AI - BI - CI)
+        return pd.DataFrame({"skill_score": [skill_score], "AI": [AI], "BI": [BI], "CI": [CI]}).to_xarray().to_array()
+
+    def skill_score_mu_case_2(self, data, observation_name="orig", forecast_name="CNN"):
+        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        monthly_mean = self.create_monthly_mean_from_daily_data(data)
+        data = xr.concat([data, monthly_mean], dim="type")
+        sigma = suffix["sigma"]
+        sigma_monthly = np.sqrt(monthly_mean.var())
+        # r, p = stats.spearmanr(data.loc[..., [observation_name, observation_name + "X"]])
+        r, p = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
+        AII = np.array(r ** 2)
+        BII = ((r - sigma_monthly / sigma.loc[observation_name]) ** 2).values
+        skill_score = np.array((AI - BI - CI - AII + BII) / (1 - AII + BII))
+        return pd.DataFrame({"skill_score": [skill_score], "AII": [AII], "BII": [BII]}).to_xarray().to_array()
+
+    def skill_score_mu_case_3(self, data, observation_name="orig", forecast_name="CNN", external_data=None):
+        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        mean, sigma = suffix["mean"], suffix["sigma"]
+        AIII = (((external_data.mean().values - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values
+        skill_score = np.array((AI - BI - CI + AIII) / 1 + AIII)
+        return pd.DataFrame({"skill_score": [skill_score], "AIII": [AIII]}).to_xarray().to_array()
+
+    def skill_score_mu_case_4(self, data, observation_name="orig", forecast_name="CNN", external_data=None):
+        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values, index=data.index)
+        data = xr.concat([data, monthly_mean_external], dim="type")
+        mean, sigma = suffix["mean"], suffix["sigma"]
+        monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values)
+        mean_external = monthly_mean_external.mean()
+        sigma_external = np.sqrt(monthly_mean_external.var())
+
+        # r_mu, p_mu = stats.spearmanr(data.loc[..., [observation_name, observation_name+'X']])
+        r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
+
+        AIV = np.array(r_mu**2)
+        BIV = ((r_mu - sigma_external / sigma.loc[observation_name])**2).values
+        CIV = (((mean_external - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values
+        skill_score = np.array((AI - BI - CI - AIV + BIV + CIV) / (1 - AIV + BIV + CIV))
+        return pd.DataFrame({"skill_score": [skill_score], "AIV": [AIV], "BIV": [BIV], "CIV": CIV}).to_xarray().to_array()
+
+    @staticmethod
+    def create_monthly_mean_from_daily_data(data, columns=None, index=None):
+        if columns is None:
+            columns = data.type.values
+        if index is None:
+            index = data.index
+        coordinates = [index, [v + "X" for v in list(columns)]]
+        empty_data = np.full((len(index), len(columns)), np.nan)
+        monthly_mean = xr.DataArray(empty_data, coords=coordinates, dims=["index", "type"])
+        mu = data.groupby("index.month").mean()
+
+        for month in mu.month:
+            monthly_mean[monthly_mean.index.dt.month == month, :] = mu[mu.month == month].values
+
+        return monthly_mean
\ No newline at end of file
diff --git a/test/test_helpers.py b/test/test_helpers.py
index ce5d28a63d63dc4a793e6e07c60f95cb411ae97e..e98a46fad6365a3a05ab28c9d118a119e35ff86a 100644
--- a/test/test_helpers.py
+++ b/test/test_helpers.py
@@ -189,3 +189,55 @@ class TestSetExperimentName:
     def test_set_experiment_from_sys(self):
         exp_name, _ = set_experiment_name(experiment_date="2019-11-14")
         assert exp_name == "2019-11-14_network"
+
+
+class TestPytestRegex:
+
+    @pytest.fixture
+    def regex(self):
+        return PyTestRegex("teststring")
+
+    def test_pytest_regex_init(self, regex):
+        assert regex._regex.pattern == "teststring"
+
+    def test_pytest_regex_eq(self, regex):
+        assert regex == "teststringabcd"
+        assert regex != "teststgabcd"
+
+    def test_pytest_regex_repr(self, regex):
+        assert regex.__repr__() == "teststring"
+
+
+class TestDictToXarray:
+
+    def test_dict_to_xarray(self):
+        array1 = xr.DataArray(np.random.randn(2, 3), dims=('x', 'y'), coords={'x': [10, 20]})
+        array2 = xr.DataArray(np.random.randn(2, 3), dims=('x', 'y'), coords={'x': [10, 20]})
+        d = {"number1": array1, "number2": array2}
+        res = dict_to_xarray(d, "merge_dim")
+        assert type(res) == xr.DataArray
+        assert sorted(list(res.coords)) == ["merge_dim", "x"]
+        assert res.shape == (2, 2, 3)
+
+
+class TestFloatRound:
+
+    def test_float_round_ceil(self):
+        assert float_round(4.6) == 5
+        assert float_round(239.3992) == 240
+
+    def test_float_round_decimals(self):
+        assert float_round(23.0091, 2) == 23.01
+        assert float_round(23.1091, 3) == 23.11
+
+    def test_float_round_type(self):
+        assert float_round(34.9221, 2, math.floor) == 34.92
+        assert float_round(34.9221, 0, math.floor) == 34.
+        assert float_round(34.9221, 2, round) == 34.92
+        assert float_round(34.9221, 0, round) == 35.
+
+    def test_float_round_negative(self):
+        assert float_round(-34.9221, 2, math.floor) == -34.93
+        assert float_round(-34.9221, 0, math.floor) == -35.
+        assert float_round(-34.9221, 2) == -34.92
+        assert float_round(-34.9221, 0) == -34.