diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py
index fa16307298bbc64b51e6532a139085ecd8b1e7b0..9627d7a5b0db34a7bc1621727a419808c4f2f323 100644
--- a/src/run_modules/post_processing.py
+++ b/src/run_modules/post_processing.py
@@ -8,18 +8,15 @@ import os
 import numpy as np
 import pandas as pd
 import xarray as xr
-import statsmodels.api as sm
 import keras
-from scipy import stats
 
 from src.run_modules.run_environment import RunEnvironment
 from src.data_handling.data_distributor import Distributor
 from src.data_handling.data_generator import DataGenerator
 from src.model_modules.linear_model import OrdinaryLeastSquaredModel
 from src import statistics
-from src import helpers
-from src.helpers import TimeTracking
-from src.plotting.postprocessing_plotting import plot_monthly_summary, plot_station_map, plot_conditional_quantiles, plot_climatological_skill_score, plot_competitive_skill_score
+from src.plotting.postprocessing_plotting import plot_monthly_summary, plot_station_map, plot_conditional_quantiles, \
+    plot_climatological_skill_score, plot_competitive_skill_score
 from src.datastore import NameNotFoundInDataStore
 
 
@@ -41,7 +38,7 @@ class PostProcessing(RunEnvironment):
     def _run(self):
         self.train_ols_model()
         preds_for_all_stations = self.make_prediction()
-        self.skill_scores = self.calculate_skill_scores()  #TODO: stopped here, continue with plot routine. Think about if skill score should be saved locally and loaded for plotting or if self.skill_scores should be used
+        self.skill_scores = self.calculate_skill_scores()
         self.plot()
 
     def _load_model(self):
@@ -60,19 +57,19 @@ class PostProcessing(RunEnvironment):
         path = self.data_store.get("forecast_path", "general")
         target_var = self.data_store.get("target_var", "general")
 
-        plot_conditional_quantiles(self.test_data.stations, plot_folder=self.plot_path, pred_name="CNN", ref_name="orig",
-                                   forecast_path=path, plot_name_affix="cali-ref")
-        plot_conditional_quantiles(self.test_data.stations, plot_folder=self.plot_path, pred_name="orig", ref_name="CNN",
-                                   forecast_path=path, plot_name_affix="like-bas")
+        plot_conditional_quantiles(self.test_data.stations, pred_name="CNN", ref_name="orig",
+                                   forecast_path=path, plot_name_affix="cali-ref", plot_folder=self.plot_path)
+        plot_conditional_quantiles(self.test_data.stations, pred_name="orig", ref_name="CNN",
+                                   forecast_path=path, plot_name_affix="like-bas", plot_folder=self.plot_path)
         plot_station_map(generators={'b': self.test_data}, plot_folder=self.plot_path)
         plot_monthly_summary(self.test_data.stations, path, r"forecasts_%s_test.nc", target_var,
                              plot_folder=self.plot_path)
         #
         plot_climatological_skill_score(self.skill_scores[1], plot_folder=self.plot_path, model_setup="CNN")
-        plot_climatological_skill_score(self.skill_scores[1], plot_folder=self.plot_path, score_only=False, extra_name_tag="all_terms_", model_setup="CNN")
+        plot_climatological_skill_score(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
+                                        extra_name_tag="all_terms_", model_setup="CNN")
         plot_competitive_skill_score(self.skill_scores[0], plot_folder=self.plot_path, model_setup="CNN")
 
-
     def calculate_test_score(self):
         test_score = self.model.evaluate_generator(generator=self.test_data_distributed.distribute_on_batches(),
                                                    use_multiprocessing=False, verbose=0, steps=1)
@@ -170,7 +167,7 @@ class PostProcessing(RunEnvironment):
         elif isinstance(df, xr.DataArray):
             earliest = df.index[0].values
             latest = df.index[-1].values
-        elif isinstance(df, pd.core.indexes.datetimes.DatetimeIndex):
+        elif isinstance(df, pd.DatetimeIndex):
             earliest = df[0]
             latest = df[-1]
         else:
@@ -212,154 +209,17 @@ class PostProcessing(RunEnvironment):
         except KeyError:
             return None
 
-    def calculate_skill_scores(self, threshold=60):
+    def calculate_skill_scores(self):
         path = self.data_store.get("forecast_path", "general")
         window_lead_time = self.data_store.get("window_lead_time", "general")
         skill_score_general = {}
         skill_score_climatological = {}
-        for station in self.test_data.stations:  # TODO: replace this by a more general approach to also calculate on train/val
+        for station in self.test_data.stations:
             file = os.path.join(path, f"forecasts_{station}_test.nc")
             data = xr.open_dataarray(file)
-            ss = SkillScores(data)
+            skill_score = statistics.SkillScores(data)
             external_data = self._get_external_data(station)
-            skill_score_general[station] = ss.skill_scores(window_lead_time)
-            skill_score_climatological[station] = ss.climatological_skill_scores(external_data, window_lead_time)
+            skill_score_general[station] = skill_score.skill_scores(window_lead_time)
+            skill_score_climatological[station] = skill_score.climatological_skill_scores(external_data,
+                                                                                          window_lead_time)
         return skill_score_general, skill_score_climatological
-
-
-class SkillScores(RunEnvironment):
-
-    def __init__(self, internal_data):
-        super().__init__()
-        self.internal_data = internal_data
-
-    def skill_scores(self, window_lead_time):
-        ahead_names = list(range(1, window_lead_time + 1))
-        skill_score = pd.DataFrame(index=['cnn-persi', 'ols-persi', 'cnn-ols'])
-        for iahead in ahead_names:
-            data = self.internal_data.sel(ahead=iahead)
-            skill_score[iahead] = [self.general_skill_score(data, forecast_name="CNN", reference_name="persi"),
-                                   self.general_skill_score(data, forecast_name="OLS", reference_name="persi"),
-                                   self.general_skill_score(data, forecast_name="CNN", reference_name="OLS")]
-        return skill_score
-
-    def climatological_skill_scores(self, external_data, window_lead_time):
-        ahead_names = list(range(1, window_lead_time + 1))
-
-        all_terms = ['AI', 'AII', 'AIII', 'AIV', 'BI', 'BII', 'BIV', 'CI', 'CIV', 'CASE I', 'CASE II', 'CASE III',
-                     'CASE IV']
-        skill_score = xr.DataArray(np.full((len(all_terms), len(ahead_names)), np.nan), coords=[all_terms, ahead_names],
-                                   dims=['terms', 'ahead'])
-
-        for iahead in ahead_names:
-
-            data = self.internal_data.sel(ahead=iahead)
-
-            skill_score.loc[["CASE I", "AI", "BI", "CI"], iahead] = np.stack(self._climatological_skill_score(
-                data, mu_type=1, forecast_name="CNN").values.flatten())
-
-            skill_score.loc[["CASE II", "AII", "BII"], iahead] = np.stack(self._climatological_skill_score(
-                data, mu_type=2, forecast_name="CNN").values.flatten())
-
-            if external_data is not None:
-                skill_score.loc[["CASE III", "AIII"], iahead] = np.stack(self._climatological_skill_score(
-                    data, mu_type=3, forecast_name="CNN",
-                    external_data=external_data).values.flatten())
-
-                skill_score.loc[["CASE IV", "AIV", "BIV", "CIV"], iahead] = np.stack(self._climatological_skill_score(
-                    data, mu_type=4, forecast_name="CNN",
-                    external_data=external_data).values.flatten())
-
-        return skill_score
-
-    def _climatological_skill_score(self, data, mu_type=1, observation_name="orig", forecast_name="CNN", external_data=None):
-        kwargs = {"external_data": external_data} if external_data is not None else {}
-        return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs)
-
-    @staticmethod
-    def general_skill_score(data, observation_name="orig", forecast_name="CNN", reference_name="persi"):
-        data = data.dropna("index")
-        observation = data.sel(type=observation_name)
-        forecast = data.sel(type=forecast_name)
-        reference = data.sel(type=reference_name)
-        mse = statistics.mean_squared_error
-        skill_score = 1 - mse(observation, forecast) / mse(observation, reference)
-        return skill_score.values
-
-    @staticmethod
-    def skill_score_pre_calculations(data, observation_name, forecast_name):
-
-        data = data.loc[..., [observation_name, forecast_name]].drop("ahead")
-        data = data.dropna("index")
-
-        mean = data.mean("index")
-        sigma = np.sqrt(data.var("index"))
-        # r, p = stats.spearmanr(data.loc[..., [forecast_name, observation_name]])
-        r, p = stats.pearsonr(data.loc[..., forecast_name], data.loc[..., observation_name])
-
-        AI = np.array(r ** 2)
-        BI = ((r - (sigma.loc[..., forecast_name] / sigma.loc[..., observation_name])) ** 2).values
-        CI = (((mean.loc[..., forecast_name] - mean.loc[..., observation_name]) / sigma.loc[
-            ..., observation_name]) ** 2).values
-
-        suffix = {"mean": mean, "sigma": sigma, "r": r, "p": p}
-        return AI, BI, CI, data, suffix
-
-    def skill_score_mu_case_1(self, data, observation_name="orig", forecast_name="CNN"):
-        AI, BI, CI, data, _ = self.skill_score_pre_calculations(data, observation_name, forecast_name)
-        skill_score = np.array(AI - BI - CI)
-        return pd.DataFrame({"skill_score": [skill_score], "AI": [AI], "BI": [BI], "CI": [CI]}).to_xarray().to_array()
-
-    def skill_score_mu_case_2(self, data, observation_name="orig", forecast_name="CNN"):
-        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
-        monthly_mean = self.create_monthly_mean_from_daily_data(data)
-        data = xr.concat([data, monthly_mean], dim="type")
-        sigma = suffix["sigma"]
-        sigma_monthly = np.sqrt(monthly_mean.var())
-        # r, p = stats.spearmanr(data.loc[..., [observation_name, observation_name + "X"]])
-        r, p = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
-        AII = np.array(r ** 2)
-        BII = ((r - sigma_monthly / sigma.loc[observation_name]) ** 2).values
-        skill_score = np.array((AI - BI - CI - AII + BII) / (1 - AII + BII))
-        return pd.DataFrame({"skill_score": [skill_score], "AII": [AII], "BII": [BII]}).to_xarray().to_array()
-
-    def skill_score_mu_case_3(self, data, observation_name="orig", forecast_name="CNN", external_data=None):
-        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
-        mean, sigma = suffix["mean"], suffix["sigma"]
-        AIII = (((external_data.mean().values - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values
-        skill_score = np.array((AI - BI - CI + AIII) / 1 + AIII)
-        return pd.DataFrame({"skill_score": [skill_score], "AIII": [AIII]}).to_xarray().to_array()
-
-    def skill_score_mu_case_4(self, data, observation_name="orig", forecast_name="CNN", external_data=None):
-        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
-        monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values, index=data.index)
-        data = xr.concat([data, monthly_mean_external], dim="type")
-        mean, sigma = suffix["mean"], suffix["sigma"]
-        monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values)
-        mean_external = monthly_mean_external.mean()
-        sigma_external = np.sqrt(monthly_mean_external.var())
-
-        # r_mu, p_mu = stats.spearmanr(data.loc[..., [observation_name, observation_name+'X']])
-        r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
-
-        AIV = np.array(r_mu**2)
-        BIV = ((r_mu - sigma_external / sigma.loc[observation_name])**2).values
-        CIV = (((mean_external - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values
-        skill_score = np.array((AI - BI - CI - AIV + BIV + CIV) / (1 - AIV + BIV + CIV))
-        return pd.DataFrame({"skill_score": [skill_score], "AIV": [AIV], "BIV": [BIV], "CIV": CIV}).to_xarray().to_array()
-
-    @staticmethod
-    def create_monthly_mean_from_daily_data(data, columns=None, index=None):
-        if columns is None:
-            columns = data.type.values
-        if index is None:
-            index = data.index
-        coordinates = [index, [v + "X" for v in list(columns)]]  # TODO
-        empty_data = np.full((len(index), len(columns)), np.nan)
-        monthly_mean = xr.DataArray(empty_data, coords=coordinates, dims=["index", "type"])
-        mu = data.groupby("index.month").mean()
-
-        for month in mu.month:
-            monthly_mean[monthly_mean.index.dt.month == month, :] = mu[mu.month == month].values
-
-        return monthly_mean
diff --git a/src/statistics.py b/src/statistics.py
index 7c3caf91fbdf41eaedf7712b6e2cee21492c4ee7..fd8491748f7ebd30d7056af9cc1ce162c5743881 100644
--- a/src/statistics.py
+++ b/src/statistics.py
@@ -1,4 +1,8 @@
-__author__ = 'Lukas Leufen'
+from scipy import stats
+
+from src.run_modules.run_environment import RunEnvironment
+
+__author__ = 'Lukas Leufen, Felix Kleinert'
 __date__ = '2019-10-23'
 
 import numpy as np
@@ -75,3 +79,141 @@ def centre_inverse(data: Data, mean: Data) -> Data:
 
 def mean_squared_error(a, b):
     return np.square(a - b).mean()
+
+
+class SkillScores(RunEnvironment):
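+    """
+    Calculate MSE-based skill scores of the CNN forecast against reference forecasts.
+
+    `internal_data` is expected to be a per-station forecast xarray.DataArray (dims "index",
+    "type" and "ahead", where "type" holds e.g. "orig", "CNN", "OLS" and "persi").
+    """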
+
+    def __init__(self, internal_data):
+        super().__init__()
+        self.internal_data = internal_data
+
+    def skill_scores(self, window_lead_time):
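+        # Build a table of MSE skill scores per forecast lead time: CNN vs. persistence,
+        # OLS vs. persistence and CNN vs. OLS; columns are the lead times 1..window_lead_time.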
+        ahead_names = list(range(1, window_lead_time + 1))
+        skill_score = pd.DataFrame(index=['cnn-persi', 'ols-persi', 'cnn-ols'])
+        for iahead in ahead_names:
+            data = self.internal_data.sel(ahead=iahead)
+            skill_score[iahead] = [self.general_skill_score(data, forecast_name="CNN", reference_name="persi"),
+                                   self.general_skill_score(data, forecast_name="OLS", reference_name="persi"),
+                                   self.general_skill_score(data, forecast_name="CNN", reference_name="OLS")]
+        return skill_score
+
+    def climatological_skill_scores(self, external_data, window_lead_time):
+        ahead_names = list(range(1, window_lead_time + 1))
+
+        all_terms = ['AI', 'AII', 'AIII', 'AIV', 'BI', 'BII', 'BIV', 'CI', 'CIV', 'CASE I', 'CASE II', 'CASE III',
+                     'CASE IV']
+        skill_score = xr.DataArray(np.full((len(all_terms), len(ahead_names)), np.nan), coords=[all_terms, ahead_names],
+                                   dims=['terms', 'ahead'])
+
+        for iahead in ahead_names:
+
+            data = self.internal_data.sel(ahead=iahead)
+
+            skill_score.loc[["CASE I", "AI", "BI", "CI"], iahead] = np.stack(self._climatological_skill_score(
+                data, mu_type=1, forecast_name="CNN").values.flatten())
+
+            skill_score.loc[["CASE II", "AII", "BII"], iahead] = np.stack(self._climatological_skill_score(
+                data, mu_type=2, forecast_name="CNN").values.flatten())
+
+            if external_data is not None:
+                skill_score.loc[["CASE III", "AIII"], iahead] = np.stack(self._climatological_skill_score(
+                    data, mu_type=3, forecast_name="CNN",
+                    external_data=external_data).values.flatten())
+
+                skill_score.loc[["CASE IV", "AIV", "BIV", "CIV"], iahead] = np.stack(self._climatological_skill_score(
+                    data, mu_type=4, forecast_name="CNN",
+                    external_data=external_data).values.flatten())
+
+        return skill_score
+
+    def _climatological_skill_score(self, data, mu_type=1, observation_name="orig", forecast_name="CNN", external_data=None):
+        kwargs = {"external_data": external_data} if external_data is not None else {}
+        return getattr(self, f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs)
+
+    @staticmethod
+    def general_skill_score(data, observation_name="orig", forecast_name="CNN", reference_name="persi"):
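+        # MSE-based skill score: SS = 1 - MSE(observation, forecast) / MSE(observation, reference).
+        # SS = 1 is a perfect forecast, SS = 0 matches the reference, negative values are worse than it.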
+        data = data.dropna("index")
+        observation = data.sel(type=observation_name)
+        forecast = data.sel(type=forecast_name)
+        reference = data.sel(type=reference_name)
+        mse = mean_squared_error  # defined earlier in this module
+        skill_score = 1 - mse(observation, forecast) / mse(observation, reference)
+        return skill_score.values
+
+    @staticmethod
+    def skill_score_pre_calculations(data, observation_name, forecast_name):
+
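+        # Terms shared by all climatological skill score cases (Murphy-style MSE decomposition):
+        # AI = r^2 (squared correlation of forecast and observation),
+        # BI = (r - sigma_forecast / sigma_observation)^2 (conditional bias term),
+        # CI = ((mean_forecast - mean_observation) / sigma_observation)^2 (unconditional bias term).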
+        data = data.loc[..., [observation_name, forecast_name]].drop("ahead")
+        data = data.dropna("index")
+
+        mean = data.mean("index")
+        sigma = np.sqrt(data.var("index"))
+        # r, p = stats.spearmanr(data.loc[..., [forecast_name, observation_name]])
+        r, p = stats.pearsonr(data.loc[..., forecast_name], data.loc[..., observation_name])
+
+        AI = np.array(r ** 2)
+        BI = ((r - (sigma.loc[..., forecast_name] / sigma.loc[..., observation_name])) ** 2).values
+        CI = (((mean.loc[..., forecast_name] - mean.loc[..., observation_name]) / sigma.loc[
+            ..., observation_name]) ** 2).values
+
+        suffix = {"mean": mean, "sigma": sigma, "r": r, "p": p}
+        return AI, BI, CI, data, suffix
+
+    def skill_score_mu_case_1(self, data, observation_name="orig", forecast_name="CNN"):
+        AI, BI, CI, data, _ = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        skill_score = np.array(AI - BI - CI)
+        return pd.DataFrame({"skill_score": [skill_score], "AI": [AI], "BI": [BI], "CI": [CI]}).to_xarray().to_array()
+
+    def skill_score_mu_case_2(self, data, observation_name="orig", forecast_name="CNN"):
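+        # Case II: a monthly climatology estimated from the internal data itself serves as reference;
+        # AII and BII (correlation and conditional bias of the observation vs. its monthly mean)
+        # adjust the case I score for the skill already contained in that climatology.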
+        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        monthly_mean = self.create_monthly_mean_from_daily_data(data)
+        data = xr.concat([data, monthly_mean], dim="type")
+        sigma = suffix["sigma"]
+        sigma_monthly = np.sqrt(monthly_mean.var())
+        # r, p = stats.spearmanr(data.loc[..., [observation_name, observation_name + "X"]])
+        r, p = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
+        AII = np.array(r ** 2)
+        BII = ((r - sigma_monthly / sigma.loc[observation_name]) ** 2).values
+        skill_score = np.array((AI - BI - CI - AII + BII) / (1 - AII + BII))
+        return pd.DataFrame({"skill_score": [skill_score], "AII": [AII], "BII": [BII]}).to_xarray().to_array()
+
+    def skill_score_mu_case_3(self, data, observation_name="orig", forecast_name="CNN", external_data=None):
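+        # Case III: the reference mean is taken from external data (presumably observations from
+        # outside the evaluated period); AIII is the squared, normalised bias of that external mean.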
+        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        mean, sigma = suffix["mean"], suffix["sigma"]
+        AIII = (((external_data.mean().values - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values
+        skill_score = np.array((AI - BI - CI + AIII) / (1 + AIII))
+        return pd.DataFrame({"skill_score": [skill_score], "AIII": [AIII]}).to_xarray().to_array()
+
+    def skill_score_mu_case_4(self, data, observation_name="orig", forecast_name="CNN", external_data=None):
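+        # Case IV: like case III, but with a monthly resolved external climatology; AIV, BIV and CIV
+        # are the corresponding correlation, conditional-bias and bias terms of that reference.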
+        AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
+        monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values, index=data.index)
+        data = xr.concat([data, monthly_mean_external], dim="type")
+        mean, sigma = suffix["mean"], suffix["sigma"]
+        monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values)
+        mean_external = monthly_mean_external.mean()
+        sigma_external = np.sqrt(monthly_mean_external.var())
+
+        # r_mu, p_mu = stats.spearmanr(data.loc[..., [observation_name, observation_name+'X']])
+        r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
+
+        AIV = np.array(r_mu**2)
+        BIV = ((r_mu - sigma_external / sigma.loc[observation_name])**2).values
+        CIV = (((mean_external - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values
+        skill_score = np.array((AI - BI - CI - AIV + BIV + CIV) / (1 - AIV + BIV + CIV))
+        return pd.DataFrame({"skill_score": [skill_score], "AIV": [AIV], "BIV": [BIV], "CIV": CIV}).to_xarray().to_array()
+
+    @staticmethod
+    def create_monthly_mean_from_daily_data(data, columns=None, index=None):
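+        # Map the monthly mean of every "type" column back onto the daily index; the new columns get
+        # an "X" suffix (e.g. "origX") so they can later be concatenated with the daily data.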
+        if columns is None:
+            columns = data.type.values
+        if index is None:
+            index = data.index
+        coordinates = [index, [v + "X" for v in list(columns)]]
+        empty_data = np.full((len(index), len(columns)), np.nan)
+        monthly_mean = xr.DataArray(empty_data, coords=coordinates, dims=["index", "type"])
+        mu = data.groupby("index.month").mean()
+
+        for month in mu.month:
+            monthly_mean[monthly_mean.index.dt.month == month, :] = mu[mu.month == month].values
+
+        return monthly_mean
\ No newline at end of file