diff --git a/src/plotting/postprocessing_plotting.py b/src/plotting/postprocessing_plotting.py
index a41c636b5ab17d2039f7976fca625e9c8e11ce6e..b39de8e957a110121c0e8812608d32aad3431570 100644
--- a/src/plotting/postprocessing_plotting.py
+++ b/src/plotting/postprocessing_plotting.py
@@ -479,6 +479,76 @@ class PlotCompetitiveSkillScore(RunEnvironment):
         plt.close()
 
 
+class PlotBootstrapSkillScore(RunEnvironment):
+    """
+    Create plot of climatological skill score after Murphy (1988) as box plot over all stations. A forecast time step
+    (called "ahead") is separately shown to highlight the differences for each prediction time step. The skill score
+    of each bootstrapped input variable ("boot_var") is displayed as a grouped box per lead time. Y-axis is adjusted
+    following the data and not hard coded. The plot is saved under the plot_folder path with name
+    skill_score_bootstrap_{model_setup}.pdf and a resolution of 500dpi.
+    """
+
+    def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = ""):
+        """
+        Sets attributes and creates the plot.
+        :param data: dictionary with station names as keys and 2D xarrays as values, consisting of axes ahead and terms.
+        :param plot_folder: path to save the plot (default: current directory)
+        :param model_setup: architecture type to specify the plot name (default: "")
+        """
+        super().__init__()
+        self._labels = None
+        self._data = self._prepare_data(data)
+        self._plot(plot_folder, model_setup)
+
+    def _prepare_data(self, data: Dict) -> pd.DataFrame:
+        """
+        Transform the given data into a plot friendly format. Also set the plot labels depending on the lead time
+        dimensions.
+        :param data: dictionary with station names as keys and 2D xarrays as values
+        :return: pre-processed data set
+        """
+        data = helpers.dict_to_xarray(data, "station")
+        self._labels = [str(i) + "d" for i in data.coords["ahead"].values]
+        return data.to_dataframe("data").reset_index(level=[0, 1, 2])
+
+    def _label_add(self, score_only: bool):
+        """
+        Returns the phrase "terms and " if score_only is disabled, or an empty string (if score_only=True).
+        :param score_only: if false all terms are relevant, otherwise only CASE I to IV
+        :return: additional label
+        """
+        return "" if score_only else "terms and "
+
+    def _plot(self, plot_folder,  model_setup):
+        """
+        Main plot function to plot the bootstrap skill score.
+        :param plot_folder: path to save the plot
+        :param model_setup: architecture type to specify plot name
+        """
+        fig, ax = plt.subplots()
+        sns.boxplot(x="boot_var", y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
+                    showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
+        ax.axhline(y=0, color="grey", linewidth=.5)
+        ax.set(ylabel=f"skill score", xlabel="", title="summary of all stations")
+        handles, _ = ax.get_legend_handles_labels()
+        ax.legend(handles, self._labels)
+        plt.tight_layout()
+        self._save(plot_folder, model_setup)
+
+    @staticmethod
+    def _save(plot_folder, model_setup):
+        """
+        Standard save method to store plot locally. The name of this plot is dynamic: it includes the given model
+        setup (e.g. 'CNN') in the file name skill_score_bootstrap_{model_setup}.pdf.
+        :param plot_folder: path to save the plot
+        :param model_setup: architecture type to specify plot name
+        """
+        plot_name = os.path.join(plot_folder, f"skill_score_bootstrap_{model_setup}.pdf")
+        logging.debug(f"... save plot to {plot_name}")
+        plt.savefig(plot_name, dpi=500)
+        plt.close('all')
+
+
 class PlotTimeSeries(RunEnvironment):
 
     def __init__(self, stations: List, data_path: str, name: str, window_lead_time: int = None, plot_folder: str = ".",
diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py
index 0a791617a8c35b57cc3a646abb7e2e9fe5aa968c..5c392a402da47251c51668e0b06a3067104a61e6 100644
--- a/src/run_modules/post_processing.py
+++ b/src/run_modules/post_processing.py
@@ -18,7 +18,7 @@ from src.datastore import NameNotFoundInDataStore
 from src.helpers import TimeTracking
 from src.model_modules.linear_model import OrdinaryLeastSquaredModel
 from src.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \
-    PlotCompetitiveSkillScore, PlotTimeSeries
+    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore
 from src.plotting.postprocessing_plotting import plot_conditional_quantiles
 from src.run_modules.run_environment import RunEnvironment
 
@@ -38,6 +38,7 @@ class PostProcessing(RunEnvironment):
         self.target_var = self.data_store.get("target_var", "general")
         self._sampling = self.data_store.get("sampling", "general")
         self.skill_scores = None
+        self.bootstrap_skill_scores = None
         self._run()
 
     def _run(self):
@@ -49,9 +50,9 @@ class PostProcessing(RunEnvironment):
             self.make_prediction()
             logging.info("take a look on the next reported time measure. If this increases a lot, one should think to "
                          "skip make_prediction() whenever it is possible to save time.")
-        # self.skill_scores = self.calculate_skill_scores()
-        # self.plot()
-        self.create_boot_straps()
+        self.bootstrap_skill_scores = self.create_boot_straps()
+        self.skill_scores = self.calculate_skill_scores()
+        self.plot()
 
     def create_boot_straps(self):
 
@@ -60,7 +61,7 @@ class PostProcessing(RunEnvironment):
         bootstrap_path = self.data_store.get("bootstrap_path", "general")
         forecast_path = self.data_store.get("forecast_path", "general")
         window_lead_time = self.data_store.get("window_lead_time", "general")
-        bootstraps = BootStraps(self.test_data, bootstrap_path, 2)
+        bootstraps = BootStraps(self.test_data, bootstrap_path, 20)
         with TimeTracking(name="boot predictions"):
             bootstrap_predictions = self.model.predict_generator(generator=bootstraps.boot_strap_generator(),
                                                                  steps=bootstraps.get_boot_strap_generator_length())
@@ -72,18 +73,19 @@ class PostProcessing(RunEnvironment):
                 ind = np.all(bootstrap_meta == [boot, station], axis=1)
                 length = sum(ind)
                 sel = bootstrap_predictions[ind].reshape((length, window_lead_time, 1))
-                coords = (range(length), range(window_lead_time))
-                tmp = xr.DataArray(sel, coords=(*coords, [boot]), dims=["index", "window", "type"])
+                coords = (range(length), range(1, window_lead_time + 1))
+                tmp = xr.DataArray(sel, coords=(*coords, [boot]), dims=["index", "ahead", "type"])
                 file_name = os.path.join(forecast_path, f"bootstraps_{boot}_{station}.nc")
                 tmp.to_netcdf(file_name)
             labels = bootstraps.get_labels(station).reshape((length, window_lead_time, 1))
             file_name = os.path.join(forecast_path, f"bootstraps_labels_{station}.nc")
-            labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=["index", "window", "type"])
+            labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=["index", "ahead", "type"])
             labels.to_netcdf(file_name)
 
         # file_name = os.path.join(forecast_path, f"bootstraps_orig.nc")
         # orig = xr.open_dataarray(file_name)
 
+
         # calc skill scores
         skill_scores = statistics.SkillScores(None)
         score = {}
@@ -92,17 +94,20 @@ class PostProcessing(RunEnvironment):
             labels = xr.open_dataarray(file_name)
             shape = labels.shape
             orig = bootstraps.get_orig_prediction(forecast_path,  f"forecasts_norm_{station}_test.nc").reshape(shape)
-            orig = xr.DataArray(orig, coords=(range(shape[0]), range(shape[1]), ["orig"]), dims=["index", "window", "type"])
-            score[station] = {}
+            orig = xr.DataArray(orig, coords=(range(shape[0]), range(1, shape[1] + 1), ["orig"]), dims=["index", "ahead", "type"])
+            skill = pd.DataFrame(columns=range(1, window_lead_time + 1))
             for boot in variables:
                 file_name = os.path.join(forecast_path, f"bootstraps_{boot}_{station}.nc")
                 boot_data = xr.open_dataarray(file_name)
                 boot_data = boot_data.combine_first(labels)
                 boot_data = boot_data.combine_first(orig)
-                score[station][boot] = skill_scores.general_skill_score(boot_data, forecast_name=boot, reference_name="orig")
-
-        # plot
-
+                boot_scores = []
+                for iahead in range(window_lead_time):
+                    data = boot_data.sel(ahead=iahead + 1)
+                    boot_scores.append(skill_scores.general_skill_score(data, forecast_name=boot, reference_name="orig"))
+                skill.loc[boot] = np.array(boot_scores)
+            score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"])
+        return score
 
     def _load_model(self):
         try:
@@ -128,6 +133,7 @@ class PostProcessing(RunEnvironment):
         PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
                                      extra_name_tag="all_terms_", model_setup="CNN")
         PlotCompetitiveSkillScore(self.skill_scores[0], plot_folder=self.plot_path, model_setup="CNN")
+        PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path, model_setup="CNN")
         PlotTimeSeries(self.test_data.stations, path, r"forecasts_%s_test.nc", plot_folder=self.plot_path, sampling=self._sampling)
 
     def calculate_test_score(self):