diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py
index 80d12fc4c0c073c62ac2a72b49bfdf73acbb45da..e7d4bc75a17fd73636cda3fdef05767c1621b205 100644
--- a/src/run_modules/post_processing.py
+++ b/src/run_modules/post_processing.py
@@ -54,23 +54,20 @@ class PostProcessing(RunEnvironment):
 
         # bootstraps
         if self.data_store.get("evaluate_bootstraps", "general.postprocessing"):
-            bootstrap_path = self.data_store.get("bootstrap_path", "general.postprocessing")
-            number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing")
-            BootStraps(self.test_data, bootstrap_path, number_of_bootstraps)
             with TimeTracking(name="split (refac_1): create_boot_straps_refac_2()"):
-                self.create_boot_straps_refac_2()
+                bootstrap_path = self.data_store.get("bootstrap_path", "general.postprocessing")
+                number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing")
+                BootStraps(self.test_data, bootstrap_path, number_of_bootstraps)
+                self.create_boot_straps()
                 self.bootstrap_skill_scores = self.calculate_bootstrap_skill_scores()
-            with TimeTracking(name="merged: combined_boot_forecast_and_skill()"):
-                self.bootstrap_skill_scores = self.combined_boot_forecast_and_skill()
-
 
         # skill scores
-        # self.skill_scores = self.calculate_skill_scores()
+        self.skill_scores = self.calculate_skill_scores()
 
         # plotting
-        # self.plot()
+        self.plot()
 
-    def create_boot_straps_refac_2(self):
+    def create_boot_straps(self):
         # forecast
         with TimeTracking(name="boot predictions"):
             bootstrap_path = self.data_store.get("bootstrap_path", "general")
@@ -142,62 +139,6 @@ class PostProcessing(RunEnvironment):
                 score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"])
             return score
 
-    def combined_boot_forecast_and_skill(self):
-        # forecast
-        with TimeTracking(name="boot predictions"):
-            bootstrap_path = self.data_store.get("bootstrap_path", "general")
-            forecast_path = self.data_store.get("forecast_path", "general")
-            window_lead_time = self.data_store.get("window_lead_time", "general")
-            number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing")
-            bootstraps = BootStraps(self.test_data, bootstrap_path, number_of_bootstraps)
-            skill_scores = statistics.SkillScores(None)
-            score = {}
-
-            for station in bootstraps.stations:
-                with TimeTracking(name=station):
-                    logging.info(station)
-                    # store also true labels for each station
-                    labels = bootstraps.get_labels(station)
-                    shape = labels.shape
-                    labels = labels.reshape((*shape, 1))
-                    coords = (range(labels.shape[0]), range(1, labels.shape[1] + 1))
-                    # file_name = os.path.join(forecast_path, f"bootstraps_labels_{station}.nc")
-                    labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=["index", "ahead", "type"])
-                    # labels.to_netcdf(file_name)
-                    shape = labels.shape
-                    orig = bootstraps.get_orig_prediction(forecast_path,  f"forecasts_norm_{station}_test.nc").reshape(shape)
-                    coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"])
-                    orig = xr.DataArray(orig, coords=coords, dims=["index", "ahead", "type"])
-                    skill = pd.DataFrame(columns=range(1, window_lead_time + 1))
-                    for var in bootstraps.variables:
-                        hist, label, station_bootstrap, length = bootstraps.get_generator_station_var_wise(station, var)
-
-                        # make bootstrap predictions
-                        bootstrap_predictions = self.model.predict_generator(generator=station_bootstrap(),
-                                                                             steps=length,
-                                                                             use_multiprocessing=True)
-                        if isinstance(bootstrap_predictions, list):
-                            bootstrap_predictions = bootstrap_predictions[-1]
-                        # get bootstrap prediction meta data
-                        bootstrap_meta = np.array(bootstraps.get_bootstrap_meta_station_var_wise(station, var))
-                        # save bootstrap predictions separately for each station and variable combination
-                        # store each variable - station - combination
-                        ind = np.all(bootstrap_meta == [var, station], axis=1)
-                        length = sum(ind)
-                        sel = bootstrap_predictions[ind].reshape((length, window_lead_time, 1))
-                        coords = (range(length), range(1, window_lead_time + 1))
-                        boot_data = xr.DataArray(sel, coords=(*coords, [var]), dims=["index", "ahead", "type"])
-                        # file_name = os.path.join(forecast_path, f"bootstraps_{var}_{station}.nc")
-                        # boot_data.to_netcdf(file_name)
-                        boot_data = boot_data.combine_first(labels).combine_first(orig)
-                        boot_scores = []
-                        for ahead in range(1, window_lead_time + 1):
-                            data = boot_data.sel(ahead=ahead)
-                            boot_scores.append(skill_scores.general_skill_score(data, forecast_name=var, reference_name="orig"))
-                        skill.loc[var] = np.array(boot_scores)
-                    score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"])
-            return score
-
     def _load_model(self):
         try:
             model = self.data_store.get("best_model", "general")