diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py index 80d12fc4c0c073c62ac2a72b49bfdf73acbb45da..e7d4bc75a17fd73636cda3fdef05767c1621b205 100644 --- a/src/run_modules/post_processing.py +++ b/src/run_modules/post_processing.py @@ -54,23 +54,20 @@ class PostProcessing(RunEnvironment): # bootstraps if self.data_store.get("evaluate_bootstraps", "general.postprocessing"): - bootstrap_path = self.data_store.get("bootstrap_path", "general.postprocessing") - number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing") - BootStraps(self.test_data, bootstrap_path, number_of_bootstraps) with TimeTracking(name="split (refac_1): create_boot_straps_refac_2()"): - self.create_boot_straps_refac_2() + bootstrap_path = self.data_store.get("bootstrap_path", "general.postprocessing") + number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing") + BootStraps(self.test_data, bootstrap_path, number_of_bootstraps) + self.create_boot_straps() self.bootstrap_skill_scores = self.calculate_bootstrap_skill_scores() - with TimeTracking(name="merged: combined_boot_forecast_and_skill()"): - self.bootstrap_skill_scores = self.combined_boot_forecast_and_skill() - # skill scores - # self.skill_scores = self.calculate_skill_scores() + self.skill_scores = self.calculate_skill_scores() # plotting - # self.plot() + self.plot() - def create_boot_straps_refac_2(self): + def create_boot_straps(self): # forecast with TimeTracking(name="boot predictions"): bootstrap_path = self.data_store.get("bootstrap_path", "general") @@ -142,62 +139,6 @@ class PostProcessing(RunEnvironment): score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"]) return score - def combined_boot_forecast_and_skill(self): - # forecast - with TimeTracking(name="boot predictions"): - bootstrap_path = self.data_store.get("bootstrap_path", "general") - forecast_path = self.data_store.get("forecast_path", "general") - window_lead_time = self.data_store.get("window_lead_time", "general") - number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing") - bootstraps = BootStraps(self.test_data, bootstrap_path, number_of_bootstraps) - skill_scores = statistics.SkillScores(None) - score = {} - - for station in bootstraps.stations: - with TimeTracking(name=station): - logging.info(station) - # store also true labels for each station - labels = bootstraps.get_labels(station) - shape = labels.shape - labels = labels.reshape((*shape, 1)) - coords = (range(labels.shape[0]), range(1, labels.shape[1] + 1)) - # file_name = os.path.join(forecast_path, f"bootstraps_labels_{station}.nc") - labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=["index", "ahead", "type"]) - # labels.to_netcdf(file_name) - shape = labels.shape - orig = bootstraps.get_orig_prediction(forecast_path, f"forecasts_norm_{station}_test.nc").reshape(shape) - coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"]) - orig = xr.DataArray(orig, coords=coords, dims=["index", "ahead", "type"]) - skill = pd.DataFrame(columns=range(1, window_lead_time + 1)) - for var in bootstraps.variables: - hist, label, station_bootstrap, length = bootstraps.get_generator_station_var_wise(station, var) - - # make bootstrap predictions - bootstrap_predictions = self.model.predict_generator(generator=station_bootstrap(), - steps=length, - use_multiprocessing=True) - if isinstance(bootstrap_predictions, list): - bootstrap_predictions = bootstrap_predictions[-1] - # get bootstrap prediction meta data - bootstrap_meta = np.array(bootstraps.get_bootstrap_meta_station_var_wise(station, var)) - # save bootstrap predictions separately for each station and variable combination - # store each variable - station - combination - ind = np.all(bootstrap_meta == [var, station], axis=1) - length = sum(ind) - sel = bootstrap_predictions[ind].reshape((length, window_lead_time, 1)) - coords = (range(length), range(1, window_lead_time + 1)) - boot_data = xr.DataArray(sel, coords=(*coords, [var]), dims=["index", "ahead", "type"]) - # file_name = os.path.join(forecast_path, f"bootstraps_{var}_{station}.nc") - # boot_data.to_netcdf(file_name) - boot_data = boot_data.combine_first(labels).combine_first(orig) - boot_scores = [] - for ahead in range(1, window_lead_time + 1): - data = boot_data.sel(ahead=ahead) - boot_scores.append(skill_scores.general_skill_score(data, forecast_name=var, reference_name="orig")) - skill.loc[var] = np.array(boot_scores) - score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"]) - return score - def _load_model(self): try: model = self.data_store.get("best_model", "general")