diff --git a/src/plotting/postprocessing_plotting.py b/src/plotting/postprocessing_plotting.py index d3f6a56a2ec42e41454f0ced8853cb8d1001e495..a41c636b5ab17d2039f7976fca625e9c8e11ce6e 100644 --- a/src/plotting/postprocessing_plotting.py +++ b/src/plotting/postprocessing_plotting.py @@ -50,8 +50,8 @@ class PlotMonthlySummary(RunEnvironment): def _prepare_data(self, stations: List) -> xr.DataArray: """ - Pre-process data required to plot. For each station, load locally saved predictions, extract the CNN and orig - prediction and group them into monthly bins (no aggregation, only sorting them). + Pre-process data required to plot. For each station, load locally saved predictions, extract the CNN prediction + and the observation and group them into monthly bins (no aggregation, only sorting them). :param stations: all stations to plot :return: The entire data set, flagged with the corresponding month. """ @@ -65,10 +65,10 @@ class PlotMonthlySummary(RunEnvironment): if len(data_cnn.shape) > 1: data_cnn.coords["ahead"].values = [f"{days}d" for days in data_cnn.coords["ahead"].values] - data_orig = data.sel(type="orig", ahead=1).squeeze() - data_orig.coords["ahead"] = "orig" + data_obs = data.sel(type="obs", ahead=1).squeeze() + data_obs.coords["ahead"] = "obs" - data_concat = xr.concat([data_orig, data_cnn], dim="ahead") + data_concat = xr.concat([data_obs, data_cnn], dim="ahead") data_concat = data_concat.drop("type") data_concat.index.values = data_concat.index.values.astype("datetime64[M]").astype(int) % 12 + 1 @@ -189,7 +189,7 @@ class PlotStationMap(RunEnvironment): plt.close('all') -def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_window: int = 3, ref_name: str = 'orig', +def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_window: int = 3, ref_name: str = 'obs', pred_name: str = 'CNN', season: str = "", forecast_path: str = None, plot_name_affix: str = "", units: str = "ppb"): """ @@ -222,7 +222,7 @@ def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_w for station in stations: file = os.path.join(forecast_path, f"forecasts_{station}_test.nc") data_tmp = xr.open_dataarray(file) - data_collector.append(data_tmp.loc[:, :, ['CNN', 'orig', 'OLS']].assign_coords(station=station)) + data_collector.append(data_tmp.loc[:, :, ['CNN', 'obs', 'OLS']].assign_coords(station=station)) return xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station') def segment_data(data): @@ -252,7 +252,7 @@ def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_w def labels(plot_type, data_unit="ppb"): names = (f"forecast concentration (in {data_unit})", f"observed concentration (in {data_unit})") - if plot_type == "orig": + if plot_type == "obs": return names else: return names[::-1] @@ -515,7 +515,7 @@ class PlotTimeSeries(RunEnvironment): logging.debug(f"... preprocess station {station}") file_name = os.path.join(self._data_path, self._data_name % station) data = xr.open_dataarray(file_name) - return data.sel(type=["CNN", "orig"]) + return data.sel(type=["CNN", "obs"]) def _plot(self, plot_folder): pdf_pages = self._create_pdf_pages(plot_folder) @@ -529,7 +529,7 @@ class PlotTimeSeries(RunEnvironment): for i_half_of_year in range(factor): pos = factor * i_year + i_half_of_year plot_data = self._create_plot_data(data_year, factor, i_half_of_year) - self._plot_orig(axes[pos], plot_data) + self._plot_obs(axes[pos], plot_data) self._plot_ahead(axes[pos], plot_data) if np.isnan(plot_data.values).all(): nan_list.append(pos) @@ -574,10 +574,10 @@ class PlotTimeSeries(RunEnvironment): label = f"{ahead}{self._sampling}" ax.plot(index, plot_data.values, color=color[ahead-1], label=label) - def _plot_orig(self, ax, data): - orig_data = data.sel(type="orig", ahead=1) + def _plot_obs(self, ax, data): + obs_data = data.sel(type="obs", ahead=1) index = data.index + np.timedelta64(1, self._sampling) - ax.plot(index, orig_data.values, color=matplotlib.colors.cnames["green"], label="orig") + ax.plot(index, obs_data.values, color=matplotlib.colors.cnames["green"], label="obs") @staticmethod def _get_time_range(data): diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py index 3c50799b939da2bc8517d1e58dd2c73baebe367b..06203c879872891f57c719040482fe052824c65e 100644 --- a/src/run_modules/post_processing.py +++ b/src/run_modules/post_processing.py @@ -43,7 +43,7 @@ class PostProcessing(RunEnvironment): with TimeTracking(): self.train_ols_model() logging.info("take a look on the next reported time measure. If this increases a lot, one should think to " - "skip make_prediction() whenever it is possible to save time.") + "skip train_ols_model() whenever it is possible to save time.") with TimeTracking(): self.make_prediction() logging.info("take a look on the next reported time measure. If this increases a lot, one should think to " @@ -64,9 +64,9 @@ class PostProcessing(RunEnvironment): logging.debug("Run plotting routines...") path = self.data_store.get("forecast_path", "general") - plot_conditional_quantiles(self.test_data.stations, pred_name="CNN", ref_name="orig", + plot_conditional_quantiles(self.test_data.stations, pred_name="CNN", ref_name="obs", forecast_path=path, plot_name_affix="cali-ref", plot_folder=self.plot_path) - plot_conditional_quantiles(self.test_data.stations, pred_name="orig", ref_name="CNN", + plot_conditional_quantiles(self.test_data.stations, pred_name="obs", ref_name="CNN", forecast_path=path, plot_name_affix="like-bas", plot_folder=self.plot_path) PlotStationMap(generators={'b': self.test_data}, plot_folder=self.plot_path) PlotMonthlySummary(self.test_data.stations, path, r"forecasts_%s_test.nc", self.target_var, @@ -113,15 +113,15 @@ class PostProcessing(RunEnvironment): # ols ols_prediction = self._create_ols_forecast(input_data, ols_prediction, mean, std, transformation_method) - # orig pred - orig_pred = self._create_orig_forecast(data, None, mean, std, transformation_method) + # observation + observation = self._create_observation(data, None, mean, std, transformation_method) # merge all predictions full_index = self.create_fullindex(data.data.indexes['datetime'], self._get_frequency()) all_predictions = self.create_forecast_arrays(full_index, list(data.label.indexes['window']), CNN=nn_prediction, persi=persistence_prediction, - orig=orig_pred, + obs=observation, OLS=ols_prediction) # save all forecasts locally @@ -134,7 +134,7 @@ class PostProcessing(RunEnvironment): return getter.get(self._sampling, None) @staticmethod - def _create_orig_forecast(data, _, mean, std, transformation_method): + def _create_observation(data, _, mean, std, transformation_method): return statistics.apply_inverse_transformation(data.label.copy(), mean, std, transformation_method) def _create_ols_forecast(self, input_data, ols_prediction, mean, std, transformation_method): @@ -227,7 +227,7 @@ class PostProcessing(RunEnvironment): try: data = self.train_val_data.get_data_generator(station) mean, std, transformation_method = data.get_transformation_information(variable=self.target_var) - external_data = self._create_orig_forecast(data, None, mean, std, transformation_method) + external_data = self._create_observation(data, None, mean, std, transformation_method) external_data = external_data.squeeze("Stations").sel(window=1).drop(["window", "Stations", "variables"]) return external_data.rename({'datetime': 'index'}) except KeyError: diff --git a/src/statistics.py b/src/statistics.py index df73784df830d5f7b96bf0fcd18a65d362516f12..e3481d0e0f0561ac8a903648a69e92c6d6acc40d 100644 --- a/src/statistics.py +++ b/src/statistics.py @@ -126,12 +126,12 @@ class SkillScores(RunEnvironment): return skill_score - def _climatological_skill_score(self, data, mu_type=1, observation_name="orig", forecast_name="CNN", external_data=None): + def _climatological_skill_score(self, data, mu_type=1, observation_name="obs", forecast_name="CNN", external_data=None): kwargs = {"external_data": external_data} if external_data is not None else {} return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs) @staticmethod - def general_skill_score(data, observation_name="orig", forecast_name="CNN", reference_name="persi"): + def general_skill_score(data, observation_name="obs", forecast_name="CNN", reference_name="persi"): data = data.dropna("index") observation = data.sel(type=observation_name) forecast = data.sel(type=forecast_name) @@ -159,12 +159,12 @@ class SkillScores(RunEnvironment): suffix = {"mean": mean, "sigma": sigma, "r": r, "p": p} return AI, BI, CI, data, suffix - def skill_score_mu_case_1(self, data, observation_name="orig", forecast_name="CNN"): + def skill_score_mu_case_1(self, data, observation_name="obs", forecast_name="CNN"): AI, BI, CI, data, _ = self.skill_score_pre_calculations(data, observation_name, forecast_name) skill_score = np.array(AI - BI - CI) return pd.DataFrame({"skill_score": [skill_score], "AI": [AI], "BI": [BI], "CI": [CI]}).to_xarray().to_array() - def skill_score_mu_case_2(self, data, observation_name="orig", forecast_name="CNN"): + def skill_score_mu_case_2(self, data, observation_name="obs", forecast_name="CNN"): AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name) monthly_mean = self.create_monthly_mean_from_daily_data(data) data = xr.concat([data, monthly_mean], dim="type") @@ -177,14 +177,14 @@ class SkillScores(RunEnvironment): skill_score = np.array((AI - BI - CI - AII + BII) / (1 - AII + BII)) return pd.DataFrame({"skill_score": [skill_score], "AII": [AII], "BII": [BII]}).to_xarray().to_array() - def skill_score_mu_case_3(self, data, observation_name="orig", forecast_name="CNN", external_data=None): + def skill_score_mu_case_3(self, data, observation_name="obs", forecast_name="CNN", external_data=None): AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name) mean, sigma = suffix["mean"], suffix["sigma"] AIII = (((external_data.mean().values - mean.loc[observation_name]) / sigma.loc[observation_name])**2).values skill_score = np.array((AI - BI - CI + AIII) / 1 + AIII) return pd.DataFrame({"skill_score": [skill_score], "AIII": [AIII]}).to_xarray().to_array() - def skill_score_mu_case_4(self, data, observation_name="orig", forecast_name="CNN", external_data=None): + def skill_score_mu_case_4(self, data, observation_name="obs", forecast_name="CNN", external_data=None): AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name) monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values, index=data.index) data = xr.concat([data, monthly_mean_external], dim="type")