diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 028032253b6051b303b21cae398822a13a9a3a2b..11c71ae77bedd3d423d163b6a92c11d01f6b20af 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -12,6 +12,7 @@ from typing import Union, Tuple, Dict, List
 import itertools
 import dask.array as da
 from collections import OrderedDict
+from mlair.helpers import to_list

 Data = Union[xr.DataArray, pd.DataFrame]

@@ -235,7 +236,11 @@ def skill_score_based_on_mse(data: xr.DataArray, obs_name: str, pred_name: str,
                              aggregation_dim: str = "index", competitor_dim: str = "type") -> xr.DataArray:
     obs = data.sel({competitor_dim: obs_name})
     pred = data.sel({competitor_dim: pred_name})
-    ref = data.sel({competitor_dim: ref_name})
+    href = []
+    for ref_n in to_list(ref_name):
+        href.append(data.sel({competitor_dim: ref_n}))
+    ref = xr.concat(href, dim=competitor_dim).transpose(*data.dims)
+
     ss = 1 - mean_squared_error_nan(obs, pred, dim=aggregation_dim) / mean_squared_error_nan(obs, ref, dim=aggregation_dim)
     return ss

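Note on the statistics change: skill_score_based_on_mse now accepts either a single
reference model or a list of them (via to_list); the selected references are
concatenated along competitor_dim, so the final division broadcasts and yields one
skill score per reference. A minimal sketch of that broadcasting behaviour on toy
data, with a simple NaN-aware MSE standing in for mlair's mean_squared_error_nan
(only the numpy/xarray calls are real API, the rest is illustrative):

    import numpy as np
    import xarray as xr

    def mse_nan(x, y, dim):
        # stand-in for mlair.helpers.statistics.mean_squared_error_nan
        return ((x - y) ** 2).mean(dim=dim, skipna=True)

    data = xr.DataArray(np.random.rand(10, 4), dims=("index", "type"),
                        coords={"type": ["obs", "nn", "ols", "persi"]})
    obs = data.sel(type="obs", drop=True)
    pred = data.sel(type="nn", drop=True)
    ref = xr.concat([data.sel(type=r) for r in ["ols", "persi"]], dim="type")
    # obs broadcasts against ref over "type": one skill score per reference
    ss = 1 - mse_nan(obs, pred, dim="index") / mse_nan(obs, ref, dim="index")
    # ss has dims ("type",) with coords ["ols", "persi"]
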
hue="ahead", data=data, whis=1, ax=ax, palette="Blues_r", - showmeans=False, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, - ) - ax.axhline(y=0, color="grey", linewidth=.5) - ax.set(ylabel=f"skill score ({self._model_setup} vs. {self._reference_model})", xlabel="sector", - title="summary of all stations", ylim=self._lim(data)) - handles, _ = ax.get_legend_handles_labels() - plt.xticks(rotation=45, horizontalalignment="right") - ax.legend(handles, self._labels) - plt.tight_layout() + plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}.pdf") + pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) + for ref in self._reference_model: + ref_data = data[data[self._reference_dim]==ref] + fig, ax = plt.subplots(figsize=(size, size * 0.8)) + # sns.boxplot(x="sector", y="data", hue="ahead", data=ref_data, + # whis=1, ax=ax, palette="Blues_r", + # showmeans=False, #meanprops={"markersize": 3, "markeredgecolor": "k"}, + # flierprops={"marker": "."}, + # ) + sns.boxplot(x="sector", y="data", data=ref_data, ax=ax, **self._sns_settings) + ax.axhline(y=0, color="grey", linewidth=.5) + ax.set(ylabel=f"skill score ({self._model_setup} vs. {ref})", xlabel="sector", + title="summary of all stations", ylim=self._lim(ref_data)) + handles, _ = ax.get_legend_handles_labels() + # plt.xticks(rotation=45, horizontalalignment="right") + ax.legend(handles, self._labels) + plt.tight_layout() + pdf_pages.savefig() + # close all open figures / plots + pdf_pages.close() + plt.close('all') + @TimeTrackingWrapper def _plot_vertical(self): """Plot skill scores of the comparisons, but vertically aligned.""" - fig, ax = plt.subplots() data = self._data - sns.boxplot(y="sector", x="data", hue="ahead", data=data, whis=1.5, ax=ax, palette="Blues_r", - showmeans=False, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, - ) - ax.axvline(x=0, color="grey", linewidth=.5) - ax.set(xlabel=f"skill score ({self._model_setup} vs. {self._reference_model})", ylabel="sector", - title="summary of all stations", xlim=self._lim(data)) - handles, _ = ax.get_legend_handles_labels() - ax.legend(handles, self._labels) - plt.tight_layout() + plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}.pdf") + pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) + for ref in self._reference_model: + ref_data = data[data[self._reference_dim] == ref] + fig, ax = plt.subplots() + sns.boxplot(y="sector", x="data", data=ref_data, ax=ax, **self._sns_settings) + ax.axvline(x=0, color="grey", linewidth=.5) + ax.set(xlabel=f"skill score ({self._model_setup} vs. 
{ref})", ylabel="sector", + title="summary of all stations", xlim=self._lim(ref_data)) + handles, _ = ax.get_legend_handles_labels() + ax.legend(handles, self._labels) + plt.tight_layout() + pdf_pages.savefig() + # close all open figures / plots + pdf_pages.close() + plt.close('all') @staticmethod def _lim(data) -> Tuple[float, float]: diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index d529bc715006c08d16b56c40cac98d0bc90787cd..4149f9a1b3aa74139a35dc6e8ba85de88f06725c 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -101,6 +101,7 @@ class PostProcessing(RunEnvironment): self.index_dim = "index" self.iter_dim = self.data_store.get("iter_dim") self.upstream_wind_sector = None + self.model_and_competitor_name_list = None self.model_display_name = self.data_store.get_default("model_display_name", default=self.model.model_name) self._run() @@ -112,17 +113,6 @@ class PostProcessing(RunEnvironment): self.make_prediction(self.test_data) self.make_prediction(self.train_val_data) - # load upstream wind sector for test_data - try: - self.load_upstream_wind_sector(name_of_set="test") - self.skill_score_per_sector = self.calculate_error_metrics_based_on_upstream_wind_dir() - except Exception as e: - logging.info(f"Can not process upsstream wind sectors due to: {e}") - if self.skill_score_per_sector is not None: - path_sector_skill_scores = os.path.join(self.data_store.get("experiment_path"), - f"data/skill_scores_per_sector_test.nc") - self.skill_score_per_sector.to_netcdf(path_sector_skill_scores) - # calculate error metrics on test data self.calculate_test_score() @@ -149,6 +139,19 @@ class PostProcessing(RunEnvironment): self.report_error_metrics({self.forecast_indicator: skill_score_climatological}) self.report_error_metrics({"skill_score": skill_score_competitive}) + # load upstream wind sector for test_data + try: + cometitor_names = remove_items(self.model_and_competitor_name_list, self.model_display_name) + self.load_upstream_wind_sector(name_of_set="test") + self.skill_score_per_sector = self.calculate_error_metrics_based_on_upstream_wind_dir( + ref_name=cometitor_names) + except Exception as e: + logging.info(f"Can not process upsstream wind sectors due to: {e}") + if self.skill_score_per_sector is not None: + path_sector_skill_scores = os.path.join(self.data_store.get("experiment_path"), + f"data/skill_scores_per_sector_test.nc") + self.skill_score_per_sector.to_netcdf(path_sector_skill_scores) + # plotting self.plot() @@ -603,7 +606,8 @@ class PostProcessing(RunEnvironment): if "PlotSectorialSkillScore" in plot_list: PlotSectorialSkillScore(self.skill_score_per_sector, plot_folder=self.plot_path, model_setup=self.model_display_name, sampling=self._sampling, - model_name_for_plots=self.model_name_for_plots, ahead_dim=self.ahead_dim + model_name_for_plots=self.model_name_for_plots, ahead_dim=self.ahead_dim, + reference_dim=self.model_type_dim ) except Exception as e: logging.error(f"Could not create plot PlotSectorialSkillScore due to the following error: {e}" @@ -992,7 +996,7 @@ class PostProcessing(RunEnvironment): external_data_expd.to_netcdf(os.path.join(path, f"forecasts_ds_{str(station)}_test.nc")) @TimeTrackingWrapper - def calculate_error_metrics_based_on_upstream_wind_dir(self, ref_name: str = "ols") -> xr.DataArray: + def calculate_error_metrics_based_on_upstream_wind_dir(self, ref_name: str = ["ols", "persi"]) -> xr.DataArray: """ Calculates the error metrics (mse)/skill scores based on the 
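Note on the metadata: the reference-model names and their dimension travel as
DataArray attributes so that PlotSectorialSkillScore._prepare_data can recover them
after the detour through skill_scores_per_sector_test.nc. A toy version of that
round trip (dimension names and values are assumptions; whether a list-valued
attribute survives to_netcdf depends on the backend, e.g. netCDF4 stores it as an
array of strings):

    import numpy as np
    import xarray as xr

    h_sector_skill_scores = []
    for sector in ["N", "E", "S", "W"]:
        ss = xr.DataArray(np.random.rand(3, 2), dims=("ahead", "type"),
                          coords={"ahead": [1, 2, 3], "type": ["ols", "persi"]})
        h_sector_skill_scores.append(ss.expand_dims(sector=[sector]))
    sector_skill_scores = xr.concat(h_sector_skill_scores, dim="sector")
    sector_skill_scores = sector_skill_scores.assign_attrs(
        {"reference_model": ["ols", "persi"], "reference_model_dim": "type"})
    sector_skill_scores.to_netcdf("skill_scores_per_sector_test.nc")
    reloaded = xr.open_dataarray("skill_scores_per_sector_test.nc")
    print(reloaded.attrs["reference_model"])  # ['ols' 'persi'] as an array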