diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py index a4768d96b1cdea15c28d2af9f59020b550a82afe..335e67399e239c82d17f572dbc7a3bc3d401d409 100644 --- a/mlair/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -22,7 +22,7 @@ from scipy.stats import mannwhitneyu from mlair import helpers from mlair.data_handler.iterator import DataCollection -from mlair.helpers import TimeTrackingWrapper +from mlair.helpers import TimeTrackingWrapper, tables from mlair.plotting.abstract_plot_class import AbstractPlotClass from mlair.helpers.statistics import mann_whitney_u_test, represent_p_values_as_asteriks @@ -65,7 +65,7 @@ class PlotMonthlySummary(AbstractPlotClass): # pragma: no cover self._data = self._prepare_data(stations) self._window_lead_time = self._get_window_lead_time(window_lead_time) self._plot(target_var, target_var_unit) - self._save() + # self._save() def _prepare_data(self, stations: List) -> xr.DataArray: """ @@ -131,14 +131,33 @@ class PlotMonthlySummary(AbstractPlotClass): # pragma: no cover """ data = self._data.to_dataset(name='values').to_dask_dataframe() logging.debug("... start plotting") + plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}.pdf") + pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) color_palette = [matplotlib.colors.cnames["green"]] + sns.color_palette("Blues_r", self._window_lead_time).as_hex() - ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1.5, palette=color_palette, - flierprops={'marker': '.', 'markersize': 1}, showmeans=True, - meanprops={'markersize': 1, 'markeredgecolor': 'k'}) - ylabel = self._spell_out_chemical_concentrations(target_var) - ax.set(xlabel='month', ylabel=f'{ylabel} (in {target_var_unit})') - plt.tight_layout() + for i in range(3): + ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1.5, palette=color_palette, + flierprops={'marker': '.', 'markersize': 1}, showmeans=True, + meanprops={'markersize': 1, 'markeredgecolor': 'k'}) + ylabel = self._spell_out_chemical_concentrations(target_var) + ax.set(xlabel='month', ylabel=f'{ylabel} (in {target_var_unit})') + + handles, labels = ax.axes.get_legend_handles_labels() + if i > 0: + ax.legend().remove() + if i == 1: + ax.legend(handles[:len(color_palette)-1], labels[:len(color_palette)-1], + ncol=len(color_palette)-1, loc='lower center') + if i == 2: + ax.legend(handles[:len(color_palette)-1], labels[:len(color_palette)-1], + ncol=len(color_palette)-1, bbox_to_anchor=(0., 1.02, 1., .102), loc=3) + + plt.tight_layout() + pdf_pages.savefig() + # close all open figures / plots + pdf_pages.close() + plt.close('all') + @TimeTrackingWrapper @@ -644,7 +663,7 @@ class PlotSectorialSkillScore(AbstractPlotClass): # pragma: no cover fig, ax = plt.subplots(figsize=(size, size * 0.8)) data = self._data sns.boxplot(x="sector", y="data", hue="ahead", data=data, whis=1, ax=ax, palette="Blues_r", - showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, + showmeans=False, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, ) ax.axhline(y=0, color="grey", linewidth=.5) ax.set(ylabel=f"skill score ({self._model_setup} vs. {self._reference_model})", xlabel="sector", @@ -659,7 +678,7 @@ class PlotSectorialSkillScore(AbstractPlotClass): # pragma: no cover fig, ax = plt.subplots() data = self._data sns.boxplot(y="sector", x="data", hue="ahead", data=data, whis=1.5, ax=ax, palette="Blues_r", - showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, + showmeans=False, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, ) ax.axvline(x=0, color="grey", linewidth=.5) ax.set(xlabel=f"skill score ({self._model_setup} vs. {self._reference_model})", ylabel="sector", @@ -1291,7 +1310,8 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type", error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots', - block_length: str = None, model_name: str = "NN", model_indicator: str = "nn"): + block_length: str = None, model_name: str = "NN", model_indicator: str = "nn", + report_path: str = None): super().__init__(plot_folder, "sample_uncertainty_from_bootstrap") default_name = self.plot_name self.model_type_dim = model_type_dim @@ -1300,6 +1320,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover self.error_unit = error_unit self.block_length = block_length self.model_name = model_name + self.report_path = report_path data = self.rename_model_indicator(data, model_name, model_indicator) self.prepare_data(data) @@ -1322,9 +1343,9 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover @property def get_asteriks_from_mann_whitney_u_result(self): - return represent_p_values_as_asteriks(mann_whitney_u_test(data=self._data_table, - reference_col_name=self.model_name, - axis=0, alternative="two-sided").iloc[-1]) + utest = mann_whitney_u_test(data=self._data_table, reference_col_name=self.model_name, + axis=0, alternative="two-sided") + return represent_p_values_as_asteriks(utest).iloc[-1], utest def rename_model_indicator(self, data, model_name, model_indicator): data.coords[self.model_type_dim] = [{model_indicator: model_name}.get(n, n) @@ -1345,7 +1366,6 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover data_table = self._data_table n_boots = self._n_boots size = len(np.unique(data_table.columns)) - asteriks = self.get_asteriks_from_mann_whitney_u_result if orientation == "v": figsize, width = (size, 5), 0.4 elif orientation == "h": @@ -1358,6 +1378,13 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, boxprops={'facecolor': 'none', 'edgecolor': 'k'}, width=width, orient=orientation) + if apply_u_test: #ToDo move calc of u-test from plotting to postproc! + asteriks, utest = self.get_asteriks_from_mann_whitney_u_result + column_format = tables.create_column_format_for_tex(utest) + file_name = f"{self.plot_name}.%s" + tables.save_to_tex(self.report_path, file_name % "tex", column_format=column_format, df=utest) + tables.save_to_md(self.report_path, file_name % "md", df=utest) + utest.to_csv(os.path.join(self.report_path, file_name % "csv")) if orientation == "v": if apply_u_test: ax = self.set_significance_bars(asteriks, ax, data_table, orientation) diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index b5c1fcb14a15f0f5b66ea5ad814360c91909c459..d529bc715006c08d16b56c40cac98d0bc90787cd 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -675,11 +675,13 @@ class PostProcessing(RunEnvironment): try: if "PlotSampleUncertaintyFromBootstrap" in plot_list and self.uncertainty_estimate is not None: block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate") + report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report") PlotSampleUncertaintyFromBootstrap( data=self.uncertainty_estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim, dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error", error_unit=fr"{self.target_var_unit}$^2$", block_length=block_length, - model_name=self.model_display_name, model_indicator=self.forecast_indicator) + model_name=self.model_display_name, model_indicator=self.forecast_indicator, + report_path=report_path) except Exception as e: logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}" @@ -1170,3 +1172,4 @@ class PostProcessing(RunEnvironment): file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_').replace('/', '_') tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df) tables.save_to_md(report_path, file_name % "md", df=df) + df.to_csv(os.path.join(report_path, file_name % "csv"))