diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index af7975f3a042163a885f590c6624076fe91f03aa..19a4893d49c7702cd092858c5c885453e974cbc1 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -284,7 +284,7 @@ class SkillScores:
     def get_model_name_combinations(self):
         """Return all combinations of two models as tuple and string."""
         combinations = list(itertools.combinations(self.models, 2))
-        combination_strings = [f"{first}-{second}" for (first, second) in combinations]
+        combination_strings = [f"{first} - {second}" for (first, second) in combinations]
         return combinations, combination_strings
 
     def skill_scores(self) -> [pd.DataFrame, pd.DataFrame]:
diff --git a/mlair/plotting/abstract_plot_class.py b/mlair/plotting/abstract_plot_class.py
index dab45156ac1bbe033ba073e01245ffc8b65ca6b3..c91dbec78c4bc990cc9c40c3afb6c506b62928d8 100644
--- a/mlair/plotting/abstract_plot_class.py
+++ b/mlair/plotting/abstract_plot_class.py
@@ -59,7 +59,7 @@ class AbstractPlotClass:
         if not os.path.exists(plot_folder):
             os.makedirs(plot_folder)
         self.plot_folder = plot_folder
-        self.plot_name = plot_name
+        self.plot_name = plot_name.replace("/", "_")
         self.resolution = resolution
         if rc_params is None:
             rc_params = {'axes.labelsize': 'large',
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 43f1864f7354c1f711bb886f4f97eda56439ab89..2a41aab81d7ed62b1b58af515d703a2281236645 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -171,14 +171,14 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
     warnings.filterwarnings("ignore", message="Attempted to set non-positive bottom ylim on a log-scaled axis.")
 
     def __init__(self, stations: List, data_pred_path: str, plot_folder: str = ".", plot_per_seasons=True,
-                 rolling_window: int = 3, model_name: str = "nn", obs_name: str = "obs", **kwargs):
+                 rolling_window: int = 3, forecast_indicator: str = "nn", obs_indicator: str = "obs", **kwargs):
         """Initialise."""
         super().__init__(plot_folder, "conditional_quantiles")
         self._data_pred_path = data_pred_path
         self._stations = stations
         self._rolling_window = rolling_window
-        self._model_name = model_name
-        self._obs_name = obs_name
+        self._forecast_indicator = forecast_indicator
+        self._obs_name = obs_indicator
         self._opts = self._get_opts(kwargs)
         self._seasons = ['DJF', 'MAM', 'JJA', 'SON'] if plot_per_seasons is True else ""
         self._data = self._load_data()
@@ -205,7 +205,8 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
         for station in self._stations:
             file = os.path.join(self._data_pred_path, f"forecasts_{station}_test.nc")
             data_tmp = xr.open_dataarray(file)
-            data_collector.append(data_tmp.loc[:, :, [self._model_name, self._obs_name]].assign_coords(station=station))
+            data_collector.append(data_tmp.loc[:, :, [self._forecast_indicator,
+                                                      self._obs_name]].assign_coords(station=station))
         res = xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
         return res
 
@@ -312,15 +313,15 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
     def _plot_seasons(self):
         """Create seasonal plots."""
         for season in self._seasons:
-            self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._model_name,
+            self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._forecast_indicator,
                             y_model=self._obs_name, plot_name_affix="cali-ref", season=season)
             self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._obs_name,
-                            y_model=self._model_name, plot_name_affix="like-base", season=season)
+                            y_model=self._forecast_indicator, plot_name_affix="like-base", season=season)
 
     def _plot_all(self):
         """Plot overall conditional quantiles on full data."""
-        self._plot_base(data=self._data, x_model=self._model_name, y_model=self._obs_name, plot_name_affix="cali-ref")
-        self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._model_name, plot_name_affix="like-base")
+        self._plot_base(data=self._data, x_model=self._forecast_indicator, y_model=self._obs_name, plot_name_affix="cali-ref")
+        self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._forecast_indicator, plot_name_affix="like-base")
 
     @TimeTrackingWrapper
     def _plot_base(self, data: xr.DataArray, x_model: str, y_model: str, plot_name_affix: str, season: str = ""):
@@ -401,14 +402,14 @@ class PlotClimatologicalSkillScore(AbstractPlotClass):  # pragma: no cover
     :param plot_folder: path to save the plot (default: current directory)
     :param score_only: if true plot only scores of CASE I to IV, otherwise plot all single terms (default True)
     :param extra_name_tag: additional tag that can be included in the plot name (default "")
-    :param model_setup: architecture type to specify plot name (default "")
+    :param model_name: architecture type to specify plot name (default "")
 
     """
 
     def __init__(self, data: Dict, plot_folder: str = ".", score_only: bool = True, extra_name_tag: str = "",
-                 model_setup: str = ""):
+                 model_name: str = ""):
         """Initialise."""
-        super().__init__(plot_folder, f"skill_score_clim_{extra_name_tag}{model_setup}")
+        super().__init__(plot_folder, f"skill_score_clim_{extra_name_tag}{model_name}")
         self._labels = None
         self._data = self._prepare_data(data, score_only)
         self._plot(score_only)
@@ -565,13 +566,13 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):  # pragma: no cover
 
     def _create_pseudo_order(self, data):
         """Provide first predefined elements and append all remaining."""
-        first_elements = [f"{self._model_setup}-persi", "ols-persi", f"{self._model_setup}-ols"]
+        first_elements = [f"{self._model_setup} - persi", "ols - persi", f"{self._model_setup} - ols"]
         first_elements = list(filter(lambda x: x in data.comparison.tolist(), first_elements))
         uniq, index = np.unique(first_elements + data.comparison.unique().tolist(), return_index=True)
         return uniq[index.argsort()]
 
     def _filter_comparisons(self, data):
-        filtered_headers = list(filter(lambda x: "nn-" in x, data.comparison.unique()))
+        filtered_headers = list(filter(lambda x: f"{self._model_setup} - " in x, data.comparison.unique()))
         return data[data.comparison.isin(filtered_headers)]
 
     @staticmethod
@@ -606,23 +607,22 @@ class PlotFeatureImportanceSkillScore(AbstractPlotClass):  # pragma: no cover
 
     """
 
-    def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None,
-                 sampling: str = "daily", ahead_dim: str = "ahead", bootstrap_type: str = None,
-                 bootstrap_method: str = None, boot_dim: str = "boots", model_name: str = "NN",
-                 branch_names: list = None, ylim: tuple = None):
+    def __init__(self, data: Dict, plot_folder: str = ".", separate_vars: List = None, sampling: str = "daily",
+                 ahead_dim: str = "ahead", bootstrap_type: str = None, bootstrap_method: str = None,
+                 boot_dim: str = "boots", model_name: str = "NN", branch_names: list = None, ylim: tuple = None):
         """
         Set attributes and create plot.
 
         :param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms.
         :param plot_folder: path to save the plot (default: current directory)
-        :param model_setup: architecture type to specify plot name (default "CNN")
         :param separate_vars: variables to plot separated (default: ['o3'])
         :param sampling: type of sampling rate, should be either hourly or daily (default: "daily")
         :param ahead_dim: name of the ahead dimensions (default: "ahead")
         :param bootstrap_annotation: additional information to use in the file name (default: None)
+        :param model_name: architecture type to specify plot name (default "NN")
         """
         annotation = ["_".join([s for s in ["", bootstrap_type, bootstrap_method] if s is not None])][0]
-        super().__init__(plot_folder, f"feature_importance_{model_setup}{annotation}")
+        super().__init__(plot_folder, f"feature_importance_{model_name}{annotation}")
         if separate_vars is None:
             separate_vars = ['o3']
         self._labels = None
@@ -1053,7 +1053,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
 
     def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type",
                  error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots',
-                 block_length: str = None):
+                 block_length: str = None, model_name: str = "NN", model_indicator: str = "nn"):
         super().__init__(plot_folder, "sample_uncertainty_from_bootstrap")
         default_name = self.plot_name
         self.model_type_dim = model_type_dim
@@ -1061,6 +1061,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self.dim_name_boots = dim_name_boots
         self.error_unit = error_unit
         self.block_length = block_length
+        data = self.rename_model_indicator(data, model_name, model_indicator)
         self.prepare_data(data)
         self._plot(orientation="v")
 
@@ -1078,6 +1079,11 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self._data_table = None
         self._n_boots = None
 
+    def rename_model_indicator(self, data, model_name, model_indicator):
+        data.coords[self.model_type_dim] = [{model_indicator: model_name}.get(n, n)
+                                            for n in data.coords[self.model_type_dim].values]
+        return data
+
     def prepare_data(self, data: xr.DataArray):
         self._data_table = data.to_pandas()
         if "persi" in self._data_table.columns:
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 63be6eb4c6e8b5f8d3149df023e07d23805f077f..70b23c3730d9091d3780746cbb3913eefe4dcf95 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -224,7 +224,7 @@ class ExperimentSetup(RunEnvironment):
                  max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None,
                  overwrite_lazy_data: bool = None, uncertainty_estimate_block_length: str = None,
                  uncertainty_estimate_evaluate_competitors: bool = None, uncertainty_estimate_n_boots: int = None,
-                 do_uncertainty_estimate: bool = None, **kwargs):
+                 do_uncertainty_estimate: bool = None, model_display_name: str = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -377,6 +377,8 @@ class ExperimentSetup(RunEnvironment):
                         default=DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, scope="feature_importance")
         self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
+        if model_display_name is not None:
+            self._set_param("model_display_name", model_display_name)
         self._set_param("neighbors", ["DEBW030"])  # TODO: just for testing
 
         # set competitors
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 3f20d7b5cd8fa8d57c43f204b537ef02c08a8c95..71c49433f34949b538423312c10152d03312165f 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -95,6 +95,7 @@ class PostProcessing(RunEnvironment):
         self.uncertainty_estimate_boot_dim = "boots"
         self.model_type_dim = "type"
         self.index_dim = "index"
+        self.model_display_name = self.data_store.get_default("model_display_name", default=self.model.model_name)
         self._run()
 
     def _run(self):
@@ -480,7 +481,7 @@ class PostProcessing(RunEnvironment):
         for boot_method, boot_skill_score in boot_data.items():
             try:
                 PlotFeatureImportanceSkillScore(
-                    boot_skill_score, plot_folder=self.plot_path, model_setup=self.forecast_indicator,
+                    boot_skill_score, plot_folder=self.plot_path, model_name=self.model_display_name,
                     sampling=self._sampling, ahead_dim=self.ahead_dim,
                     separate_vars=to_list(self.target_var), bootstrap_type=boot_type,
                     bootstrap_method=boot_method)
@@ -493,7 +494,9 @@ class PostProcessing(RunEnvironment):
 
         try:
             if "PlotConditionalQuantiles" in plot_list:
-                PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=path, plot_folder=self.plot_path)
+                PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=path, plot_folder=self.plot_path,
+                                         forecast_indicator=self.forecast_indicator,
+                                         obs_indicator=self.observation_indicator)
         except Exception as e:
             logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error:"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -509,9 +512,9 @@ class PostProcessing(RunEnvironment):
         try:
             if "PlotClimatologicalSkillScore" in plot_list:
                 PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path,
-                                             model_setup=self.forecast_indicator)
+                                             model_name=self.model_display_name)
                 PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
-                                             extra_name_tag="all_terms_", model_setup=self.forecast_indicator)
+                                             extra_name_tag="all_terms_", model_name=self.model_display_name)
         except Exception as e:
             logging.error(f"Could not create plot PlotClimatologicalSkillScore due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -519,7 +522,7 @@ class PostProcessing(RunEnvironment):
         try:
             if "PlotCompetitiveSkillScore" in plot_list:
                 PlotCompetitiveSkillScore(self.skill_scores[0], plot_folder=self.plot_path,
-                                          model_setup=self.forecast_indicator)
+                                          model_setup=self.model_display_name)
         except Exception as e:
             logging.error(f"Could not create plot PlotCompetitiveSkillScore due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -593,7 +596,8 @@ class PostProcessing(RunEnvironment):
                 PlotSampleUncertaintyFromBootstrap(
                     data=self.uncertainty_estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim,
                     dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error",
-                    error_unit=r"ppb$^2$", block_length=block_length)
+                    error_unit=r"ppb$^2$", block_length=block_length, model_name=self.model_display_name,
+                    model_indicator=self.forecast_indicator)
         except Exception as e:
             logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -903,7 +907,8 @@ class PostProcessing(RunEnvironment):
         errors = {}
         for station in all_stations:
             external_data = self._get_external_data(station, path)  # test data
-
+            external_data.coords[self.model_type_dim] = [{self.forecast_indicator: self.model_display_name}.get(n, n)
+                                                         for n in external_data.coords[self.model_type_dim].values]
             # test errors
             if external_data is not None:
                 model_type_list = external_data.coords[self.model_type_dim].values.tolist()
@@ -1022,8 +1027,8 @@ class PostProcessing(RunEnvironment):
             df.reindex(df.index.drop(["total"]).to_list() + ["total"], )
             column_format = tables.create_column_format_for_tex(df)
             if model_type == "skill_score":
-                file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_')
+                file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_').replace('/', '_')
             else:
-                file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_')
+                file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_').replace('/', '_')
             tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df)
             tables.save_to_md(report_path, file_name % "md", df=df)
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index c076253d92a0e24f419046805687d2a80143176c..8d82afb4c002c660e6fb966945b2e383007d5b70 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -70,7 +70,7 @@ class Training(RunEnvironment):
         self.model: keras.Model = self.data_store.get("model", "model")
         self.train_set: Union[KerasIterator, None] = None
         self.val_set: Union[KerasIterator, None] = None
-        self.test_set: Union[KerasIterator, None] = None
+        # self.test_set: Union[KerasIterator, None] = None
         self.batch_size = self.data_store.get("batch_size")
         self.epochs = self.data_store.get("epochs")
         self.callbacks: CallbackHandler = self.data_store.get("callbacks", "model")
@@ -81,9 +81,9 @@ class Training(RunEnvironment):
 
     def _run(self) -> None:
         """Run training. Details in class description."""
-        self.set_generators()
         self.make_predict_function()
         if self._train_model:
+            self.set_generators()
             self.train()
             self.save_model()
             self.report_training()
@@ -118,7 +118,9 @@ class Training(RunEnvironment):
         The called sub-method will automatically distribute the data according to the batch size. The subsets can be
         accessed as class variables train_set, val_set, and test_set.
""" - for mode in ["train", "val", "test"]: + logging.info("set generators for training and validation") + # for mode in ["train", "val", "test"]: + for mode in ["train", "val"]: self._set_gen(mode) def train(self) -> None: diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py index b16c0c2586f87af8368ac0059edc8a3997780f69..1b83b3823519d63d5dcbc10f0e31fc3433f98f34 100644 --- a/test/test_run_modules/test_training.py +++ b/test/test_run_modules/test_training.py @@ -234,7 +234,7 @@ class TestTraining: statistics_per_var, window_history_size, window_lead_time) -> Training: channels = len(list(statistics_per_var.keys())) - model = FCN([(window_history_size + 1, 1, channels)], [window_lead_time]) + model = FCN([(window_history_size + 1, 1, channels)], [window_lead_time]) obj = object.__new__(Training) super(Training, obj).__init__() @@ -306,7 +306,7 @@ class TestTraining: assert init_without_run.train_set._collection.return_value == "mock_train_gen" def test_set_generators(self, init_without_run): - sets = ["train", "val", "test"] + sets = ["train", "val"] assert all([getattr(init_without_run, f"{obj}_set") is None for obj in sets]) init_without_run.set_generators() assert not all([getattr(init_without_run, f"{obj}_set") is None for obj in sets]) @@ -366,10 +366,10 @@ class TestTraining: def test_resume_training1(self, path: str, model_path, batch_path, data_collection, statistics_per_var, window_history_size, window_lead_time): - obj_1st = self.create_training_obj(2, path, data_collection, batch_path, model_path, statistics_per_var, + obj_1st = self.create_training_obj(4, path, data_collection, batch_path, model_path, statistics_per_var, window_history_size, window_lead_time) keras.utils.get_custom_objects().update(obj_1st.model.custom_objects) assert obj_1st._run() is None - obj_2nd = self.create_training_obj(4, path, data_collection, batch_path, model_path, statistics_per_var, + obj_2nd = self.create_training_obj(8, path, data_collection, batch_path, model_path, statistics_per_var, window_history_size, window_lead_time) assert obj_2nd._run() is None