From d0e229562592ffecc164b8ba1c5770bcbac076bd Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 20 Jan 2023 21:43:10 +0100
Subject: [PATCH] first implementation of bias free evaluation

---
 mlair/configuration/defaults.py       |  1 +
 mlair/helpers/statistics.py           | 13 +++++
 mlair/run_modules/experiment_setup.py |  8 ++-
 mlair/run_modules/post_processing.py  | 73 ++++++++++++++++++++++++---
 4 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index d0b35929..b63000c2 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -53,6 +53,7 @@ DEFAULT_DO_UNCERTAINTY_ESTIMATE = True
 DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH = "1m"
 DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS = True
 DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS = 1000
+DEFAULT_DO_BIAS_FREE_EVALUATION = True
 DEFAULT_EVALUATE_FEATURE_IMPORTANCE = True
 DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS = False
 DEFAULT_FEATURE_IMPORTANCE_N_BOOTS = 20
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 9415e0fc..15cd926b 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -637,3 +637,16 @@ def create_n_bootstrap_realizations(data: xr.DataArray, dim_name_time: str, dim_
         realizations[season][boot] = calculate_average(shuffled.sel({dim_name_time: sel}), dim=dim_name_time,
                                                        skipna=True)
     return realizations
+
+
+def calculate_bias_free_data(data, time_dim="index", window_size=30):
+
+    # overall mean
+    overall_mean = data.mean(time_dim)
+    bias_free_data = data - overall_mean
+
+    # seasonal mean
+    seasonal_mean = data.rolling(**{time_dim: window_size}, min_periods=int(window_size/2)).mean()
+    seasonal_bias_free_data = data - seasonal_mean
+
+    return bias_free_data, seasonal_bias_free_data
\ No newline at end of file
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index f89633cb..8bbcfddf 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -24,7 +24,8 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD, DEFAULT_OVERWRITE_LAZY_DATA, \
     DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH, DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS, \
     DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS, DEFAULT_DO_UNCERTAINTY_ESTIMATE, DEFAULT_CREATE_SNAPSHOT, \
-    DEFAULT_EARLY_STOPPING_EPOCHS, DEFAULT_RESTORE_BEST_MODEL_WEIGHTS, DEFAULT_COMPETITORS
+    DEFAULT_EARLY_STOPPING_EPOCHS, DEFAULT_RESTORE_BEST_MODEL_WEIGHTS, DEFAULT_COMPETITORS, \
+    DEFAULT_DO_BIAS_FREE_EVALUATION
 from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
@@ -240,7 +241,8 @@ class ExperimentSetup(RunEnvironment):
                  max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None,
                  overwrite_lazy_data: bool = None, uncertainty_estimate_block_length: str = None,
                  uncertainty_estimate_evaluate_competitors: bool = None, uncertainty_estimate_n_boots: int = None,
-                 do_uncertainty_estimate: bool = None, model_display_name: str = None, transformation_file: str = None,
+                 do_uncertainty_estimate: bool = None, do_bias_free_evaluation: bool = None,
+                 model_display_name: str = None, transformation_file: str = None,
                  calculate_fresh_transformation: bool = None, snapshot_load_path: str = None,
                  create_snapshot: bool = None, snapshot_path: str = None,
                  **kwargs):
@@ -393,6 +395,8 @@ class ExperimentSetup(RunEnvironment):
                         default=DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS, scope="uncertainty_estimate")
         self._set_param("n_boots", uncertainty_estimate_n_boots, default=DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS,
                         scope="uncertainty_estimate")
+        self._set_param("do_bias_free_evaluation", do_bias_free_evaluation,
+                        default=DEFAULT_DO_BIAS_FREE_EVALUATION, scope="general.postprocessing")
         self._set_param("evaluate_feature_importance", evaluate_feature_importance,
                         default=DEFAULT_EVALUATE_FEATURE_IMPORTANCE, scope="general.postprocessing")
 
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index e4cc34f6..cbe112ca 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -89,6 +89,7 @@ class PostProcessing(RunEnvironment):
         self.window_lead_time = extract_value(self.data_store.get("output_shape", "model"))
         self.skill_scores = None
         self.errors = None
+        self.bias_free_errors = None
         self.feature_importance_skill_scores = None
         self.uncertainty_estimate = None
         self.uncertainty_estimate_seasons = {}
@@ -147,7 +148,12 @@ class PostProcessing(RunEnvironment):
         self.report_error_metrics(errors)
         self.report_error_metrics({self.forecast_indicator: skill_score_climatological})
         self.report_error_metrics({"skill_score": skill_score_competitive})
-        self.store_errors(errors)
+        self.errors = self.store_errors(errors)
+
+        # bias free evaluation
+        if self.data_store.get("do_bias_free_evaluation", "postprocessing") is True:
+            bias_free_errors = self.calculate_bias_free_error_metrics()
+            self.bias_free_errors = [self.store_errors(bias_free_errors[0]), self.store_errors(bias_free_errors[1])]
 
         # plotting
         self.plot()
@@ -658,6 +664,29 @@ class PostProcessing(RunEnvironment):
             logging.error(f"Could not create plot PlotErrorMetrics due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
 
+        try:
+            if "PlotErrorMetrics" in plot_list and self.bias_free_errors is not None:
+                error_metric_units = statistics.get_error_metrics_units("ppb")
+                error_metrics_name = statistics.get_error_metrics_long_name()
+                tag = {0: "", 1: "seasonal_"}
+                for i, errors in enumerate(self.bias_free_errors):
+                    for error_metric in errors.keys():
+                        try:
+                            PlotSampleUncertaintyFromBootstrap(
+                                data=errors[error_metric], plot_folder=self.plot_path,
+                                model_type_dim=self.model_type_dim,
+                                dim_name_boots="station", error_measure=error_metrics_name[error_metric],
+                                error_unit=error_metric_units[error_metric], model_name=self.model_display_name,
+                                model_indicator=self.model_display_name, sampling=self._sampling, apply_root=False,
+                                plot_name=f"{tag[i]}bias_free_error_plot_{error_metric}")
+                        except Exception as e:
+                            logging.error(f"Could not create plot PlotErrorMetrics for {error_metric} (bias free "
+                                          f"{tag[i]}) due to the following error: "
+                                          f"{e}\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
+        except Exception as e:
+            logging.error(f"Could not create plot PlotErrorMetrics (bias free) due to the following error: {e}"
+                          f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
+
         try:
             if "PlotStationMap" in plot_list:
                 gens = [(self.train_data, {"marker": 5, "ms": 9}),
@@ -746,10 +775,6 @@ class PostProcessing(RunEnvironment):
             logging.error(f"Could not create plot PlotErrorsOnMap due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
 
-
-
-
-
     @TimeTrackingWrapper
     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
@@ -1067,6 +1092,42 @@ class PostProcessing(RunEnvironment):
         except (TypeError, AttributeError):
             return forecast if competitor is None else competitor
 
+    def calculate_bias_free_error_metrics(self):
+        all_stations = self.data_store.get("stations")
+        errors = [{}, {}]  # errors_total_bias_free, errors_seasonal_bias_free
+        for station in all_stations:
+            external_data = self._get_external_data(station, self.forecast_path)  # test data
+
+            # test errors
+            if external_data is not None:
+                external_data.coords[self.model_type_dim] = [{self.forecast_indicator: self.model_display_name}.get(n, n)
+                                                             for n in external_data.coords[self.model_type_dim].values]
+                model_type_list = external_data.coords[self.model_type_dim].values.tolist()
+
+            # load competitors
+            competitor = self.load_competitors(station)
+            combined = self._combine_forecasts(external_data, competitor, dim=self.model_type_dim)
+            if combined is not None:
+                model_list = remove_items(combined.coords[self.model_type_dim].values.tolist(),
+                                          self.observation_indicator)
+            else:
+                model_list = None
+
+            # data_total_bias_free, data_seasonal_bias_free
+            bias_free_data = statistics.calculate_bias_free_data(combined, time_dim=self.index_dim, window_size=30)
+
+            # test errors of competitors
+            for model_type in (model_list or []):
+                for data, e in zip(bias_free_data, errors):
+                    if self.observation_indicator not in data.coords[self.model_type_dim]:
+                        continue
+                    if model_type not in e.keys():
+                        e[model_type] = {}
+                    e[model_type][station] = statistics.calculate_error_metrics(
+                        *map(lambda x: data.sel(**{self.model_type_dim: x}),
+                             [model_type, self.observation_indicator]), dim=self.index_dim)
+        return errors
+
     def calculate_error_metrics(self) -> Tuple[Dict, Dict, Dict, Dict]:
         """
         Calculate error metrics and skill scores of NN forecast.
@@ -1226,4 +1287,4 @@ class PostProcessing(RunEnvironment):
             metric_collection[model_type] = xr.Dataset(station_collection).to_array(station_dim)
         coll = xr.Dataset(metric_collection).to_array(self.model_type_dim)
         coll = coll.transpose(station_dim, self.ahead_dim, self.model_type_dim, error_dim)
-        self.errors = {k: coll.sel({error_dim: k}, drop=True) for k in coll.coords[error_dim].values}
+        return {k: coll.sel({error_dim: k}, drop=True) for k in coll.coords[error_dim].values}
-- 
GitLab
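
For readers unfamiliar with the de-biasing step this patch introduces, the sketch below (not part of the patch) illustrates what calculate_bias_free_data computes and why the resulting metrics are called "bias free": the first variant removes each series' overall mean, the second removes a 30-step rolling ("seasonal") mean before errors are calculated. All data, dimension names, and variables in the sketch are synthetic and purely illustrative; only the two subtraction steps mirror the helper added above.

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Synthetic daily observation with a seasonal cycle, plus a forecast with a constant +3 offset.
    index = pd.date_range("2022-01-01", periods=365, freq="D")
    rng = np.random.default_rng(seed=0)
    obs = xr.DataArray(20 + 10 * np.sin(np.arange(365) / 58.0) + rng.normal(0, 2, 365),
                       dims="index", coords={"index": index})
    forecast = obs + 3 + rng.normal(0, 1, 365)
    data = xr.concat([obs, forecast], dim=pd.Index(["obs", "nn"], name="type"))

    # Same two operations as calculate_bias_free_data(data, time_dim="index", window_size=30).
    window_size = 30
    overall_mean = data.mean("index")
    bias_free = data - overall_mean                      # removes the constant offset per series
    seasonal_mean = data.rolling(index=window_size, min_periods=window_size // 2).mean()
    seasonal_bias_free = data - seasonal_mean            # additionally removes slowly varying offsets

    def rmse(d):
        diff = d.sel(type="nn") - d.sel(type="obs")
        return float(np.sqrt((diff ** 2).mean()))

    print(rmse(data))                # dominated by the constant +3 bias
    print(rmse(bias_free))           # bias removed, mostly random error left
    print(rmse(seasonal_bias_free))  # also insensitive to seasonal offsets

The evaluation itself is switched on by default (DEFAULT_DO_BIAS_FREE_EVALUATION = True) and, as the patch shows, can be disabled per experiment via the new ExperimentSetup argument do_bias_free_evaluation=False.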