diff --git a/docs/requirements_docs.txt b/docs/requirements_docs.txt index a39acca8a7cf887237f595d7992960ac10233a85..8ccf3ba6515d31ebd2b35901d3c9e58734d653d8 100644 --- a/docs/requirements_docs.txt +++ b/docs/requirements_docs.txt @@ -3,4 +3,5 @@ sphinx-autoapi==1.3.0 sphinx-autodoc-typehints==1.10.3 sphinx-rtd-theme==0.4.3 #recommonmark==0.6.0 -m2r2==0.2.5 \ No newline at end of file +m2r2==0.2.5 +docutils<0.18 \ No newline at end of file diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index 242dcd31d01134b75f70a12c2708d4c99bb94301..9b79dc6ac815ce6e7f55f67062fc1e6172544512 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -44,14 +44,19 @@ DEFAULT_TEST_END = "2017-12-31" DEFAULT_TEST_MIN_LENGTH = 90 DEFAULT_TRAIN_VAL_MIN_LENGTH = 180 DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True -DEFAULT_EVALUATE_BOOTSTRAPS = True -DEFAULT_CREATE_NEW_BOOTSTRAPS = False -DEFAULT_NUMBER_OF_BOOTSTRAPS = 20 -DEFAULT_BOOTSTRAP_TYPE = "singleinput" -DEFAULT_BOOTSTRAP_METHOD = "shuffle" +DEFAULT_DO_UNCERTAINTY_ESTIMATE = True +DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH = "1m" +DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS = True +DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS = 1000 +DEFAULT_EVALUATE_FEATURE_IMPORTANCE = True +DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS = False +DEFAULT_FEATURE_IMPORTANCE_N_BOOTS = 20 +DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE = "singleinput" +DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD = "shuffle" DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles", - "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotPeriodogram"] + "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotPeriodogram", + "PlotSampleUncertaintyFromBootstrap"] DEFAULT_SAMPLING = "daily" DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA", "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "", diff --git a/mlair/data_handler/__init__.py b/mlair/data_handler/__init__.py index 495b6e7c8604a839a084a2b78a54563c13eb06e6..d119977802038155af0d07d99c75c665622a148c 100644 --- a/mlair/data_handler/__init__.py +++ b/mlair/data_handler/__init__.py @@ -9,7 +9,7 @@ __author__ = 'Lukas Leufen, Felix Kleinert' __date__ = '2020-04-17' -from .bootstraps import BootStraps +from .input_bootstraps import Bootstraps from .iterator import KerasIterator, DataCollection from .default_data_handler import DefaultDataHandler from .abstract_data_handler import AbstractDataHandler diff --git a/mlair/data_handler/bootstraps.py b/mlair/data_handler/input_bootstraps.py similarity index 98% rename from mlair/data_handler/bootstraps.py rename to mlair/data_handler/input_bootstraps.py index e03881484bfc9b8275ede8a4432072c74643994a..ab4c71f1c77ab12b0e751f6991fbf20cd55aa8ae 100644 --- a/mlair/data_handler/bootstraps.py +++ b/mlair/data_handler/input_bootstraps.py @@ -28,8 +28,8 @@ class BootstrapIterator(Iterator): _position: int = None - def __init__(self, data: "BootStraps", method): - assert isinstance(data, BootStraps) + def __init__(self, data: "Bootstraps", method): + assert isinstance(data, Bootstraps) self._data = data self._dimension = data.bootstrap_dimension self.boot_dim = "boots" @@ -184,7 +184,7 @@ class MeanBootstraps: return np.ones_like(data) * self._mean -class BootStraps(Iterable): +class 
Bootstraps(Iterable): """ Main class to perform bootstrap operations. diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py index 543cff3624577ac617733b8b593c5f52f25196b3..36c93b04486fc9be013af2c4f34d2b3ee1bd84c2 100644 --- a/mlair/helpers/filter.py +++ b/mlair/helpers/filter.py @@ -768,6 +768,7 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass): def firwin_kzf(m, k): + m, k = int(m), int(k) coef = np.ones(m) for i in range(1, k): t = np.zeros((m, m + i * (m - 1))) diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py index 16b36921773a4af131065063de23963a56cb4c65..679f5a28fc564d56cd6f3794ee8fe8e1877b2b4c 100644 --- a/mlair/helpers/helpers.py +++ b/mlair/helpers/helpers.py @@ -118,8 +118,10 @@ def remove_items(obj: Union[List, Dict, Tuple], items: Any): def remove_from_list(list_obj, item_list): """Remove implementation for lists.""" - if len(items) > 1: + if len(item_list) > 1: return [e for e in list_obj if e not in item_list] + elif len(item_list) == 0: + return list_obj else: list_obj = list_obj.copy() try: diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py index 87f780f9a6133edfcb2f9c71c2956b92f332e915..aa89da0ea66e263d076af9abd578ba125c260bec 100644 --- a/mlair/helpers/statistics.py +++ b/mlair/helpers/statistics.py @@ -287,7 +287,7 @@ class SkillScores: combination_strings = [f"{first}-{second}" for (first, second) in combinations] return combinations, combination_strings - def skill_scores(self) -> pd.DataFrame: + def skill_scores(self) -> [pd.DataFrame, pd.DataFrame]: """ Calculate skill scores for all combinations of model names. @@ -296,6 +296,7 @@ ahead_names = list(self.external_data[self.ahead_dim].data) combinations, combination_strings = self.get_model_name_combinations() skill_score = pd.DataFrame(index=combination_strings) + count = pd.DataFrame(index=combination_strings) for iahead in ahead_names: data = self.external_data.sel({self.ahead_dim: iahead}) skill_score[iahead] = [self.general_skill_score(data, @@ -303,7 +304,12 @@ reference_name=second, observation_name=self.observation_name) for (first, second) in combinations] - return skill_score + count[iahead] = [self.get_count(data, + forecast_name=first, + reference_name=second, + observation_name=self.observation_name) + for (first, second) in combinations] + return skill_score, count def climatological_skill_scores(self, internal_data: Data, forecast_name: str) -> xr.DataArray: """ @@ -376,6 +382,21 @@ skill_score = 1 - mse(observation, forecast) / mse(observation, reference) return skill_score.values + def get_count(self, data: Data, forecast_name: str, reference_name: str, + observation_name: str = None) -> np.ndarray: + r""" + Count the number of valid data points (time steps without missing values) for a given forecast and reference combination. 
+ + :param data: internal data containing data for observation, forecast and reference + :param observation_name: name of observation + :param forecast_name: name of forecast + :param reference_name: name of reference + + :return: number of data points available for the skill score calculation + """ + data = data.dropna("index") + return data.count("index").max().values + def skill_score_pre_calculations(self, data: Data, observation_name: str, forecast_name: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray, @@ -481,3 +502,58 @@ return monthly_mean + +def create_single_bootstrap_realization(data: xr.DataArray, dim_name_time: str) -> xr.DataArray: + """ + Return a bootstrapped realization of data + :param data: data from which to draw ONE bootstrap realization + :param dim_name_time: name of time dimension + :return: bootstrapped realization of data + """ + + num_of_blocks = data.coords[dim_name_time].shape[0] + boot_idx = np.random.choice(num_of_blocks, size=num_of_blocks, replace=True) + return data.isel({dim_name_time: boot_idx}) + + +def calculate_average(data: xr.DataArray, **kwargs) -> xr.DataArray: + """ + Calculate mean of data + :param data: data for which to calculate mean + :return: mean of data + """ + return data.mean(**kwargs) + + +def create_n_bootstrap_realizations(data: xr.DataArray, dim_name_time: str, dim_name_model: str, n_boots: int = 1000, + dim_name_boots: str = 'boots') -> xr.DataArray: + """ + Create n bootstrap realizations and calculate averages across realizations + + :param data: original data from which to create bootstrap realizations + :param dim_name_time: name of time dimension + :param dim_name_model: name of model dimension + :param n_boots: number of bootstrap realizations + :param dim_name_boots: name of bootstrap dimension + :return: averages of all bootstrap realizations + """ + res_dims = [dim_name_boots] + dims = list(data.dims) + coords = {dim_name_boots: range(n_boots), dim_name_model: data.coords[dim_name_model] } + if len(dims) > 1: + res_dims = res_dims + dims[1:] + res = xr.DataArray(np.nan, dims=res_dims, coords=coords) + for boot in range(n_boots): + res[boot] = (calculate_average( + create_single_bootstrap_realization(data, dim_name_time=dim_name_time), + dim=dim_name_time, skipna=True)) + return res + + + + + + + + + diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py index 68068533cd328b597f6009d91ce96a5cf3550c74..8d4ab2689b1eea24dc9d39d53b04e51405a3a874 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -21,6 +21,8 @@ from mlair.data_handler import DataCollection from mlair.helpers import TimeTrackingWrapper, to_list, remove_items from mlair.plotting.abstract_plot_class import AbstractPlotClass +matplotlib.use("Agg") + @TimeTrackingWrapper class PlotStationMap(AbstractPlotClass): # pragma: no cover @@ -632,7 +634,10 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover self._plot_total(raw=True) self._plot_total(raw=False) if multiple > 1: - self._plot_difference(label_names) + self._plot_difference(label_names, plot_name_add="_last") + self._prepare_pgram(generator, pos, multiple, use_multiprocessing=use_multiprocessing, + use_last_input_value=False) + self._plot_difference(label_names, plot_name_add="_first") @staticmethod def _has_filter_dimension(g, pos): @@ -649,7 +654,7 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover return check_data.coords[filter_dim].shape[0], check_data.coords[filter_dim].values.tolist() @TimeTrackingWrapper - def _prepare_pgram(self, generator, pos, 
multiple=1, use_multiprocessing=False): + def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False, use_last_input_value=True): """ Create periodogram data. """ @@ -663,7 +668,8 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover plot_data_raw_single = dict() plot_data_mean_single = dict() self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000) - raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing) + raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing, + use_last_input_value=use_last_input_value) for var in raw_data_single.keys(): pgram_com = [] pgram_mean = 0 @@ -715,7 +721,7 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover raw_data_single[var_str] = raw_data_single[var_str] + [(f, pgram)] return raw_data_single - def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing): + def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing, use_last_input_value=True): """Implementation of data preprocessing using parallel generator element processing.""" raw_data_single = dict() res = [] @@ -723,14 +729,15 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover pool = multiprocessing.Pool( min([psutil.cpu_count(logical=False), len(generator), 16])) # use only physical cpus output = [ - pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index)) + pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index, + use_last_input_value)) for g in generator] for i, p in enumerate(output): res.append(p.get()) pool.close() else: for g in generator: - res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index)) + res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index, use_last_input_value)) for res_dict in res: for k, v in res_dict.items(): if k not in raw_data_single.keys(): @@ -816,8 +823,8 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover pdf_pages.close() plt.close('all') - def _plot_difference(self, label_names): - plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter.pdf" + def _plot_difference(self, label_names, plot_name_add = ""): + plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter{plot_name_add}.pdf" plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name) logging.info(f"... plotting {plot_name}") pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) @@ -846,19 +853,19 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover plt.close('all') -def f_proc(var, d_var, f_index, time_dim="datetime"): # pragma: no cover +def f_proc(var, d_var, f_index, time_dim="datetime", use_last_value=True): # pragma: no cover var_str = str(var) t = (d_var[time_dim] - d_var[time_dim][0]).astype("timedelta64[h]").values / np.timedelta64(1, "D") if len(d_var.shape) > 1: # use only max value if dimensions are remaining (e.g. 
max(window) -> latest value) to_remove = remove_items(d_var.coords.dims, time_dim) for e in to_list(to_remove): - d_var = d_var.sel({e: d_var[e].max()}) + d_var = d_var.sel({e: d_var[e].max() if use_last_value is True else d_var[e].min()}) pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").power(f_index) # f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").autopower() return var_str, f_index, pgram -def f_proc_2(g, m, pos, variables_dim, time_dim, f_index): # pragma: no cover +def f_proc_2(g, m, pos, variables_dim, time_dim, f_index, use_last_value): # pragma: no cover raw_data_single = dict() if hasattr(g.id_class, "lazy"): g.id_class.load_lazy() if g.id_class.lazy is True else None @@ -880,7 +887,7 @@ def f_proc_2(g, m, pos, variables_dim, time_dim, f_index): # pragma: no cover d = d[pos] if isinstance(d, tuple) else d for var in d[variables_dim].values: d_var = d.loc[{variables_dim: var}].squeeze().dropna(time_dim) - var_str, f, pgram = f_proc(var, d_var, f_index) + var_str, f, pgram = f_proc(var, d_var, f_index, use_last_value=use_last_value) raw_data_single[var_str] = [(f, pgram)] if hasattr(g.id_class, "lazy"): g.id_class.clean_up() if g.id_class.lazy is True else None diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py index 7ba84656cc00a89a7ec88efcc30bf665e0849e2f..2afb5f08ec6401fa2e693c924cf263204ffbb75d 100644 --- a/mlair/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -15,6 +15,7 @@ import pandas as pd import seaborn as sns import xarray as xr from matplotlib.backends.backend_pdf import PdfPages +from matplotlib.offsetbox import AnchoredText from mlair import helpers from mlair.data_handler.iterator import DataCollection @@ -30,7 +31,7 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING) @TimeTrackingWrapper -class PlotMonthlySummary(AbstractPlotClass): +class PlotMonthlySummary(AbstractPlotClass): # pragma: no cover """ Show a monthly summary over all stations for each lead time ("ahead") as box and whiskers plot. @@ -137,7 +138,7 @@ class PlotMonthlySummary(AbstractPlotClass): @TimeTrackingWrapper -class PlotConditionalQuantiles(AbstractPlotClass): +class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover """ Create cond.quantile plots as originally proposed by Murphy, Brown and Chen (1989) [But in log scale]. @@ -381,7 +382,7 @@ class PlotConditionalQuantiles(AbstractPlotClass): @TimeTrackingWrapper -class PlotClimatologicalSkillScore(AbstractPlotClass): +class PlotClimatologicalSkillScore(AbstractPlotClass): # pragma: no cover """ Create plot of climatological skill score after Murphy (1988) as box plot over all stations. @@ -473,7 +474,7 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): @TimeTrackingWrapper -class PlotCompetitiveSkillScore(AbstractPlotClass): +class PlotCompetitiveSkillScore(AbstractPlotClass): # pragma: no cover """ Create competitive skill score plot. 
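# --- Illustration (not MLAir code): Lomb-Scargle periodogram as used in f_proc above. ---
# The PlotPeriodogram changes toggle between the last and the first input value along the
# remaining window dimension before computing the spectral power. A minimal, self-contained
# sketch of that spectral estimate on synthetic gappy daily data (assumes only numpy and
# astropy are available; the frequency grid mirrors the logspace f_index used above):
import numpy as np
from astropy.timeseries import LombScargle

rng = np.random.default_rng(0)
t = np.sort(rng.choice(np.arange(3 * 365), size=800, replace=False)).astype(float)  # days, with gaps
y = np.sin(2 * np.pi * t / 365.25) + 0.3 * rng.normal(size=t.size)  # yearly cycle plus noise

f_index = np.logspace(-3, 0, 1000)  # frequencies in 1/day, as for daily sampling
pgram = LombScargle(t, y, nterms=1, normalization="psd").power(f_index)
# pgram holds the spectral power evaluated on f_index, analogous to the return value of f_proc.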
@@ -491,12 +492,12 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): """ - def __init__(self, data: pd.DataFrame, plot_folder=".", model_setup="NN"): + def __init__(self, data: Dict[str, pd.DataFrame], plot_folder=".", model_setup="NN"): """Initialise.""" super().__init__(plot_folder, f"skill_score_competitive_{model_setup}") self._model_setup = model_setup self._labels = None - self._data = self._prepare_data(data) + self._data = self._prepare_data(helpers.remove_items(data, "total")) default_plot_name = self.plot_name # draw full detail plot self.plot_name = default_plot_name + "_full_detail" @@ -590,7 +591,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): @TimeTrackingWrapper -class PlotBootstrapSkillScore(AbstractPlotClass): +class PlotBootstrapSkillScore(AbstractPlotClass): # pragma: no cover """ Create plot of climatological skill score after Murphy (1988) as box plot over all stations. @@ -839,7 +840,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass): @TimeTrackingWrapper -class PlotTimeSeries: +class PlotTimeSeries: # pragma: no cover """ Create time series plot. @@ -972,7 +973,7 @@ class PlotTimeSeries: @TimeTrackingWrapper -class PlotSeparationOfScales(AbstractPlotClass): +class PlotSeparationOfScales(AbstractPlotClass): # pragma: no cover def __init__(self, collection: DataCollection, plot_folder: str = ".", time_dim="datetime", window_dim="window", filter_dim="filter", target_dim="variables"): @@ -998,6 +999,80 @@ class PlotSeparationOfScales(AbstractPlotClass): self._save() +@TimeTrackingWrapper +class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover + + def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type", + error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots', + block_length: str = None): + super().__init__(plot_folder, "sample_uncertainty_from_bootstrap") + default_name = self.plot_name + self.model_type_dim = model_type_dim + self.error_measure = error_measure + self.dim_name_boots = dim_name_boots + self.error_unit = error_unit + self.block_length = block_length + self.prepare_data(data) + self._plot(orientation="v") + + self.plot_name = default_name + "_horizontal" + self._plot(orientation="h") + + self._apply_root() + + self.plot_name = default_name + "_sqrt" + self._plot(orientation="v") + + self.plot_name = default_name + "_horizontal_sqrt" + self._plot(orientation="h") + + self._data_table = None + self._n_boots = None + + def prepare_data(self, data: xr.DataArray): + self._data_table = data.to_pandas() + if "persi" in self._data_table.columns: + self._data_table["persi"] = self._data_table.pop("persi") + self._n_boots = self._data_table.shape[0] + + def _apply_root(self): + self._data_table = np.sqrt(self._data_table) + self.error_measure = f"root {self.error_measure}" + self.error_unit = self.error_unit.replace("$^2$", "") + + def _plot(self, orientation: str = "v"): + data_table = self._data_table + n_boots = self._n_boots + size = len(np.unique(data_table.columns)) + if orientation == "v": + figsize, width = (size, 5), 0.4 + elif orientation == "h": + figsize, width = (6, (1+.5*size)), 0.65 + else: + raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") + fig, ax = plt.subplots(figsize=figsize) + sns.boxplot(data=data_table, ax=ax, whis=1., color="white", + showmeans=True, meanprops={"markersize": 6, "markeredgecolor": "k"}, + flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, + 
boxprops={'facecolor': 'none', 'edgecolor': 'k'}, + width=width, orient=orientation) + if orientation == "v": + ax.set_ylabel(f"{self.error_measure} (in {self.error_unit})") + ax.set_xticklabels(ax.get_xticklabels(), rotation=45) + elif orientation == "h": + ax.set_xlabel(f"{self.error_measure} (in {self.error_unit})") + else: + raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") + text = f"n={n_boots}" if self.block_length is None else f"{self.block_length}, n={n_boots}" + text_box = AnchoredText(text, frameon=True, loc=1, pad=0.5) + plt.setp(text_box.patch, edgecolor='k', facecolor='w') + ax.add_artist(text_box) + plt.setp(ax.lines, color='k') + plt.tight_layout() + self._save() + plt.close("all") + + if __name__ == "__main__": stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'] path = "../../testrun_network/forecasts" diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index 28277d5057698c01594431008b81d959d415c3e2..63be6eb4c6e8b5f8d3149df023e07d23805f077f 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -18,10 +18,12 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_WINDOW_DIM, DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_INTERPOLATION_LIMIT, \ DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ - DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ - DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \ + DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_FEATURE_IMPORTANCE, DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS, \ + DEFAULT_FEATURE_IMPORTANCE_N_BOOTS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \ DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG, DEFAULT_MAX_NUMBER_MULTIPROCESSING, \ - DEFAULT_BOOTSTRAP_TYPE, DEFAULT_BOOTSTRAP_METHOD, DEFAULT_OVERWRITE_LAZY_DATA + DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD, DEFAULT_OVERWRITE_LAZY_DATA, \ + DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH, DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS, \ + DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS, DEFAULT_DO_UNCERTAINTY_ESTIMATE from mlair.data_handler import DefaultDataHandler from mlair.run_modules.run_environment import RunEnvironment from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel @@ -212,14 +214,17 @@ class ExperimentSetup(RunEnvironment): sampling: str = None, create_new_model=None, bootstrap_path=None, permute_data_on_training=None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, extreme_values: list = None, - extremes_on_right_tail_only: bool = None, evaluate_bootstraps=None, plot_list=None, - number_of_bootstraps=None, create_new_bootstraps=None, bootstrap_method=None, bootstrap_type=None, + extremes_on_right_tail_only: bool = None, evaluate_feature_importance: bool = None, plot_list=None, + feature_importance_n_boots: int = None, feature_importance_create_new_bootstraps: bool = None, + feature_importance_bootstrap_method=None, feature_importance_bootstrap_type=None, data_path: str = None, batch_path: str = None, login_nodes=None, hpc_hosts=None, 
model=None, batch_size=None, epochs=None, data_handler=None, data_origin: Dict = None, competitors: list = None, competitor_path: str = None, use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None, - overwrite_lazy_data: bool = None, **kwargs): + overwrite_lazy_data: bool = None, uncertainty_estimate_block_length: str = None, + uncertainty_estimate_evaluate_competitors: bool = None, uncertainty_estimate_n_boots: int = None, + do_uncertainty_estimate: bool = None, **kwargs): # create run framework super().__init__() @@ -349,15 +354,28 @@ class ExperimentSetup(RunEnvironment): default=DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS) # set post-processing instructions - self._set_param("evaluate_bootstraps", evaluate_bootstraps, default=DEFAULT_EVALUATE_BOOTSTRAPS, - scope="general.postprocessing") - create_new_bootstraps = max([self.data_store.get("train_model", "general"), - create_new_bootstraps or DEFAULT_CREATE_NEW_BOOTSTRAPS]) - self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing") - self._set_param("number_of_bootstraps", number_of_bootstraps, default=DEFAULT_NUMBER_OF_BOOTSTRAPS, - scope="general.postprocessing") - self._set_param("bootstrap_method", bootstrap_method, default=DEFAULT_BOOTSTRAP_METHOD) - self._set_param("bootstrap_type", bootstrap_type, default=DEFAULT_BOOTSTRAP_TYPE) + self._set_param("do_uncertainty_estimate", do_uncertainty_estimate, + default=DEFAULT_DO_UNCERTAINTY_ESTIMATE, scope="general.postprocessing") + self._set_param("block_length", uncertainty_estimate_block_length, + default=DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH, scope="uncertainty_estimate") + self._set_param("evaluate_competitors", uncertainty_estimate_evaluate_competitors, + default=DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS, scope="uncertainty_estimate") + self._set_param("n_boots", uncertainty_estimate_n_boots, + default=DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS, scope="uncertainty_estimate") + + self._set_param("evaluate_feature_importance", evaluate_feature_importance, + default=DEFAULT_EVALUATE_FEATURE_IMPORTANCE, scope="general.postprocessing") + feature_importance_create_new_bootstraps = max([self.data_store.get("train_model", "general"), + feature_importance_create_new_bootstraps or + DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS]) + self._set_param("create_new_bootstraps", feature_importance_create_new_bootstraps, scope="feature_importance") + self._set_param("n_boots", feature_importance_n_boots, default=DEFAULT_FEATURE_IMPORTANCE_N_BOOTS, + scope="feature_importance") + self._set_param("bootstrap_method", feature_importance_bootstrap_method, + default=DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD, scope="feature_importance") + self._set_param("bootstrap_type", feature_importance_bootstrap_type, + default=DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, scope="feature_importance") + self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing") self._set_param("neighbors", ["DEBW030"]) # TODO: just for testing @@ -384,8 +402,10 @@ class ExperimentSetup(RunEnvironment): if len(self.data_store.search_name(k)) == 0: self._set_param(k, v) else: + s = ", ".join([f"{k}({s})={self.data_store.get(k, scope=s)}" + for s in self.data_store.search_name(k)]) raise KeyError(f"Given argument {k} with value {v} cannot be set for this experiment due to a " - f"conflict with an existing entry with same naming: 
{k}={self.data_store.get(k)}") + f"conflict with an existing entry with same naming: {s}") def _set_param(self, param: str, value: Any, default: Any = None, scope: str = "general", apply: Callable = None) -> Any: diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index fa6326050b611ed4e27d75867e920b7e70d2fa9f..25bd4465556a594393050f80f2a189f4f8039e1b 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -16,13 +16,14 @@ import pandas as pd import xarray as xr from mlair.configuration import path_config -from mlair.data_handler import BootStraps, KerasIterator +from mlair.data_handler import Bootstraps, KerasIterator from mlair.helpers.datastore import NameNotFoundInDataStore from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list, tables from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel from mlair.model_modules import AbstractModelClass from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \ - PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales + PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, \ + PlotSeparationOfScales, PlotSampleUncertaintyFromBootstrap from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \ PlotPeriodogram, PlotDataHistogram from mlair.run_modules.run_environment import RunEnvironment @@ -48,7 +49,7 @@ class PostProcessing(RunEnvironment): * `target_var` [.] * `sampling` [.] * `output_shape` [model] - * `evaluate_bootstraps` [postprocessing] and if enabled: + * `evaluate_feature_importance` [postprocessing] and if enabled: * `create_new_bootstraps` [postprocessing] * `bootstrap_path` [postprocessing] @@ -83,11 +84,17 @@ class PostProcessing(RunEnvironment): self._sampling = self.data_store.get("sampling") self.window_lead_time = extract_value(self.data_store.get("output_shape", "model")) self.skill_scores = None - self.bootstrap_skill_scores = None + self.feature_importance_skill_scores = None + self.uncertainty_estimate = None self.competitor_path = self.data_store.get("competitor_path") self.competitors = to_list(self.data_store.get_default("competitors", default=[])) self.forecast_indicator = "nn" + self.observation_indicator = "obs" self.ahead_dim = "ahead" + self.boot_var_dim = "boot_var" + self.uncertainty_estimate_boot_dim = "boots" + self.model_type_dim = "type" + self.index_dim = "index" self._run() def _run(self): @@ -101,25 +108,132 @@ class PostProcessing(RunEnvironment): # calculate error metrics on test data self.calculate_test_score() - # bootstraps - if self.data_store.get("evaluate_bootstraps", "postprocessing"): - with TimeTracking(name="calculate bootstraps"): - create_new_bootstraps = self.data_store.get("create_new_bootstraps", "postprocessing") - bootstrap_method = self.data_store.get("bootstrap_method", "postprocessing") - bootstrap_type = self.data_store.get("bootstrap_type", "postprocessing") - self.bootstrap_postprocessing(create_new_bootstraps, bootstrap_type=bootstrap_type, - bootstrap_method=bootstrap_method) + # sample uncertainty + if self.data_store.get("do_uncertainty_estimate", "postprocessing"): + self.estimate_sample_uncertainty() + + # feature importance bootstraps + if self.data_store.get("evaluate_feature_importance", "postprocessing"): + with TimeTracking(name="calculate feature importance 
using bootstraps"): + create_new_bootstraps = self.data_store.get("create_new_bootstraps", "feature_importance") + bootstrap_method = self.data_store.get("bootstrap_method", "feature_importance") + bootstrap_type = self.data_store.get("bootstrap_type", "feature_importance") + self.calculate_feature_importance(create_new_bootstraps, bootstrap_type=bootstrap_type, + bootstrap_method=bootstrap_method) + if self.feature_importance_skill_scores is not None: + self.report_feature_importance_results(self.feature_importance_skill_scores) # skill scores and error metrics with TimeTracking(name="calculate skill scores"): - skill_score_competitive, skill_score_climatological, errors = self.calculate_error_metrics() + skill_score_competitive, _, skill_score_climatological, errors = self.calculate_error_metrics() self.skill_scores = (skill_score_competitive, skill_score_climatological) self.report_error_metrics(errors) - self.report_error_metrics(skill_score_climatological) + self.report_error_metrics({self.forecast_indicator: skill_score_climatological}) + self.report_error_metrics({"skill_score": skill_score_competitive}) # plotting self.plot() + def estimate_sample_uncertainty(self, separate_ahead=False): + """ + Estimate sample uncertainty by using a bootstrap approach. Forecasts are split into individual blocks along time + and randomly drawn with replacement. The resulting behaviour of the error indicates the robustness of each + analyzed model to quantify which model might be superior compared to others. + """ + n_boots = self.data_store.get_default("n_boots", default=1000, scope="uncertainty_estimate") + block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate") + evaluate_competitors = self.data_store.get_default("evaluate_competitors", default=True, + scope="uncertainty_estimate") + block_mse = self.calculate_block_mse(evaluate_competitors=evaluate_competitors, separate_ahead=separate_ahead, + block_length=block_length) + self.uncertainty_estimate = statistics.create_n_bootstrap_realizations( + block_mse, dim_name_time=self.index_dim, dim_name_model=self.model_type_dim, + dim_name_boots=self.uncertainty_estimate_boot_dim, n_boots=n_boots) + self.report_sample_uncertainty() + + def report_sample_uncertainty(self, percentiles: list = None): + """ + Store raw results of uncertainty estimate and calculate aggregate statistcs and store as raw data but also as + markdown and latex. + """ + report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report") + path_config.check_path_and_create(report_path) + + # store raw results as nc + file_name = os.path.join(report_path, "uncertainty_estimate_raw_results.nc") + self.uncertainty_estimate.to_netcdf(path=file_name) + + # store statistics + if percentiles is None: + percentiles = [.05, .1, .25, .5, .75, .9, .95] + df_descr = self.uncertainty_estimate.to_pandas().describe(percentiles=percentiles).astype("float32") + column_format = tables.create_column_format_for_tex(df_descr) + file_name = os.path.join(report_path, "uncertainty_estimate_statistics.%s") + tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df_descr) + tables.save_to_md(report_path, file_name % "md", df=df_descr) + df_descr.to_csv(file_name % "csv", sep=";") + + def calculate_block_mse(self, evaluate_competitors=True, separate_ahead=False, block_length="1m"): + """ + Transform data into blocks along time axis. Block length can be any frequency like '1m' or '7d. 
Data are only + split along time axis, which means that a single block can have very diverse quantities regarding the number of + stations or actual data contained. This is intended to analyze not only the robustness over time but also + against the number of observations and the diversity of stations. + """ + path = self.data_store.get("forecast_path") + all_stations = self.data_store.get("stations") + start = self.data_store.get("start", "test") + end = self.data_store.get("end", "test") + index_dim = self.index_dim + coll_dim = "station" + collector = [] + for station in all_stations: + # test data + external_data = self._get_external_data(station, path) + if external_data is not None: + pass + # competitors + if evaluate_competitors is True: + competitor = self.load_competitors(station) + combined = self._combine_forecasts(external_data, competitor, dim=self.model_type_dim) + else: + combined = external_data + + if combined is None: + continue + else: + combined = self.create_full_time_dim(combined, index_dim, self._sampling, start, end) + # get squared errors + errors = self.create_error_array(combined) + # calc mse for each block (single station) + mse = errors.resample(indexer={index_dim: block_length}).mean(skipna=True) + collector.append(mse.assign_coords({coll_dim: station})) + # calc mse for each block (average over all stations) + mse_blocks = xr.concat(collector, dim=coll_dim).mean(dim=coll_dim, skipna=True) + # average also on ahead steps + if separate_ahead is False: + mse_blocks = mse_blocks.mean(dim=self.ahead_dim, skipna=True) + return mse_blocks + + def create_error_array(self, data): + """Calculate squared error of all given time series in relation to observation.""" + errors = data.drop_sel({self.model_type_dim: self.observation_indicator}) + errors1 = errors - data.sel({self.model_type_dim: self.observation_indicator}) + errors2 = errors1 ** 2 + return errors2 + + @staticmethod + def create_full_time_dim(data, dim, sampling, start, end): + """Ensure time dimension to be equidistant. Sometimes dates are missing, e.g. if values have been dropped.""" + start_data = data.coords[dim].values[0] + freq = {"daily": "1D", "hourly": "1H"}.get(sampling) + datetime_index = pd.DataFrame(index=pd.date_range(start, end, freq=freq)) + t = data.sel({dim: start_data}, drop=True) + res = xr.DataArray(coords=[datetime_index.index, *[t.coords[c] for c in t.coords]], dims=[dim, *t.coords]) + res = res.transpose(*data.dims) + res.loc[data.coords] = data + return res + def load_competitors(self, station_name: str) -> xr.DataArray: """ Load all requested and available competitors for a given station. Forecasts must be available in the competitor @@ -139,10 +253,10 @@ except (FileNotFoundError, KeyError): logging.debug(f"No competitor found for combination '{station_name}' and '{competitor_name}'.") continue - return xr.concat(competing_predictions, "type") if len(competing_predictions) > 0 else None + return xr.concat(competing_predictions, self.model_type_dim) if len(competing_predictions) > 0 else None - def bootstrap_postprocessing(self, create_new_bootstraps: bool, _iter: int = 0, bootstrap_type="singleinput", - bootstrap_method="shuffle") -> None: + def calculate_feature_importance(self, create_new_bootstraps: bool, _iter: int = 0, bootstrap_type="singleinput", + bootstrap_method="shuffle") -> None: """ Calculate skill scores of bootstrapped data. 
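# --- Illustration (not MLAir code): the idea behind calculate_feature_importance. ---
# Each input variable is bootstrapped (e.g. shuffled), predictions are recomputed, and the
# result is compared against the original forecast via general_skill_score, i.e.
# skill = 1 - mse(obs, boot) / mse(obs, orig); negative values mark important inputs.
# A minimal numpy-only sketch, with a hypothetical `predict` callable standing in for the
# trained model (names here are illustrative, not part of the patch):
import numpy as np

def shuffle_variable_importance(predict, x, obs, rng=None):
    """Return per-variable skill scores; strongly negative values mark important inputs."""
    if rng is None:
        rng = np.random.default_rng()
    orig = predict(x)
    mse_orig = np.mean((obs - orig) ** 2)
    scores = {}
    for j in range(x.shape[1]):
        x_boot = x.copy()
        rng.shuffle(x_boot[:, j])  # destroy the information of variable j only
        boot = predict(x_boot)
        scores[j] = 1 - np.mean((obs - boot) ** 2) / mse_orig
    return scores

# Toy check with a linear "model": variable 0 matters, variable 1 does not.
rng = np.random.default_rng(0)
x = rng.normal(size=(500, 2))
obs = 2 * x[:, 0] + 0.1 * rng.normal(size=500)
print(shuffle_variable_importance(lambda a: 2 * a[:, 0], x, obs, rng))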
@@ -155,26 +269,29 @@ class PostProcessing(RunEnvironment): :param _iter: internal counter to reduce unnecessary recursive calls (maximum number is 2, otherwise something went wrong). """ - self.bootstrap_skill_scores = {} + if _iter == 0: + self.feature_importance_skill_scores = {} for boot_type in to_list(bootstrap_type): - self.bootstrap_skill_scores[boot_type] = {} + self.feature_importance_skill_scores[boot_type] = {} for boot_method in to_list(bootstrap_method): try: if create_new_bootstraps: - self.create_bootstrap_forecast(bootstrap_type=boot_type, bootstrap_method=boot_method) - boot_skill_score = self.calculate_bootstrap_skill_scores(bootstrap_type=boot_type, - bootstrap_method=boot_method) - self.bootstrap_skill_scores[boot_type][boot_method] = boot_skill_score + self.create_feature_importance_bootstrap_forecast(bootstrap_type=boot_type, + bootstrap_method=boot_method) + boot_skill_score = self.calculate_feature_importance_skill_scores(bootstrap_type=boot_type, + bootstrap_method=boot_method) + self.feature_importance_skill_scores[boot_type][boot_method] = boot_skill_score except FileNotFoundError: if _iter != 0: - raise RuntimeError(f"bootstrap_postprocessing ({boot_type}, {boot_type}) was called for the 2nd" - f" time. This means, that something internally goes wrong. Please check for " - f"possible errors") - logging.info(f"Could not load all files for bootstrapping ({boot_type}, {boot_type}), restart " - f"bootstrap postprocessing with create_new_bootstraps=True.") - self.bootstrap_postprocessing(True, _iter=1, bootstrap_type=boot_type, bootstrap_method=boot_method) + raise RuntimeError(f"calculate_feature_importance ({boot_type}, {boot_type}) was called for the " + f"2nd time. This means, that something internally goes wrong. Please check " + f"for possible errors") + logging.info(f"Could not load all files for feature importance ({boot_type}, {boot_type}), restart " + f"calculate_feature_importance with create_new_bootstraps=True.") + self.calculate_feature_importance(True, _iter=1, bootstrap_type=boot_type, + bootstrap_method=boot_method) - def create_bootstrap_forecast(self, bootstrap_type, bootstrap_method) -> None: + def create_feature_importance_bootstrap_forecast(self, bootstrap_type, bootstrap_method) -> None: """ Create bootstrapped predictions for all stations and variables. 
@@ -185,11 +302,11 @@ class PostProcessing(RunEnvironment): with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"): # extract all requirements from data store forecast_path = self.data_store.get("forecast_path") - number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing") - dims = ["index", self.ahead_dim, "type"] + number_of_bootstraps = self.data_store.get("n_boots", "feature_importance") + dims = [self.index_dim, self.ahead_dim, self.model_type_dim] for station in self.test_data: X, Y = None, None - bootstraps = BootStraps(station, number_of_bootstraps, bootstrap_type=bootstrap_type, + bootstraps = Bootstraps(station, number_of_bootstraps, bootstrap_type=bootstrap_type, bootstrap_method=bootstrap_method) for boot in bootstraps: X, Y, (index, dimension) = boot @@ -210,10 +327,10 @@ class PostProcessing(RunEnvironment): # store also true labels for each station labels = np.expand_dims(Y, axis=-1) file_name = os.path.join(forecast_path, f"bootstraps_{station}_{bootstrap_method}_labels.nc") - labels = xr.DataArray(labels, coords=(*coords, ["obs"]), dims=dims) + labels = xr.DataArray(labels, coords=(*coords, [self.observation_indicator]), dims=dims) labels.to_netcdf(file_name) - def calculate_bootstrap_skill_scores(self, bootstrap_type, bootstrap_method) -> Dict[str, xr.DataArray]: + def calculate_feature_importance_skill_scores(self, bootstrap_type, bootstrap_method) -> Dict[str, xr.DataArray]: """ Calculate skill score of bootstrapped variables. @@ -226,10 +343,11 @@ class PostProcessing(RunEnvironment): with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"): # extract all requirements from data store forecast_path = self.data_store.get("forecast_path") - number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing") + number_of_bootstraps = self.data_store.get("n_boots", "feature_importance") forecast_file = f"forecasts_norm_%s_test.nc" + reference_name = "orig" - bootstraps = BootStraps(self.test_data[0], number_of_bootstraps, bootstrap_type=bootstrap_type, + bootstraps = Bootstraps(self.test_data[0], number_of_bootstraps, bootstrap_type=bootstrap_type, bootstrap_method=bootstrap_method) number_of_bootstraps = bootstraps.number_of_bootstraps bootstrap_iter = bootstraps.bootstraps() @@ -245,8 +363,8 @@ class PostProcessing(RunEnvironment): # get original forecasts orig = self.get_orig_prediction(forecast_path, forecast_file % str(station), number_of_bootstraps) orig = orig.reshape(shape) - coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"]) - orig = xr.DataArray(orig, coords=coords, dims=["index", self.ahead_dim, "type"]) + coords = (range(shape[0]), range(1, shape[1] + 1), [reference_name]) + orig = xr.DataArray(orig, coords=coords, dims=[self.index_dim, self.ahead_dim, self.model_type_dim]) # calculate skill scores for each variable skill = pd.DataFrame(columns=range(1, self.window_lead_time + 1)) @@ -261,11 +379,12 @@ class PostProcessing(RunEnvironment): for ahead in range(1, self.window_lead_time + 1): data = boot_data.sel({self.ahead_dim: ahead}) boot_scores.append( - skill_scores.general_skill_score(data, forecast_name=boot_var, reference_name="orig")) + skill_scores.general_skill_score(data, forecast_name=boot_var, + reference_name=reference_name)) skill.loc[boot_var] = np.array(boot_scores) # collect all results in single dictionary - score[str(station)] = xr.DataArray(skill, dims=["boot_var", self.ahead_dim]) + score[str(station)] = 
xr.DataArray(skill, dims=[self.boot_var_dim, self.ahead_dim]) return score def get_orig_prediction(self, path, file_name, number_of_bootstraps, prediction_name=None): @@ -342,8 +461,8 @@ class PostProcessing(RunEnvironment): f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") try: - if (self.bootstrap_skill_scores is not None) and ("PlotBootstrapSkillScore" in plot_list): - for boot_type, boot_data in self.bootstrap_skill_scores.items(): + if (self.feature_importance_skill_scores is not None) and ("PlotBootstrapSkillScore" in plot_list): + for boot_type, boot_data in self.feature_importance_skill_scores.items(): for boot_method, boot_skill_score in boot_data.items(): try: PlotBootstrapSkillScore(boot_skill_score, plot_folder=self.plot_path, @@ -453,6 +572,17 @@ class PostProcessing(RunEnvironment): logging.error(f"Could not create plot PlotDataHistogram due to the following error: {e}" f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") + try: + if "PlotSampleUncertaintyFromBootstrap" in plot_list and self.uncertainty_estimate is not None: + block_length= self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate") + PlotSampleUncertaintyFromBootstrap( + data=self.uncertainty_estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim, + dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error", + error_unit=r"ppb$^2$", block_length=block_length) + except Exception as e: + logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}" + f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}") + def calculate_test_score(self): """Evaluate test score of model and save locally.""" @@ -512,10 +642,11 @@ class PostProcessing(RunEnvironment): full_index = self.create_fullindex(observation_data.indexes[time_dimension], self._get_frequency()) prediction_dict = {self.forecast_indicator: nn_prediction, "persi": persistence_prediction, - "obs": observation, + self.observation_indicator: observation, "ols": ols_prediction} all_predictions = self.create_forecast_arrays(full_index, list(target_data.indexes[window_dim]), time_dimension, ahead_dim=self.ahead_dim, + index_dim=self.index_dim, type_dim=self.model_type_dim, **prediction_dict) # save all forecasts locally @@ -545,7 +676,7 @@ class PostProcessing(RunEnvironment): with xr.open_dataarray(file) as da: data = da.load() forecast = data.sel(type=[self.forecast_indicator]) - forecast.coords["type"] = [competitor_name] + forecast.coords[self.model_type_dim] = [competitor_name] return forecast def _create_observation(self, data, _, transformation_func: Callable, normalised: bool) -> xr.DataArray: @@ -675,7 +806,7 @@ class PostProcessing(RunEnvironment): @staticmethod def create_forecast_arrays(index: pd.DataFrame, ahead_names: List[Union[str, int]], time_dimension, - ahead_dim="ahead", **kwargs): + ahead_dim="ahead", index_dim="index", type_dim="type", **kwargs): """ Combine different forecast types into single xarray. 
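# --- Illustration (not MLAir code): the forecast cube built by create_forecast_arrays. ---
# All forecast types are collected in one DataArray with dimensions (index, ahead, type);
# _combine_forecasts then concatenates competitor forecasts along the same type dimension.
# A small standalone sketch of that layout with toy values (dimension and type names as in
# the defaults above; assumes only numpy, pandas and xarray):
import numpy as np
import pandas as pd
import xarray as xr

index = pd.date_range("2017-01-01", periods=5, freq="D")
ahead = [1, 2, 3, 4]
types = ["nn", "persi", "obs", "ols"]
res = xr.DataArray(np.full((len(index), len(ahead), len(types)), np.nan),
                   coords=[index, ahead, types], dims=["index", "ahead", "type"])
res.loc[:, :, "nn"] = np.random.random((5, 4))  # fill one forecast type

# a competitor forecast sharing the index/ahead coordinates, concatenated along "type"
competitor = xr.DataArray(np.random.random((5, 4, 1)),
                          coords=[index, ahead, ["competitor_model"]],
                          dims=["index", "ahead", "type"])
combined = xr.concat([res, competitor], dim="type")  # as in _combine_forecasts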
@@ -688,7 +819,7 @@ class PostProcessing(RunEnvironment): """ keys = list(kwargs.keys()) res = xr.DataArray(np.full((len(index.index), len(ahead_names), len(keys)), np.nan), - coords=[index.index, ahead_names, keys], dims=['index', ahead_dim, 'type']) + coords=[index.index, ahead_names, keys], dims=[index_dim, ahead_dim, type_dim]) for k, v in kwargs.items(): intersection = set(res.index.values) & set(v.indexes[time_dimension].values) match_index = np.array(list(intersection)) @@ -727,18 +858,19 @@ class PostProcessing(RunEnvironment): except (IndexError, KeyError, FileNotFoundError): return None - @staticmethod - def _combine_forecasts(forecast, competitor, dim="type"): + def _combine_forecasts(self, forecast, competitor, dim=None): """ Combine forecast and competitor if both are xarray. If competitor is None, this returns forecasts and vise versa. """ + if dim is None: + dim = self.model_type_dim try: return xr.concat([forecast, competitor], dim=dim) except (TypeError, AttributeError): return forecast if competitor is None else competitor - def calculate_error_metrics(self) -> Tuple[Dict, Dict, Dict]: + def calculate_error_metrics(self) -> Tuple[Dict, Dict, Dict, Dict]: """ Calculate error metrics and skill scores of NN forecast. @@ -751,6 +883,7 @@ class PostProcessing(RunEnvironment): path = self.data_store.get("forecast_path") all_stations = self.data_store.get("stations") skill_score_competitive = {} + skill_score_competitive_count = {} skill_score_climatological = {} errors = {} for station in all_stations: @@ -758,24 +891,61 @@ class PostProcessing(RunEnvironment): # test errors if external_data is not None: - errors[station] = statistics.calculate_error_metrics(*map(lambda x: external_data.sel(type=x), - [self.forecast_indicator, "obs"]), - dim="index") - # skill score + model_type_list = external_data.coords[self.model_type_dim].values.tolist() + for model_type in remove_items(model_type_list, self.observation_indicator): + if model_type not in errors.keys(): + errors[model_type] = {} + errors[model_type][station] = statistics.calculate_error_metrics( + *map(lambda x: external_data.sel(**{self.model_type_dim: x}), + [model_type, self.observation_indicator]), dim=self.index_dim) + + # load competitors competitor = self.load_competitors(station) - combined = self._combine_forecasts(external_data, competitor, dim="type") - model_list = remove_items(list(combined.type.values), "obs") if combined is not None else None + combined = self._combine_forecasts(external_data, competitor, dim=self.model_type_dim) + if combined is not None: + model_list = remove_items(combined.coords[self.model_type_dim].values.tolist(), + self.observation_indicator) + else: + model_list = None + + # test errors of competitors + for model_type in remove_items(model_list or [], list(errors.keys())): + if self.observation_indicator not in combined.coords[self.model_type_dim]: + continue + if model_type not in errors.keys(): + errors[model_type] = {} + errors[model_type][station] = statistics.calculate_error_metrics( + *map(lambda x: combined.sel(**{self.model_type_dim: x}), + [model_type, self.observation_indicator]), dim=self.index_dim) + + # skill score skill_score = statistics.SkillScores(combined, models=model_list, ahead_dim=self.ahead_dim) if external_data is not None: - skill_score_competitive[station] = skill_score.skill_scores() + skill_score_competitive[station], skill_score_competitive_count[station] = skill_score.skill_scores() internal_data = self._get_internal_data(station, path) if internal_data 
is not None: skill_score_climatological[station] = skill_score.climatological_skill_scores( internal_data, forecast_name=self.forecast_indicator) - errors.update({"total": self.calculate_average_errors(errors)}) - return skill_score_competitive, skill_score_climatological, errors + for model_type in errors.keys(): + errors[model_type].update({"total": self.calculate_average_errors(errors[model_type])}) + skill_score_competitive.update({"total": self.calculate_average_skill_scores(skill_score_competitive, + skill_score_competitive_count)}) + return skill_score_competitive, skill_score_competitive_count, skill_score_climatological, errors + + @staticmethod + def calculate_average_skill_scores(scores, counts): + avg_skill_score = 0 + n_total = None + for vals in counts.values(): + n_total = vals if n_total is None else n_total.add(vals, fill_value=0) + for station, station_scores in scores.items(): + n = counts.get(station) + avg_skill_score = station_scores.mul(n.div(n_total, fill_value=0), fill_value=0).add(avg_skill_score, + fill_value=0) + return avg_skill_score + @staticmethod def calculate_average_errors(errors): @@ -788,28 +958,55 @@ class PostProcessing(RunEnvironment): avg_error[error_metric] = new_val return avg_error + def report_feature_importance_results(self, results): + """Create a csv file containing all results from feature importance.""" + report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report") + path_config.check_path_and_create(report_path) + res = [[self.model_type_dim, "method", "station", self.boot_var_dim, self.ahead_dim, "vals"]] + for boot_type, d0 in results.items(): + for boot_method, d1 in d0.items(): + for station_name, vals in d1.items(): + for boot_var in vals.coords[self.boot_var_dim].values.tolist(): + for ahead in vals.coords[self.ahead_dim].values.tolist(): + res.append([boot_type, boot_method, station_name, boot_var, ahead, + float(vals.sel({self.boot_var_dim: boot_var, self.ahead_dim: ahead}))]) + col_names = res.pop(0) + df = pd.DataFrame(res, columns=col_names) + file_name = "feature_importance_skill_score_report_raw.csv" + df.to_csv(os.path.join(report_path, file_name), sep=";") + def report_error_metrics(self, errors): report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report") path_config.check_path_and_create(report_path) - metric_collection = {} - for station, station_errors in errors.items(): - if isinstance(station_errors, xr.DataArray): - dim = station_errors.dims[0] - sel_index = [sel for sel in station_errors.coords[dim] if "CASE" in str(sel)] - station_errors = {str(i.values): station_errors.sel(**{dim: i}) for i in sel_index} - for metric, vals in station_errors.items(): - if metric == "n": - continue - pd_vals = pd.DataFrame.from_dict({station: vals}).T - pd_vals.columns = [f"{metric}(t+{x})" for x in vals.coords["ahead"].values] - mc = metric_collection.get(metric, pd.DataFrame()) - mc = mc.append(pd_vals) - metric_collection[metric] = mc - for metric, error_df in metric_collection.items(): - df = error_df.sort_index() - if "total" in df.index: - df.reindex(df.index.drop(["total"]).to_list() + ["total"], ) - column_format = tables.create_column_format_for_tex(df) - file_name = f"error_report_{metric}.%s".replace(' ', '_') - tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df) - tables.save_to_md(report_path, file_name % "md", df=df) + for model_type in errors.keys(): + metric_collection = {} + for station, station_errors in errors[model_type].items(): + if 
isinstance(station_errors, xr.DataArray): + dim = station_errors.dims[0] + sel_index = [sel for sel in station_errors.coords[dim] if "CASE" in str(sel)] + station_errors = {str(i.values): station_errors.sel(**{dim: i}) for i in sel_index} + elif isinstance(station_errors, pd.DataFrame): + sel_index = station_errors.index.tolist() + ahead = station_errors.columns.values + station_errors = {k: xr.DataArray(station_errors[station_errors.index == k].values.flatten(), + dims=["ahead"], coords={"ahead": ahead}).astype(float) + for k in sel_index} + for metric, vals in station_errors.items(): + if metric == "n": + metric = "count" + pd_vals = pd.DataFrame.from_dict({station: vals}).T + pd_vals.columns = [f"{metric}(t+{x})" for x in vals.coords["ahead"].values] + mc = metric_collection.get(metric, pd.DataFrame()) + mc = mc.append(pd_vals) + metric_collection[metric] = mc + for metric, error_df in metric_collection.items(): + df = error_df.sort_index() + if "total" in df.index: + df.reindex(df.index.drop(["total"]).to_list() + ["total"], ) + column_format = tables.create_column_format_for_tex(df) + if model_type == "skill_score": + file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_') + else: + file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_') + tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df) + tables.save_to_md(report_path, file_name % "md", df=df) diff --git a/mlair/workflows/default_workflow.py b/mlair/workflows/default_workflow.py index 5894555a6af52299efcd8d88d76c0d3791a1599e..961979cb774e928bda96d4cd1a3a7b0f8565e968 100644 --- a/mlair/workflows/default_workflow.py +++ b/mlair/workflows/default_workflow.py @@ -31,7 +31,6 @@ class DefaultWorkflow(Workflow): permute_data_on_training=None, extreme_values=None, extremes_on_right_tail_only=None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, - evaluate_bootstraps=None, number_of_bootstraps=None, create_new_bootstraps=None, plot_list=None, model=None, batch_size=None, diff --git a/run.py b/run.py index 1d607acfc8a789080a38da9ee546e516be845797..82bb0e2814d403b5be602eaebd1bc44b6cf6d6f9 100644 --- a/run.py +++ b/run.py @@ -28,8 +28,8 @@ def main(parser_args): # stations=["DEBW087","DEBW013", "DEBW107", "DEBW076"], stations=["DEBW013", "DEBW087", "DEBW107", "DEBW076"], train_model=False, create_new_model=True, network="UBA", - evaluate_bootstraps=False, # plot_list=["PlotCompetitiveSkillScore"], - competitors=["test_model", "test_model2"], # model=chosen_model, + evaluate_feature_importance=False, # plot_list=["PlotCompetitiveSkillScore"], + competitors=["test_model", "test_model2"], competitor_path=os.path.join(os.getcwd(), "data", "comp_test"), **parser_args.__dict__, start_script=__file__) workflow.run() diff --git a/run_climate_filter.py b/run_climate_filter.py index 5577375e2fc135676f71151791c1d564dcb25a2e..7f3fcbaaba94506faeadd884073620496155f8ea 100644 --- a/run_climate_filter.py +++ b/run_climate_filter.py @@ -65,9 +65,9 @@ def main(parser_args): train_model=False, create_new_model=True, epochs=1, model=NN, - evaluate_bootstraps=False, - bootstrap_type=["singleinput", "branch", "variable"], - bootstrap_method=["shuffle", "zero_mean"], + evaluate_feature_importance=False, + feature_importance_bootstrap_type=["singleinput", "branch", "variable"], + feature_importance_bootstrap_method=["shuffle", "zero_mean"], plot_list=DEFAULT_PLOT_LIST, # competitors=["IntelliO3-ts-v1", "MFCN_64_32_16_climFIR", "FCN_1449_512_32_4"], # 
"FCN_1449_16_8_4",], lazy_preprocessing=True, diff --git a/run_mixed_sampling.py b/run_mixed_sampling.py index 819ef51129854b4539632ef91a55e33a2607eb55..784f653fbfb2eb4c78e6e858acf67cd0ae47a593 100644 --- a/run_mixed_sampling.py +++ b/run_mixed_sampling.py @@ -20,7 +20,7 @@ data_origin = {'o3': '', 'no': '', 'no2': '', def main(parser_args): args = dict(stations=["DEBW107", "DEBW013"], network="UBA", - evaluate_bootstraps=False, plot_list=[], + evaluate_feature_importance=False, plot_list=[], data_origin=data_origin, data_handler=DataHandlerMixedSampling, interpolation_limit=(3, 1), overwrite_local_data=False, sampling=("hourly", "daily"), diff --git a/test/test_configuration/test_defaults.py b/test/test_configuration/test_defaults.py index b6bdd9556f73ff711003b01c3a2b65a1c20c66d3..8644181185203186bb6c8549e8faa99e75a31a81 100644 --- a/test/test_configuration/test_defaults.py +++ b/test/test_configuration/test_defaults.py @@ -62,10 +62,18 @@ class TestAllDefaults: assert DEFAULT_HPC_LOGIN_LIST == ["ju", "hdfmll"] def test_postprocessing_parameters(self): - assert DEFAULT_EVALUATE_BOOTSTRAPS is True - assert DEFAULT_CREATE_NEW_BOOTSTRAPS is False - assert DEFAULT_NUMBER_OF_BOOTSTRAPS == 20 + assert DEFAULT_DO_UNCERTAINTY_ESTIMATE is True + assert DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH == "1m" + assert DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS is True + assert DEFAULT_UNCERTAINTY_ESTIMATE_N_BOOTS == 1000 + assert DEFAULT_EVALUATE_FEATURE_IMPORTANCE is True + assert DEFAULT_FEATURE_IMPORTANCE_CREATE_NEW_BOOTSTRAPS is False + assert DEFAULT_FEATURE_IMPORTANCE_N_BOOTS == 20 + assert DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE == "singleinput" + assert DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_METHOD == "shuffle" assert DEFAULT_PLOT_LIST == ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles", "PlotAvailability", "PlotAvailabilityHistogram", - "PlotDataHistogram", "PlotPeriodogram"] + "PlotDataHistogram", "PlotPeriodogram", "PlotSampleUncertaintyFromBootstrap"] + + diff --git a/test/test_data_handler/old_t_bootstraps.py b/test/test_data_handler/old_t_bootstraps.py index 21c18c6c2d6f6a6a38a41250f00d3d14a29ed457..e21af9f614d4cfaaadf946cdab72cc69cd5b19a7 100644 --- a/test/test_data_handler/old_t_bootstraps.py +++ b/test/test_data_handler/old_t_bootstraps.py @@ -7,7 +7,7 @@ import numpy as np import pytest import xarray as xr -from mlair.data_handler.bootstraps import BootStraps +from mlair.data_handler.input_bootstraps import Bootstraps from src.data_handler import DataPrepJoin @@ -171,22 +171,22 @@ class TestBootStraps: @pytest.fixture def bootstrap(self, orig_generator, data_path): - return BootStraps(orig_generator, data_path, 20) + return Bootstraps(orig_generator, data_path, 20) @pytest.fixture @mock.patch("mlair.data_handling.bootstraps.CreateShuffledData", return_value=None) def bootstrap_no_shuffling(self, mock_create_shuffle_data, orig_generator, data_path): shutil.rmtree(data_path) - return BootStraps(orig_generator, data_path, 20) + return Bootstraps(orig_generator, data_path, 20) def test_init_no_shuffling(self, bootstrap_no_shuffling, data_path): - assert isinstance(bootstrap_no_shuffling, BootStraps) + assert isinstance(bootstrap_no_shuffling, Bootstraps) assert bootstrap_no_shuffling.number_of_bootstraps == 20 assert bootstrap_no_shuffling.bootstrap_path == data_path def test_init_with_shuffling(self, orig_generator, data_path, caplog): 
caplog.set_level(logging.INFO) - BootStraps(orig_generator, data_path, 20) + Bootstraps(orig_generator, data_path, 20) assert caplog.record_tuples[0] == ('root', logging.INFO, "create / check shuffled bootstrap data") def test_stations(self, bootstrap_no_shuffling, orig_generator): @@ -213,9 +213,9 @@ class TestBootStraps: @mock.patch("mlair.data_handling.data_generator.DataGenerator._load_pickle_data", side_effect=FileNotFoundError) def test_get_generator_different_generator(self, mock_load_pickle, data_path, orig_generator): - BootStraps(orig_generator, data_path, 20) # to create + Bootstraps(orig_generator, data_path, 20) # to create orig_generator.window_history_size = 4 - bootstrap = BootStraps(orig_generator, data_path, 20) + bootstrap = Bootstraps(orig_generator, data_path, 20) station = bootstrap.stations[0] var = bootstrap.variables[0] var_others = bootstrap.variables[1:] diff --git a/test/test_helpers/test_statistics.py b/test/test_helpers/test_statistics.py index 2a77f0806b886bee1a3961ff0a972e8f0ee62873..f5148cdc293939d5711afb57c2fa009c47b6c86d 100644 --- a/test/test_helpers/test_statistics.py +++ b/test/test_helpers/test_statistics.py @@ -4,7 +4,8 @@ import pytest import xarray as xr from mlair.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, \ - centre_apply, apply_inverse_transformation, min_max, min_max_inverse, min_max_apply, log, log_inverse, log_apply + centre_apply, apply_inverse_transformation, min_max, min_max_inverse, min_max_apply, log, log_inverse, log_apply, \ + create_single_bootstrap_realization, calculate_average, create_n_bootstrap_realizations lazy = pytest.lazy_fixture @@ -221,3 +222,36 @@ class TestLog: data_ref, opts = log(data_orig, dim) data_test = log_apply(data_orig, opts["mean"], opts["std"]) assert np.testing.assert_array_almost_equal(data_ref, data_test) is None + + +class TestCreateBootstrapRealizations: + + @pytest.fixture + def data(self): + return xr.DataArray(np.array(range(20)).reshape(2 , -1).T, + dims={'time': range(10), 'model': ['m1', 'm2']}, + coords={'time': range(10), 'model': ['m1', 'm2']}) + + def test_create_single_bootstrap_realization(self, data): + np.random.seed(42) + proc_data = create_single_bootstrap_realization(data, "time") + assert isinstance(proc_data, xr.DataArray) + assert (proc_data.coords['time'].values == np.array([6, 3, 7, 4, 6, 9, 2, 6, 7, 4])).all() + # check if all time index values of proc_data are from data + assert np.in1d(proc_data.indexes['time'].values, data.indexes['time'].values).all() + + def test_calculate_average(self, data): + assert isinstance(data, xr.DataArray) + assert calculate_average(data) == data.mean() + assert (calculate_average(data, axis=0) == data.mean(axis=0)).all() + + def test_create_n_bootstrap_realizations(self, data): + boot_data = create_n_bootstrap_realizations(data, dim_name_time='time', dim_name_model='model', + n_boots=1000, dim_name_boots='boots') + assert isinstance(boot_data, xr.DataArray) + assert boot_data.shape == (1000, 2) + + boot_data = create_n_bootstrap_realizations(data.sel(model='m1').squeeze(), dim_name_time='time', + dim_name_model='model', n_boots=1000, dim_name_boots='boots') + assert isinstance(boot_data, xr.DataArray) + assert boot_data.shape == (1000,)
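# --- Illustration (toy data): end-to-end sketch of the new sample uncertainty estimate. ---
# Squared errors are aggregated to block-wise MSE values (block_length "1m" as in the new
# defaults) and then resampled with replacement via create_n_bootstrap_realizations, which
# this patch adds to mlair.helpers.statistics. Assumes the patched mlair package is
# importable; data and station setup below are synthetic stand-ins, not the real workflow.
import numpy as np
import pandas as pd
import xarray as xr
from mlair.helpers.statistics import create_n_bootstrap_realizations

time = pd.date_range("2017-01-01", "2017-12-31", freq="D")
rng = np.random.default_rng(0)
obs = xr.DataArray(rng.normal(size=time.size), coords=[time], dims=["index"])
fcst = xr.DataArray(rng.normal(size=(time.size, 2)), coords=[time, ["nn", "persi"]],
                    dims=["index", "type"])

squared_errors = (fcst - obs) ** 2                                          # cf. create_error_array
block_mse = squared_errors.resample(indexer={"index": "1m"}).mean(skipna=True)  # cf. calculate_block_mse

boots = create_n_bootstrap_realizations(block_mse, dim_name_time="index",
                                        dim_name_model="type", n_boots=1000)
print(boots.to_pandas().describe())  # aggregate statistics, cf. report_sample_uncertainty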