From 8ad74c53051a9e27870c58ef97c5cf5e1c4d51f1 Mon Sep 17 00:00:00 2001 From: "v.gramlich1" <v.gramlich@fz-juelich.de> Date: Mon, 2 Aug 2021 16:30:23 +0200 Subject: [PATCH] Made PlotOversamplingContingency plot competitors --- mlair/plotting/data_insight_plotting.py | 11 +- mlair/plotting/postprocessing_plotting.py | 123 ++++++++++++++-------- mlair/run_modules/post_processing.py | 4 +- 3 files changed, 82 insertions(+), 56 deletions(-) diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py index c4c1f4af..0a3b28ce 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -23,8 +23,6 @@ from mlair.plotting.abstract_plot_class import AbstractPlotClass @TimeTrackingWrapper class PlotOversampling(AbstractPlotClass): - #Todo: Build histograms correctly - def __init__(self, data, bin_edges, oversampling_rates, plot_folder: str = ".", plot_names=["oversampling_histogram", "oversampling_density_histogram", "oversampling_rates", "oversampling_rates_deviation"]): @@ -59,13 +57,8 @@ class PlotOversampling(AbstractPlotClass): def _plot_oversampling_histogram(self, Y_hist, Y_extreme_hist, bin_edges): fig, ax = plt.subplots(1, 1) - ax.hist(bin_edges[:-1], bin_edges, weights=Y_hist, label="Before oversampling") - ax.hist(bin_edges[:-1], bin_edges, weights=Y_extreme_hist, label="After oversampling") - #ax.plot(bin_edges[:-1] + 0.5 * interval_width, weights, label=f"{subset}", c=colors[subset]) - #ax.step(bin_edges, np.append(0,Y_hist), label="Before oversampling") - #ax.step(bin_edges, np.append(0,Y_extreme_hist), label="After oversampling") - #ax.stairs(Y_hist_dens, bin_edges, label="Before oversampling") - #ax.stairs(Y_extreme_hist_dens, bin_edges, label="After oversampling") + ax.hist(bin_edges[:-1], bin_edges, weights=Y_hist, label="Before oversampling", histtype="step") + ax.hist(bin_edges[:-1], bin_edges, weights=Y_extreme_hist, label="After oversampling", histtype="step") 
ax.set_title(f"Density Histogram before-after oversampling") ax.legend() diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py index 29ed4054..a0d54c18 100644 --- a/mlair/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -30,8 +30,7 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING) @TimeTrackingWrapper class PlotOversamplingContingency(AbstractPlotClass): - #Todo: 1. Make competitors flexible - # 2. Get min and max_label + #Todo: Get min and max_label def __init__(self, station_names, file_path, comp_path, file_name, plot_folder: str = ".", model_name: str = "nn", obs_name: str = "obs", comp_names: str = "IntelliO3", @@ -43,31 +42,40 @@ class PlotOversamplingContingency(AbstractPlotClass): self._file_path = file_path self._comp_path = comp_path self._file_name = file_name - self._model_name = model_name self._obs_name = obs_name + self._model_name = model_name self._comp_names = to_list(comp_names) - true_above, false_above, false_below, true_below, borders = self._calculate_contingencies() - ts, h, f = self._calculate_scores(true_above, false_above, false_below, true_below) - min_label = borders[0] - max_label = borders[1] - plt.plot(range(min_label, max_label), ts, label="threat score") - plt.legend() + self._all_names = [self._model_name] + self._all_names.extend(self._comp_names) + self._plot_names = plot_names + contingency_array, borders = self._calculate_contingencies() + self._scores = ["ts", "h", "f"] + score_array = self._calculate_all_scores(contingency_array) + self._min_label = borders[0] + self._max_label = borders[1] + self._plot_counter = 0 + + self._plot(score_array, "ts") self._save() - self.plot_name = plot_names[1] - plt.plot(range(min_label, max_label), h, label="hit rate") - plt.legend() + self._plot(score_array, "h") self._save() - self.plot_name = plot_names[2] - plt.plot(range(min_label, max_label), f, label="false alarm rate") - plt.legend() + 
self._plot(score_array, "f") self._save() - self.plot_name = plot_names[3] - plt.plot(range(min_label, max_label), ts, label="threat score") - plt.plot(range(min_label, max_label), h, label="hit rate") - plt.plot(range(min_label, max_label), f, label="false alarm rate") - plt.legend() + self._plot(score_array, "all_scores") self._save() + def _plot(self, data, score): + if score == "all_scores": + for score_name in data.scores.values.tolist(): + plt.plot(range(self._min_label, self._max_label), data.loc[dict(type="nn", scores=score_name)], label=score_name) + else: + for type in data.type.values.tolist(): + plt.plot(range(self._min_label, self._max_label), data.loc[dict(type=type, scores=score)], label=type) + plt.legend() + self.plot_name = self._plot_names[self._plot_counter] + self._plot_counter = self._plot_counter + 1 + + def _create_competitor_forecast(self, station_name: str, competitor_name: str) -> xr.DataArray: """ Load and format the competing forecast of a distinct model indicated by `competitor_name` for a distinct station @@ -109,27 +117,29 @@ class PlotOversamplingContingency(AbstractPlotClass): return xr.concat(competing_predictions, "type") if len(competing_predictions) > 0 else None def _calculate_contingencies(self): + min_label = 0 + max_label = 100 + borders = [min_label, max_label] + thresholds = np.arange(min_label, max_label) + contingency_cell = ["ta", "fa", "fb", "tb"] + contingency_array = xr.DataArray(dims=["thresholds", "contingency_cell", "type"], + coords=[thresholds, contingency_cell, self._all_names]) + contingency_array = contingency_array.fillna(0) for station in self._stations: file = os.path.join(self._file_path, self._file_name % station) forecast_file = xr.open_dataarray(file) obs = forecast_file.sel(type=self._obs_name) - model = forecast_file.sel(type=self._model_name) + predictions = [forecast_file.sel(type=self._model_name)] competitors = [self._load_competitors(station, [comp]).sel(type=comp) for comp in self._comp_names] 
- min_label = 0 - max_label = 100 - borders = [min_label, max_label] - true_above = [] - false_above = [] - false_below = [] - true_below = [] + predictions.extend(competitors) for threshold in range(min_label, max_label): - ta, fa, fb, tb = self._single_contingency(obs, model, threshold) - true_above.append(ta) - false_above.append(fa) - false_below.append(fb) - true_below.append(tb) - return np.array(true_above), np.array(false_above), np.array(false_below), np.array(true_below), borders - + for pred in predictions: + ta, fa, fb, tb = self._single_contingency(obs, pred, threshold) + contingency_array.loc[dict(thresholds=threshold, contingency_cell="ta", type=pred.type.values)] = ta + contingency_array.loc[dict(thresholds=threshold, contingency_cell="fa", type=pred.type.values)] = fa + contingency_array.loc[dict(thresholds=threshold, contingency_cell="fb", type=pred.type.values)] = fb + contingency_array.loc[dict(thresholds=threshold, contingency_cell="tb", type=pred.type.values)] = tb + return contingency_array, borders def _single_contingency(self, obs, pred, threshold): ta = 0 @@ -151,16 +161,39 @@ class PlotOversamplingContingency(AbstractPlotClass): tb += 1 return ta, fa, fb, tb - def _calculate_scores(self, true_above, false_above, false_below, true_below): - np.seterr(divide="ignore") - np.seterr(divide="ignore") - ts = true_above/(true_above + false_above + false_below) - h = true_above/(true_above + false_below) - f = false_above/(false_above + true_below) - np.nan_to_num(ts, copy=False) - np.nan_to_num(h, copy=False) - np.nan_to_num(f, copy=False) - return ts, h, f + def _calculate_all_scores(self, contingency_array): + score_array = xr.DataArray(dims=["scores", "thresholds", "type"], + coords=[self._scores, contingency_array.thresholds.values, + contingency_array.type.values]) + for type in score_array.type.values.tolist(): + for threshold in score_array.thresholds.values.tolist(): + for score in score_array.scores.values.tolist(): + score_value = 
self._calculate_scores(contingency_array.loc[dict(type=type, + thresholds=threshold)].values, score) + score_array.loc[dict(type=type, thresholds=threshold, scores=score)] = score_value + return score_array + + def _calculate_scores(self, contingency, score): + true_above = contingency[0] + false_above = contingency[1] + false_below = contingency[2] + true_below = contingency[3] + if score == "ts": + if (true_above + false_above + false_below) == 0: + score_value = 1 + else: + score_value = true_above/(true_above + false_above + false_below) + elif score == "h": + if (true_above + false_below) == 0: + score_value = 1 + else: + score_value = true_above/(true_above + false_below) + elif score == "f": + if (false_above + true_below) == 0: + score_value = 1 + else: + score_value = false_above/(false_above + true_below) + return score_value diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index cef2c651..80d40a66 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -333,8 +333,8 @@ class PostProcessing(RunEnvironment): try: if (self.data_store.get('oversampling_method')=='bin_oversampling') and ( "PlotOversamplingContingency" in plot_list): - PlotOversamplingContingency(station_names=self.test_data.keys(), file_path=path, comp_path=self.competitor_path, - comp_names=self.competitors, + PlotOversamplingContingency(station_names=self.test_data.keys(), file_path=path, + comp_path=self.competitor_path, comp_names=self.competitors, file_name=r"forecasts_%s_test.nc", plot_folder=self.plot_path) except Exception as e: logging.error(f"Could not create plot OversamplingContingencyPlots due to the following error: {e}") -- GitLab