From 4ef91f3062b7641b8cbfde14c43a8dc9bfc57ba2 Mon Sep 17 00:00:00 2001
From: "v.gramlich1" <v.gramlichfz-juelich.de>
Date: Thu, 15 Jul 2021 13:11:08 +0200
Subject: [PATCH] Moved Plots from pre_processing to data_insight_plotting.
 Added PlotOversampling to Default_plot_list.

---
Review notes (free text in this section is skipped when the patch is applied):
 * Line breaks, blank lines and indentation were reconstructed from a
   whitespace-collapsed copy of this patch; verify the context lines against
   the target tree before applying.
 * post_processing.py: the plot_list membership is tested before
   'oversampling_method' is read from the data store.
 * data_insight_plotting.py: class docstrings added, unused [0] subscripts and
   f-prefixes on placeholder-free strings removed, two blank lines before the
   first new class.
 * model_class.py: spacing of `32 * 2` aligned with the neighbouring lines.
 * The "index" blob ids below are those of the original revision.

 mlair/configuration/defaults.py         |  2 +-
 mlair/model_modules/model_class.py      | 10 ++--
 mlair/plotting/data_insight_plotting.py | 76 +++++++++++++++++++++++++
 mlair/run_modules/post_processing.py    | 22 +++++++-
 mlair/run_modules/pre_processing.py     |  3 +-
 run.py                                  |  1 +
 6 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 7b7584ad..fc4f7f09 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -48,7 +48,7 @@ DEFAULT_CREATE_NEW_BOOTSTRAPS = False
 DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
 DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
                      "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
-                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram"]
+                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotOversampling"]
 DEFAULT_SAMPLING = "daily"
 DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA",
                        "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py
index 9a0e97db..ec7f2179 100644
--- a/mlair/model_modules/model_class.py
+++ b/mlair/model_modules/model_class.py
@@ -377,7 +377,7 @@ class IntelliO3_ts_architecture(AbstractModelClass):
         # apply to model
         self.set_model()
         self.set_compile_options()
-        self.set_custom_objects(loss=self.compile_options["loss"],
+        self.set_custom_objects(loss=self.compile_options["loss"][0],
                                 SymmetricPadding2D=SymmetricPadding2D,
                                 LearningRateDecay=LearningRateDecay)
 
@@ -407,14 +407,14 @@ class IntelliO3_ts_architecture(AbstractModelClass):
         pool_settings_dict1 = {'pool_kernel': (3, 1), 'tower_filter': 16, 'activation': activation}
 
         conv_settings_dict2 = {
-            'tower_1': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (3, 1),
+            'tower_1': {'reduction_filter': 64, 'tower_filter': 32 * 2 * 2, 'tower_kernel': (3, 1),
                         'activation': activation},
-            'tower_2': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (5, 1),
+            'tower_2': {'reduction_filter': 64, 'tower_filter': 32 * 2 * 2, 'tower_kernel': (5, 1),
                         'activation': activation},
-            'tower_3': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (1, 1),
+            'tower_3': {'reduction_filter': 64, 'tower_filter': 32 * 2 * 2, 'tower_kernel': (1, 1),
                         'activation': activation}
         }
-        pool_settings_dict2 = {'pool_kernel': (3, 1), 'tower_filter': 32, 'activation': activation}
+        pool_settings_dict2 = {'pool_kernel': (3, 1), 'tower_filter': 32 * 2, 'activation': activation}
 
         ##########################################
         inception_model = InceptionModelBase()
diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 8a563074..3440321a 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -19,6 +19,82 @@ from mlair.data_handler import DataCollection
 from mlair.helpers import TimeTrackingWrapper, to_list
 from mlair.plotting.abstract_plot_class import AbstractPlotClass
 
+
+@TimeTrackingWrapper
+class PlotOversamplingHistogram(AbstractPlotClass):
+    """Plot step histograms of Y (labelled "Before") and Y_extreme (labelled "After") on shared bin edges."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, plot_folder: str = ".",
+                 plot_name="oversampling_histogram"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges):
+        fig, ax = plt.subplots(1, 1)
+        Y.plot.hist(bins=bin_edges, histtype="step", label="Before", ax=ax)
+        Y_extreme.plot.hist(bins=bin_edges, histtype="step", label="After", ax=ax)
+        ax.set_title("Histogram before-after oversampling")
+        ax.legend()
+
+
+@TimeTrackingWrapper
+class PlotOversamplingDensityHistogram(AbstractPlotClass):
+    """Same as PlotOversamplingHistogram, but both histograms are drawn with density=True."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, plot_folder: str = ".",
+                 plot_name="oversampling_density_histogram"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges):
+        fig, ax = plt.subplots(1, 1)
+        Y.plot.hist(bins=bin_edges, density=True, histtype="step", label="Before", ax=ax)
+        Y_extreme.plot.hist(bins=bin_edges, density=True, histtype="step", label="After", ax=ax)
+        ax.set_title("Density Histogram before-after oversampling")
+        ax.legend()
+
+
+@TimeTrackingWrapper
+class PlotOversamplingRates(AbstractPlotClass):
+    """Plot the desired oversampling rates next to the ratio Y_extreme_hist[0] / Y_hist[0] per bin index."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist, plot_folder: str = ".",
+                 plot_name="oversampling_rates"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist):
+        fig, ax = plt.subplots(1, 1)
+        # element 0 of each hist result is taken as the per-bin count (caller passes the return value of plot.hist)
+        real_oversampling = Y_extreme_hist[0] / Y_hist[0]
+        ax.plot(range(len(real_oversampling)), oversampling_rates, label="Desired oversampling_rates")
+        ax.plot(range(len(real_oversampling)), real_oversampling, label="Actual Oversampling Rates")
+        ax.set_title("Oversampling rates")
+        ax.legend()
+
+
+@TimeTrackingWrapper
+class PlotOversamplingRatesDeviation(AbstractPlotClass):
+    """Plot the quotient of the obtained per-bin ratio and the desired oversampling rates per bin index."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist, plot_folder: str = ".",
+                 plot_name="oversampling_rates_deviation"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist):
+        fig, ax = plt.subplots(1, 1)
+        # element 0 of each hist result is taken as the per-bin count (caller passes the return value of plot.hist)
+        real_oversampling = Y_extreme_hist[0] / Y_hist[0]
+        ax.plot(range(len(real_oversampling)), real_oversampling / oversampling_rates,
+                label="Actual/Desired Rate")
+        ax.set_title("Deviation from desired oversampling rates")
+        ax.legend()
+
 
 @TimeTrackingWrapper
 class PlotStationMap(AbstractPlotClass):  # pragma: no cover
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 89a6f205..742acf2f 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -22,7 +22,8 @@ from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
     PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales
 from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
-    PlotPeriodogram, PlotDataHistogram
+    PlotPeriodogram, PlotDataHistogram, PlotOversamplingHistogram, PlotOversamplingDensityHistogram, \
+    PlotOversamplingRates, PlotOversamplingRatesDeviation
 from mlair.run_modules.run_environment import RunEnvironment
 
 
@@ -305,6 +306,25 @@ class PostProcessing(RunEnvironment):
         target_dim = self.data_store.get("target_dim")
         iter_dim = self.data_store.get("iter_dim")
 
+        try:
+            # test plot_list first: the data store is then only queried when the plot is requested
+            if ("PlotOversampling" in plot_list) and (
+                    self.data_store.get('oversampling_method') == 'bin_oversampling'):
+                bin_edges = self.data_store.get('oversampling_bin_edges')
+                oversampling_rates = self.data_store.get('oversampling_rates_capped', 'train')
+                Y = self.data_store.get('Oversampling_Y')
+                Y_extreme = self.data_store.get('Oversampling_Y_extreme')
+                Y_hist = Y.plot.hist(bins=bin_edges, histtype="step")
+                Y_extreme_hist = Y_extreme.plot.hist(bins=bin_edges, histtype="step")
+                PlotOversamplingHistogram(Y, Y_extreme, bin_edges, plot_folder=self.plot_path)
+                PlotOversamplingDensityHistogram(Y, Y_extreme, bin_edges, plot_folder=self.plot_path)
+                PlotOversamplingRates(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist,
+                                      plot_folder=self.plot_path)
+                PlotOversamplingRatesDeviation(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist,
+                                               Y_extreme_hist, plot_folder=self.plot_path)
+        except Exception as e:
+            logging.error(f"Could not create plot OversamplingPlots due to the following error: {e}")
+
         try:
             if ("filter" in self.test_data[0].get_X(as_numpy=False)[0].coords) and (
                     "PlotSeparationOfScales" in plot_list):
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 215c0bb8..69f14bed 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -113,7 +113,8 @@ class PreProcessing(RunEnvironment):
                 else:
                     Y = xr.concat([Y, station._Y], dim="Stations")
                     Y_extreme = xr.concat([Y_extreme, station._Y_extreme], dim="Stations")
-
+            self.data_store.set('Oversampling_Y', Y)
+            self.data_store.set('Oversampling_Y_extreme', Y_extreme)
             '''
             if not on HPC:
             fig, ax = plt.subplots(nrows=2, ncols=2)
diff --git a/run.py b/run.py
index f2bb336e..05b43ade 100644
--- a/run.py
+++ b/run.py
@@ -25,6 +25,7 @@ def main(parser_args):
         # stations=["DEBW087","DEBW013", "DEBW107", "DEBW076"],
         stations=["DEBW013", "DEBW087", "DEBW107", "DEBW076"],
         train_model=False, create_new_model=True, network="UBA",
+        oversampling_method="bin_oversampling", oversampling_bins=10, oversampling_rates_cap=100,
         window_lead_time=2, evaluate_bootstraps=False,  # plot_list=["PlotCompetitiveSkillScore"],
         competitors=["test_model", "test_model2"],
         competitor_path=os.path.join(os.getcwd(), "data", "comp_test"),
-- 
GitLab