diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 7b7584ad5ac5499f63f4f787f2215830a34ab6bb..fc4f7f0910c086bb5bb7f7802e5332404f7b5359 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -48,7 +48,7 @@ DEFAULT_CREATE_NEW_BOOTSTRAPS = False
 DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
 DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
                      "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
-                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram"]
+                     "PlotAvailability", "PlotAvailabilityHistogram", "PlotDataHistogram", "PlotOversampling"]
 DEFAULT_SAMPLING = "daily"
 DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA",
                        "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py
index 9a0e97dbd1f3a3a52f5717c88d09702e5d0d7928..ec7f217902d7e29b1092ecd1ef51b3fc30602f62 100644
--- a/mlair/model_modules/model_class.py
+++ b/mlair/model_modules/model_class.py
@@ -377,7 +377,7 @@ class IntelliO3_ts_architecture(AbstractModelClass):
         # apply to model
         self.set_model()
         self.set_compile_options()
-        self.set_custom_objects(loss=self.compile_options["loss"],
+        self.set_custom_objects(loss=self.compile_options["loss"][0],
                                 SymmetricPadding2D=SymmetricPadding2D,
                                 LearningRateDecay=LearningRateDecay)
 
@@ -407,14 +407,14 @@ class IntelliO3_ts_architecture(AbstractModelClass):
         pool_settings_dict1 = {'pool_kernel': (3, 1), 'tower_filter': 16, 'activation': activation}
 
         conv_settings_dict2 = {
-            'tower_1': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (3, 1),
+            'tower_1': {'reduction_filter': 64, 'tower_filter': 32 * 2 * 2, 'tower_kernel': (3, 1),
                         'activation': activation},
-            'tower_2': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (5, 1),
+            'tower_2': {'reduction_filter': 64, 'tower_filter': 32 * 2 * 2, 'tower_kernel': (5, 1),
                         'activation': activation},
-            'tower_3': {'reduction_filter': 64, 'tower_filter': 32 * 2, 'tower_kernel': (1, 1),
+            'tower_3': {'reduction_filter': 64, 'tower_filter': 32 * 2 * 2, 'tower_kernel': (1, 1),
                         'activation': activation}
         }
-        pool_settings_dict2 = {'pool_kernel': (3, 1), 'tower_filter': 32, 'activation': activation}
+        pool_settings_dict2 = {'pool_kernel': (3, 1), 'tower_filter': 32 * 2, 'activation': activation}
 
         ##########################################
         inception_model = InceptionModelBase()
diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py
index 8a5630743b4ddc59feb7e989064d13f879ae0199..3440321a8708ee905c5c991c4c8c5a1870719e63 100644
--- a/mlair/plotting/data_insight_plotting.py
+++ b/mlair/plotting/data_insight_plotting.py
@@ -19,6 +19,100 @@ from mlair.data_handler import DataCollection
 from mlair.helpers import TimeTrackingWrapper, to_list
 from mlair.plotting.abstract_plot_class import AbstractPlotClass
 
+
+def _actual_oversampling_rates(Y_hist, Y_extreme_hist):
+    """
+    Return the per-bin ratio of sample counts after and before oversampling.
+
+    Bins that were empty before oversampling have no defined rate. They are reported as NaN instead of being
+    divided by zero.
+
+    :param Y_hist: histogram of the original targets with the bin counts at index 0
+    :param Y_extreme_hist: histogram of the oversampled targets with the bin counts at index 0
+    :return: array with one actual oversampling rate per bin
+    """
+    import numpy as np  # imported locally so the helper does not depend on the module's import block
+    before = np.asarray(Y_hist[0], dtype=float)
+    after = np.asarray(Y_extreme_hist[0], dtype=float)
+    return np.divide(after, before, out=np.full(before.shape, np.nan), where=before != 0)
+
+
+@TimeTrackingWrapper
+class PlotOversamplingHistogram(AbstractPlotClass):
+    """Plot the target histogram before and after oversampling as absolute counts."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, plot_folder: str = ".",
+                 plot_name="oversampling_histogram"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges):
+        _, ax = plt.subplots(1, 1)
+        Y.plot.hist(bins=bin_edges, histtype="step", label="Before", ax=ax)
+        Y_extreme.plot.hist(bins=bin_edges, histtype="step", label="After", ax=ax)
+        ax.set_title("Histogram before-after oversampling")
+        ax.legend()
+
+
+@TimeTrackingWrapper
+class PlotOversamplingDensityHistogram(AbstractPlotClass):
+    """Plot the target histogram before and after oversampling with density normalisation."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, plot_folder: str = ".",
+                 plot_name="oversampling_density_histogram"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges):
+        _, ax = plt.subplots(1, 1)
+        Y.plot.hist(bins=bin_edges, density=True, histtype="step", label="Before", ax=ax)
+        Y_extreme.plot.hist(bins=bin_edges, density=True, histtype="step", label="After", ax=ax)
+        ax.set_title("Density Histogram before-after oversampling")
+        ax.legend()
+
+
+@TimeTrackingWrapper
+class PlotOversamplingRates(AbstractPlotClass):
+    """Plot the desired oversampling rate per bin next to the rate that was actually reached."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist, plot_folder: str = ".",
+                 plot_name="oversampling_rates"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist):
+        # Y, Y_extreme and bin_edges are not used for this plot; they stay in the signature for existing callers
+        _, ax = plt.subplots(1, 1)
+        real_oversampling = _actual_oversampling_rates(Y_hist, Y_extreme_hist)
+        bin_index = range(len(real_oversampling))
+        ax.plot(bin_index, oversampling_rates, label="Desired oversampling_rates")
+        ax.plot(bin_index, real_oversampling, label="Actual Oversampling Rates")
+        ax.set_title("Oversampling rates")
+        ax.legend()
+
+
+@TimeTrackingWrapper
+class PlotOversamplingRatesDeviation(AbstractPlotClass):
+    """Plot the ratio of actual to desired oversampling rate per bin."""
+
+    def __init__(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist, plot_folder: str = ".",
+                 plot_name="oversampling_rates_deviation"):
+        super().__init__(plot_folder, plot_name)
+        self._plot(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist)
+        self._save()
+
+    def _plot(self, Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist):
+        # Y, Y_extreme and bin_edges are not used for this plot; they stay in the signature for existing callers
+        _, ax = plt.subplots(1, 1)
+        real_oversampling = _actual_oversampling_rates(Y_hist, Y_extreme_hist)
+        ax.plot(range(len(real_oversampling)), real_oversampling / oversampling_rates,
+                label="Actual/Desired Rate")
+        ax.set_title("Deviation from desired oversampling rates")
+        ax.legend()
+
 
 @TimeTrackingWrapper
 class PlotStationMap(AbstractPlotClass):  # pragma: no cover
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 89a6f205d03892c57c55a66399a43c9ba2987b42..742acf2f772abded667bff3a54b17064f5901c4b 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -22,7 +22,8 @@ from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
     PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales
 from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
-    PlotPeriodogram, PlotDataHistogram
+    PlotPeriodogram, PlotDataHistogram, PlotOversamplingHistogram, PlotOversamplingDensityHistogram, \
+    PlotOversamplingRates, PlotOversamplingRatesDeviation
 from mlair.run_modules.run_environment import RunEnvironment
 
 
@@ -305,6 +306,30 @@ class PostProcessing(RunEnvironment):
         target_dim = self.data_store.get("target_dim")
         iter_dim = self.data_store.get("iter_dim")
 
+        try:
+            # check the plot list first so the data store is only queried when the plots are requested
+            if ("PlotOversampling" in plot_list) and (
+                    self.data_store.get('oversampling_method') == 'bin_oversampling'):
+                import numpy as np  # local import, only needed to count the samples per bin
+                bin_edges = self.data_store.get('oversampling_bin_edges')
+                oversampling_rates = self.data_store.get('oversampling_rates_capped', 'train')
+                Y = self.data_store.get('Oversampling_Y')
+                Y_extreme = self.data_store.get('Oversampling_Y_extreme')
+                # count with numpy rather than Y.plot.hist, which draws onto the current matplotlib figure as a
+                # side effect; NaNs are removed before counting and the counts stay available at index 0
+                Y_values = np.ravel(Y.values)
+                Y_extreme_values = np.ravel(Y_extreme.values)
+                Y_hist = np.histogram(Y_values[~np.isnan(Y_values)], bins=bin_edges)
+                Y_extreme_hist = np.histogram(Y_extreme_values[~np.isnan(Y_extreme_values)], bins=bin_edges)
+                PlotOversamplingHistogram(Y, Y_extreme, bin_edges, plot_folder=self.plot_path)
+                PlotOversamplingDensityHistogram(Y, Y_extreme, bin_edges, plot_folder=self.plot_path)
+                PlotOversamplingRates(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist, Y_extreme_hist,
+                                      plot_folder=self.plot_path)
+                PlotOversamplingRatesDeviation(Y, Y_extreme, bin_edges, oversampling_rates, Y_hist,
+                                               Y_extreme_hist, plot_folder=self.plot_path)
+        except Exception as e:
+            logging.error(f"Could not create plot OversamplingPlots due to the following error: {e}")
+
         try:
             if ("filter" in self.test_data[0].get_X(as_numpy=False)[0].coords) and (
                     "PlotSeparationOfScales" in plot_list):
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 215c0bb80c05fd9ac267418961c2ca96e025b3e2..69f14bedc0a31afa4965778e33e7c884db77179f 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -113,7 +113,8 @@ class PreProcessing(RunEnvironment):
             else:
                 Y = xr.concat([Y, station._Y], dim="Stations")
                 Y_extreme = xr.concat([Y_extreme, station._Y_extreme], dim="Stations")
-
+        self.data_store.set('Oversampling_Y', Y)
+        self.data_store.set('Oversampling_Y_extreme', Y_extreme)
         '''
         if not on HPC:
         fig, ax = plt.subplots(nrows=2, ncols=2)
diff --git a/run.py b/run.py
index f2bb336e8a886a3c0c4d60736c77b5ebc27cad67..05b43ade453a4eb36952e18ad1c7ebab788dc37d 100644
--- a/run.py
+++ b/run.py
@@ -25,6 +25,7 @@ def main(parser_args):
         # stations=["DEBW087","DEBW013", "DEBW107", "DEBW076"],
         stations=["DEBW013", "DEBW087", "DEBW107", "DEBW076"],
         train_model=False, create_new_model=True, network="UBA",
+        oversampling_method="bin_oversampling", oversampling_bins=10, oversampling_rates_cap=100,
         window_lead_time=2, evaluate_bootstraps=False,  # plot_list=["PlotCompetitiveSkillScore"],
         competitors=["test_model", "test_model2"],
         competitor_path=os.path.join(os.getcwd(), "data", "comp_test"),