diff --git a/HPC_setup/requirements_HDFML_additionals.txt b/HPC_setup/requirements_HDFML_additionals.txt index 1a9e8524906115e02338dcf80137081ab7165697..9f19c839e7d364647c609a223eb047a7f7fab6a8 100644 --- a/HPC_setup/requirements_HDFML_additionals.txt +++ b/HPC_setup/requirements_HDFML_additionals.txt @@ -11,7 +11,7 @@ packaging==21.3 timezonefinder==5.2.0 patsy==0.5.2 statsmodels==0.13.2 -seaborn==0.11.2 +seaborn==0.12.0 xarray==0.16.2 tabulate==0.8.10 wget==3.2 diff --git a/HPC_setup/requirements_JUWELS_additionals.txt b/HPC_setup/requirements_JUWELS_additionals.txt index 1a9e8524906115e02338dcf80137081ab7165697..9f19c839e7d364647c609a223eb047a7f7fab6a8 100644 --- a/HPC_setup/requirements_JUWELS_additionals.txt +++ b/HPC_setup/requirements_JUWELS_additionals.txt @@ -11,7 +11,7 @@ packaging==21.3 timezonefinder==5.2.0 patsy==0.5.2 statsmodels==0.13.2 -seaborn==0.11.2 +seaborn==0.12.0 xarray==0.16.2 tabulate==0.8.10 wget==3.2 diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index 9bb15068ce3a5ad934f7b0251b84cb19f37702f6..d0b3592905c32a4c7af875014532a5539805dd3a 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -48,7 +48,7 @@ DEFAULT_TEST_END = "2017-12-31" DEFAULT_TEST_MIN_LENGTH = 90 DEFAULT_TRAIN_VAL_MIN_LENGTH = 180 DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True -DEFAULT_COMPETITORS = ["ols"] +DEFAULT_COMPETITORS = ["ols", "persi"] DEFAULT_DO_UNCERTAINTY_ESTIMATE = True DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH = "1m" DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS = True diff --git a/mlair/model_modules/branched_input_networks.py b/mlair/model_modules/branched_input_networks.py index af3a8bffa3169556d55af94192915e3a27f89cc1..acb83967b783a806e4221738c1e7144f6850c593 100644 --- a/mlair/model_modules/branched_input_networks.py +++ b/mlair/model_modules/branched_input_networks.py @@ -1,3 +1,4 @@ +import logging from functools import partial, reduce import copy from typing import Union @@ -9,6 
+10,8 @@ from mlair.helpers import select_from_dict, to_list from mlair.model_modules.loss import var_loss from mlair.model_modules.recurrent_networks import RNN from mlair.model_modules.convolutional_networks import CNNfromConfig +from mlair.model_modules.residual_networks import ResNet +from mlair.model_modules.u_networks import UNet class BranchedInputCNN(CNNfromConfig): # pragma: no cover @@ -367,3 +370,189 @@ class BranchedInputFCN(AbstractModelClass): # pragma: no cover "metrics": ["mse", "mae", var_loss]} # self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss], loss_weights=[2, 1])], # "metrics": ["mse", "mae", var_loss]} + + +class BranchedInputUNet(UNet, BranchedInputCNN): # pragma: no cover + """ + A U-net neural network with multiple input branches. + + ```python + + input_shape = [(72,1,9),(72,1,9),] + output_shape = [(4, )] + + # model + layer_configuration=[ + + # 1st block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 2nd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 3rd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 4th 
block (final down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + + # 5th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 64, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + + # 6th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 32, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + + # 7th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 16, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + + # Tail + {"type": "Concatenate"}, + {"type": "Flatten"}, + {"type": "Dense", "units": 128, "activation": "relu"} + ] + + model = BranchedInputUNet(input_shape, output_shape, layer_configuration) + ``` + + """ + + def __init__(self, input_shape, output_shape, layer_configuration: list, optimizer="adam", **kwargs): + + super(BranchedInputUNet, self).__init__(input_shape, output_shape, layer_configuration, optimizer=optimizer, **kwargs) + + def set_model(self): + + 
x_input = [] + x_in = [] + stop_pos = None + block_save = [] + + for branch in range(len(self._input_shape)): + print(branch) + block_save = [] + shape_b = self._input_shape[branch] + x_input_b = keras.layers.Input(shape=shape_b, name=f"input_branch{branch + 1}") + x_input.append(x_input_b) + x_in_b = x_input_b + b_conf = copy.deepcopy(self.conf) + + for pos, layer_opts in enumerate(b_conf): + print(layer_opts) + if layer_opts.get("type") == "Concatenate": + if stop_pos is None: + stop_pos = pos + else: + assert pos == stop_pos + break + layer, layer_kwargs, follow_up_layer = self._extract_layer_conf(layer_opts) + if layer == "blocksave": + block_save.append(x_in_b) + continue + layer_name = self._get_layer_name(layer, layer_kwargs, pos, branch) + if "Concatenate" in layer_name: + x_in_b = layer(name=layer_name)([x_in_b, block_save.pop(-1)]) + self._layer_save.append({"layer": layer, "follow_up_layer": follow_up_layer}) + continue + x_in_b = layer(**layer_kwargs, name=layer_name)(x_in_b) + if follow_up_layer is not None: + for follow_up in to_list(follow_up_layer): + layer_name = self._get_layer_name(follow_up, None, pos, branch) + x_in_b = follow_up(name=layer_name)(x_in_b) + self._layer_save.append({"layer": layer, **layer_kwargs, "follow_up_layer": follow_up_layer, + "branch": branch}) + x_in.append(x_in_b) + + print("concat") + x_concat = keras.layers.Concatenate()(x_in) + if len(block_save) > 0: + logging.warning(f"Branches of BranchedInputUNet are concatenated before last upsampling block is applied.") + block_save = [] + + if stop_pos is not None: + for pos, layer_opts in enumerate(self.conf[stop_pos + 1:]): + print(layer_opts) + layer, layer_kwargs, follow_up_layer = self._extract_layer_conf(layer_opts) + if layer == "blocksave": + block_save.append(x_concat) + continue + layer_name = self._get_layer_name(layer, layer_kwargs, pos, None) + if "Concatenate" in layer_name: + x_concat = layer(name=layer_name)([x_concat, block_save.pop(-1)]) + 
self._layer_save.append({"layer": layer, "follow_up_layer": follow_up_layer}) + continue + x_concat = layer(**layer_kwargs, name=layer_name)(x_concat) + if follow_up_layer is not None: + for follow_up in to_list(follow_up_layer): + layer_name = self._get_layer_name(follow_up, None, pos + stop_pos, None) + x_concat = follow_up(name=layer_name)(x_concat) + self._layer_save.append({"layer": layer, **layer_kwargs, "follow_up_layer": follow_up_layer, + "branch": "concat"}) + + x_concat = keras.layers.Dense(self._output_shape)(x_concat) + out = self.activation_output(name=f"{self.activation_output_name}_output")(x_concat) + self.model = keras.Model(inputs=x_input, outputs=[out]) + print(self.model.summary()) + + +class BranchedInputResNet(ResNet, BranchedInputCNN): # pragma: no cover + """ + A convolutional neural network with multiple input branches and residual blocks (skip connections). + + ```python + input_shape = [(65,1,9), (65,1,9)] + output_shape = [(4, )] + + # model + layer_configuration=[ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, 
"strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"} + ] + + model = BranchedInputResNet(input_shape, output_shape, layer_configuration) + ``` + + """ + + def __init__(self, input_shape: list, output_shape: list, layer_configuration: list, optimizer="adam", **kwargs): + + super().__init__(input_shape, output_shape, layer_configuration, optimizer=optimizer, **kwargs) diff --git a/mlair/model_modules/residual_networks.py b/mlair/model_modules/residual_networks.py index a9b502c4ef9ba5daa2b624f678b1f951dad3b747..913ed7b89125abffcb3f91a926adc9b2cd1b22a5 100644 --- a/mlair/model_modules/residual_networks.py +++ b/mlair/model_modules/residual_networks.py @@ -1,16 +1,16 @@ __author__ = "Lukas Leufen" -__date__ = "2021-08-23" +__date__ = "2022-08-23" from functools import partial -from mlair.model_modules.branched_input_networks import BranchedInputCNN +from mlair.model_modules.convolutional_networks import CNNfromConfig import tensorflow.keras as keras -class BranchedInputResNet(BranchedInputCNN): +class ResNet(CNNfromConfig): # pragma: no cover """ - A convolutional neural network with multiple input branches and residual blocks (skip connections). + A convolutional neural network with residual blocks (skip connections). 
```python input_shape = [(65,1,9)] @@ -29,11 +29,10 @@ class BranchedInputResNet(BranchedInputCNN): {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, {"type": "Dropout", "rate": 0.25}, {"type": "Flatten"}, - {"type": "Concatenate"}, {"type": "Dense", "units": 128, "activation": "relu"} ] - model = BranchedInputResNet(input_shape, output_shape, layer_configuration) + model = ResNet(input_shape, output_shape, layer_configuration) ``` """ diff --git a/mlair/model_modules/u_networks.py b/mlair/model_modules/u_networks.py new file mode 100644 index 0000000000000000000000000000000000000000..2462553d30e84a23ae1f56528bd912d9e25e14b9 --- /dev/null +++ b/mlair/model_modules/u_networks.py @@ -0,0 +1,138 @@ +__author__ = "Lukas Leufen" +__date__ = "2022-08-29" + + +from functools import partial + +from mlair.helpers import select_from_dict, to_list +from mlair.model_modules.convolutional_networks import CNNfromConfig +import tensorflow.keras as keras + + +class UNet(CNNfromConfig): # pragma: no cover + """ + A U-net neural network. 
+ + ```python + input_shape = [(65,1,9)] + output_shape = [(4, )] + + # model + layer_configuration=[ + + # 1st block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "blocksave"}, + + # 2nd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "blocksave"}, + + # 3rd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "blocksave"}, + + # 4th block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "blocksave"}, + + # 5th block (final down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 256, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 256, "padding": "same"}, + + # 6th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 128, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", 
"activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + + # 7th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 64, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + + # 8th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 32, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + + # 9th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 16, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + + # Tail + {"type": "Flatten"}, + {"type": "Dense", "units": 128, "activation": "relu"} + ] + + model = UNet(input_shape, output_shape, layer_configuration) + ``` + + + """ + + def __init__(self, input_shape: list, output_shape: list, layer_configuration: list, optimizer="adam", **kwargs): + + super().__init__(input_shape, output_shape, layer_configuration, optimizer=optimizer, **kwargs) + + def _extract_layer_conf(self, layer_opts): + if layer_opts["type"] == "ConcatenateUNet": + layer =
getattr(keras.layers, "Concatenate", None) + return layer, None, None + elif layer_opts["type"] == "blocksave": + return "blocksave", None, None + else: + return super()._extract_layer_conf(layer_opts) + + def set_model(self): + x_input = keras.layers.Input(shape=self._input_shape) + x_in = x_input + block_save = [] + + for pos, layer_opts in enumerate(self.conf): + print(layer_opts) + layer, layer_kwargs, follow_up_layer = self._extract_layer_conf(layer_opts) + if layer == "blocksave": + block_save.append(x_in) + continue + layer_name = self._get_layer_name(layer, layer_kwargs, pos) + if "Concatenate" in layer_name: + x_in = layer(name=layer_name)([x_in, block_save.pop(-1)]) + self._layer_save.append({"layer": layer, "follow_up_layer": follow_up_layer}) + continue + x_in = layer(**layer_kwargs, name=layer_name)(x_in) + if follow_up_layer is not None: + for follow_up in to_list(follow_up_layer): + layer_name = self._get_layer_name(follow_up, None, pos) + x_in = follow_up(name=layer_name)(x_in) + self._layer_save.append({"layer": layer, **layer_kwargs, "follow_up_layer": follow_up_layer}) + + x_in = keras.layers.Dense(self._output_shape)(x_in) + out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in) + self.model = keras.Model(inputs=x_input, outputs=[out]) + print(self.model.summary()) diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py index c3fb7abc9c51378552ed91d2fa6b69e08fa351e7..6ceb7b30b8f37b5a0d06efb0f201979040ffdb5d 100644 --- a/mlair/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -17,6 +17,7 @@ import seaborn as sns import xarray as xr from matplotlib.backends.backend_pdf import PdfPages from matplotlib.offsetbox import AnchoredText +import matplotlib.dates as mdates from scipy.stats import mannwhitneyu from mlair import helpers @@ -1112,7 +1113,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover 
self.prepare_data(data) # create all combinations to plot (h/v, utest/notest, single/multi) - variants = list(itertools.product(*[["v", "h"], [True, False], ["single", "multi"]])) + variants = list(itertools.product(*[["v", "h"], [True, False], ["single", "multi", "panel"]])) # plot raw metric (mse) for orientation, utest, agg_type in variants: @@ -1157,7 +1158,9 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover if apply_u_test is True and agg_type == "multi": return # not implemented data_table = self._data_table - if self.ahead_dim not in data_table.index.names and agg_type == "multi": + if self.ahead_dim not in data_table.index.names and agg_type in ["multi", "panel"]: + return # nothing to do + if apply_u_test is True and agg_type == "panel": return # nothing to do n_boots = self._n_boots size = len(np.unique(data_table.columns)) @@ -1177,7 +1180,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover showmeans=True, meanprops={"markersize": 6, "markeredgecolor": "k"}, flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, boxprops={'facecolor': 'none', 'edgecolor': 'k'}, width=width, orient=orientation) - else: + elif agg_type == "multi": xy = {"x": self.model_type_dim, "y": 0} if orientation == "v" else {"x": 0, "y": self.model_type_dim} sns.boxplot(data=data_table.stack(self.model_type_dim).reset_index(), ax=ax, whis=1.5, palette=color_palette, showmeans=True, meanprops={"markersize": 6, "markeredgecolor": "k", "markerfacecolor": "white"}, @@ -1187,35 +1190,71 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover _labels = [str(i) + self.sampling for i in data_table.index.levels[1].values] handles, _ = ax.get_legend_handles_labels() ax.legend(handles, _labels) + else: + xy = (self.model_type_dim, 0) if orientation == "v" else (0, self.model_type_dim) + col_or_row = {"col": self.ahead_dim} if orientation == "v" else {"row": 
self.ahead_dim} + aspect = figsize[0] / figsize[1] + height = figsize[1] * 0.8 + ax = sns.FacetGrid(data_table.stack(self.model_type_dim).reset_index(), **col_or_row, aspect=aspect, height=height) + ax.map(sns.boxplot, *xy, whis=1.5, color="white", showmeans=True, order=data_table.mean().index.to_list(), + meanprops={"markersize": 6, "markeredgecolor": "k"}, + flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, + boxprops={'facecolor': 'none', 'edgecolor': 'k'}, width=width, orient=orientation) - if orientation == "v": - if apply_u_test: - ax = self.set_significance_bars(asteriks, ax, data_table, orientation) - ylims = list(ax.get_ylim()) - ax.set_ylim([ylims[0], ylims[1]*1.025]) - ax.set_ylabel(f"{self.error_measure} (in {self.error_unit})") - ax.set_xticklabels(ax.get_xticklabels(), rotation=45) - ax.set_xlabel(None) - elif orientation == "h": - if apply_u_test: - ax = self.set_significance_bars(asteriks, ax, data_table, orientation) - ax.set_xlabel(f"{self.error_measure} (in {self.error_unit})") - xlims = list(ax.get_xlim()) - ax.set_xlim([xlims[0], xlims[1] * 1.015]) - ax.set_ylabel(None) + _labels = [str(i) + self.sampling for i in data_table.index.levels[1].values] + for axi, title in zip(ax.axes.flatten(), _labels): + axi.set_title(title) + plt.setp(axi.lines, color='k') + + if agg_type == "panel": + if orientation == "v": + for axi in ax.axes.flatten(): + axi.set_xlabel(None) + axi.set_xticklabels(axi.get_xticklabels(), rotation=45) + ax.set_ylabels(f"{self.error_measure} (in {self.error_unit})") + loc = "upper left" + else: + for axi in ax.axes.flatten(): + axi.set_ylabel(None) + ax.set_xlabels(f"{self.error_measure} (in {self.error_unit})") + loc = "upper right" + text = f"n={n_boots}" + if self.block_length is not None: + text = f"{self.block_length}, {text}" + if len(season) > 0: + text = f"{season}, {text}" + text_box = AnchoredText(text, frameon=True, loc=loc) + plt.setp(text_box.patch, edgecolor='k', 
facecolor='w') + ax.axes.flatten()[0].add_artist(text_box) else: - raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") - text = f"n={n_boots}" - if self.block_length is not None: - text = f"{self.block_length}, {text}" - if len(season) > 0: - text = f"{season}, {text}" - loc = "lower left" - text_box = AnchoredText(text, frameon=True, loc=loc, pad=0.5, bbox_to_anchor=(0., 1.0), - bbox_transform=ax.transAxes) - plt.setp(text_box.patch, edgecolor='k', facecolor='w') - ax.add_artist(text_box) - plt.setp(ax.lines, color='k') + if orientation == "v": + if apply_u_test: + ax = self.set_significance_bars(asteriks, ax, data_table, orientation) + ylims = list(ax.get_ylim()) + ax.set_ylim([ylims[0], ylims[1]*1.025]) + ax.set_ylabel(f"{self.error_measure} (in {self.error_unit})") + ax.set_xticklabels(ax.get_xticklabels(), rotation=45) + ax.set_xlabel(None) + elif orientation == "h": + if apply_u_test: + ax = self.set_significance_bars(asteriks, ax, data_table, orientation) + ax.set_xlabel(f"{self.error_measure} (in {self.error_unit})") + xlims = list(ax.get_xlim()) + ax.set_xlim([xlims[0], xlims[1] * 1.015]) + ax.set_ylabel(None) + else: + raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") + text = f"n={n_boots}" + if self.block_length is not None: + text = f"{self.block_length}, {text}" + if len(season) > 0: + text = f"{season}, {text}" + loc = "lower left" + text_box = AnchoredText(text, frameon=True, loc=loc, pad=0.5, bbox_to_anchor=(0., 1.0), + bbox_transform=ax.transAxes) + plt.setp(text_box.patch, edgecolor='k', facecolor='w') + ax.add_artist(text_box) + plt.setp(ax.lines, color='k') plt.tight_layout() self._save() plt.close("all") @@ -1255,6 +1294,7 @@ class PlotTimeEvolutionMetric(AbstractPlotClass): vmax = int(data.quantile(0.95)) data = self._prepare_data(data, time_dim, model_type_dim, model_indicator, model_name) + # detailed plot for each model type for t in data[model_type_dim]: # note: could be expanded to create 
plot per ahead step plot_data = data.sel({model_type_dim: t}).mean(ahead_dim).to_pandas() @@ -1264,6 +1304,24 @@ class PlotTimeEvolutionMetric(AbstractPlotClass): self.plot_name = f"{plot_name}_{t.values}" self._plot(plot_data, years, months, vmin, vmax, str(t.values)) + # aggregated version with all model types + remaining_dim = set(data.dims).difference((model_type_dim, time_dim)) + _data = data.mean(remaining_dim, skipna=True).transpose(model_type_dim, time_dim) + vmin = int(_data.quantile(0.05)) + vmax = int(_data.quantile(0.95)) + plot_data = _data.to_pandas() + years = plot_data.columns.strftime("%Y").to_list() + months = plot_data.columns.strftime("%b").to_list() + plot_data.columns = plot_data.columns.strftime("%b %Y") + self.plot_name = f"{plot_name}_summary" + self._plot(plot_data, years, months, vmin, vmax, None) + + # line plot version + y_dim = "error" + plot_data = data.to_dataset(name=y_dim).to_dataframe().reset_index() + self.plot_name = f"{plot_name}_line_plot" + self._plot_summary_line(plot_data, x_dim=time_dim, y_dim=y_dim, hue_dim=model_type_dim) + @staticmethod def _find_nan_edge(data, time_dim): coll = [] @@ -1306,7 +1364,7 @@ class PlotTimeEvolutionMetric(AbstractPlotClass): @staticmethod def _aspect_cbar(val): - return min(max(1.25 * val + 7.5, 10), 30) + return min(max(1.25 * val + 7.5, 5), 30) def _plot(self, data, years, months, vmin=None, vmax=None, subtitle=None): fig, ax = plt.subplots(figsize=(max(data.shape[1] / 6, 12), max(data.shape[0] / 3.5, 2))) @@ -1320,6 +1378,21 @@ class PlotTimeEvolutionMetric(AbstractPlotClass): plt.tight_layout() self._save() + def _plot_summary_line(self, data, x_dim, y_dim, hue_dim): + data[x_dim] = pd.to_datetime(data[x_dim].dt.strftime('%Y-%m')) #??? 
+ n = len(data[hue_dim].unique()) + ax = sns.lineplot(data=data, x=x_dim, y=y_dim, hue=hue_dim, errorbar=("pi", 50), + palette=sns.color_palette()[:n], style=hue_dim, dashes=False, markers=["X"]*n) + ax.set(xlabel=None, ylabel=self.title) + ax.get_legend().set_title(None) + ax.xaxis.set_major_locator(mdates.YearLocator()) + ax.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y')) + ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=range(1, 13, 3))) + ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b')) + ax.margins(x=0) + plt.tight_layout() + self._save() + @TimeTrackingWrapper class PlotSeasonalMSEStack(AbstractPlotClass): diff --git a/mlair/plotting/tracker_plot.py b/mlair/plotting/tracker_plot.py index 53ec7496e7e04da0f53b1d0ce817793dea732963..5c5887a6e1f9b03ed06ade49a49cd70edc51e820 100644 --- a/mlair/plotting/tracker_plot.py +++ b/mlair/plotting/tracker_plot.py @@ -319,8 +319,12 @@ class TrackPlot: x_max = None if chain.successor is not None: + stage_count = 0 for e in chain.successor: if e.stage == stage: + stage_count += 1 + if stage_count > 50: + continue shift = self.width + self.space_intern_x if chain.stage == e.stage else 0 x_tmp = self.plot_track_chain(e, y_pos, x_pos + shift, prev=(x, y), stage=stage, sparse_conn_mode=sparse_conn_mode) diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index de58e9054aa1619ddb5b8fd1fb481b25bf089f5b..459a1928958a96238af89caa4241911554df416f 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -72,6 +72,7 @@ class PostProcessing(RunEnvironment): self.model: AbstractModelClass = self._load_model() self.model_name = self.data_store.get("model_name", "model").rsplit("/", 1)[1].split(".", 1)[0] self.ols_model = None + self.persi_model = True self.batch_size: int = self.data_store.get_default("batch_size", "model", 64) self.test_data = self.data_store.get("data_collection", "test") batch_path = self.data_store.get("batch_path", 
scope="test") @@ -106,6 +107,9 @@ class PostProcessing(RunEnvironment): # ols model self.train_ols_model() + # persi model + self.setup_persistence() + # forecasts on test data self.make_prediction(self.test_data) self.make_prediction(self.train_val_data) @@ -178,6 +182,10 @@ class PostProcessing(RunEnvironment): file_name = os.path.join(report_path, f"uncertainty_estimate_raw_results_{season}.nc") self.uncertainty_estimate_seasons[season].to_netcdf(path=file_name) + # store block mse per station + file_name = os.path.join(report_path, f"block_mse_raw_results.nc") + self.block_mse_per_station.to_netcdf(path=file_name) + # store statistics if percentiles is None: percentiles = [.05, .1, .25, .5, .75, .9, .95] @@ -717,6 +725,12 @@ class PostProcessing(RunEnvironment): else: logging.info(f"Skip train ols model as it is not present in competitors.") + def setup_persistence(self): + """Check if persistence is requested from competitors and store this information.""" + self.persi_model = any(x in map(str.lower, self.competitors) for x in ["persi", "persistence"]) + if self.persi_model is False: + logging.info(f"Persistence is not calculated as it is not present in competitors.") + @TimeTrackingWrapper def make_prediction(self, subset): """ @@ -750,8 +764,11 @@ class PostProcessing(RunEnvironment): nn_prediction = self._create_nn_forecast(copy.deepcopy(nn_output), nn_prediction, transformation_func, normalised) # persistence - persistence_prediction = self._create_persistence_forecast(observation_data, persistence_prediction, - transformation_func, normalised) + if self.persi_model is True: + persistence_prediction = self._create_persistence_forecast(observation_data, persistence_prediction, + transformation_func, normalised) + else: + persistence_prediction = None # ols if self.ols_model is not None: diff --git a/mlair/run_modules/run_environment.py b/mlair/run_modules/run_environment.py index 
2bc81750bf86d60e0c59bbf0fef68ae9c29138c9..9edb0aa27379cc2477e0240c43e4745ab5db1ce2 100644 --- a/mlair/run_modules/run_environment.py +++ b/mlair/run_modules/run_environment.py @@ -8,6 +8,7 @@ import logging import os import shutil import time +import sys from mlair.helpers.datastore import DataStoreByScope as DataStoreObject from mlair.helpers.datastore import NameNotFoundInDataStore @@ -160,8 +161,12 @@ class RunEnvironment(object): json.dump(tracker, f) def __plot_tracking(self): - plot_folder, plot_name = os.path.split(self.__find_file_pattern("tracking_%03i.pdf")) - TrackPlot(self.tracker_list, sparse_conn_mode=True, plot_folder=plot_folder, plot_name=plot_name) + try: + plot_folder, plot_name = os.path.split(self.__find_file_pattern("tracking_%03i.pdf")) + TrackPlot(self.tracker_list, sparse_conn_mode=True, plot_folder=plot_folder, plot_name=plot_name) + except Exception as e: + logging.info(f"Could not plot tracking plot due to:\n{sys.exc_info()[0]}\n" + f"{sys.exc_info()[1]}\n{sys.exc_info()[2]}") def __find_file_pattern(self, name): counter = 0 diff --git a/requirements.txt b/requirements.txt index f644ae9257c0b5a18492f8a2d0ef27d1246ec0d4..e6578031a037ce89054db061c2582dea4d91fdca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ pytest-html==3.1.1 pytest-lazy-fixture==0.6.3 requests==2.28.1 scipy==1.7.1 -seaborn==0.11.2 +seaborn==0.12.0 setuptools==47.1.0 --no-binary shapely Shapely==1.8.0 six==1.15.0 diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py index 6646e1a4795756edd1792ef91f535132e8cde61d..9debec9f04583b401698cea2f79f78b523fe7927 100644 --- a/test/test_run_modules/test_pre_processing.py +++ b/test/test_run_modules/test_pre_processing.py @@ -45,14 +45,16 @@ class TestPreProcessing: with PreProcessing(): assert caplog.record_tuples[0] == ('root', 20, 'PreProcessing started') assert caplog.record_tuples[1] == ('root', 20, 'check valid stations started (preprocessing)') - assert 
caplog.record_tuples[-6] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 3 ' + assert caplog.record_tuples[-7] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 3 ' r'station\(s\). Found 3/3 valid stations.')) - assert caplog.record_tuples[-5] == ('root', 20, "use serial create_info_df (train)") - assert caplog.record_tuples[-4] == ('root', 20, "use serial create_info_df (val)") - assert caplog.record_tuples[-3] == ('root', 20, "use serial create_info_df (test)") - assert caplog.record_tuples[-2] == ('root', 20, "Searching for competitors to be prepared for use.") - assert caplog.record_tuples[-1] == ('root', 20, "No preparation required for competitor ols as no specific " + assert caplog.record_tuples[-6] == ('root', 20, "use serial create_info_df (train)") + assert caplog.record_tuples[-5] == ('root', 20, "use serial create_info_df (val)") + assert caplog.record_tuples[-4] == ('root', 20, "use serial create_info_df (test)") + assert caplog.record_tuples[-3] == ('root', 20, "Searching for competitors to be prepared for use.") + assert caplog.record_tuples[-2] == ('root', 20, "No preparation required for competitor ols as no specific " "instruction is provided.") + assert caplog.record_tuples[-1] == ('root', 20, "No preparation required for competitor persi as no " + "specific instruction is provided.") RunEnvironment().__del__() def test_run(self, obj_with_exp_setup):