diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f97ad5b3258a86811966d2cf58e0fe905c4b12fb..37bc17a9bce9cadfda732120f87df3d39a50d921 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -46,10 +46,10 @@ tests (from scratch): - pip install --upgrade pip - zypper --no-gpg-checks addrepo https://download.opensuse.org/repositories/Application:Geo/15.4/Application:Geo.repo - zypper --no-gpg-checks refresh - - zypper --no-gpg-checks --non-interactive install proj=8.2.1 - - zypper --no-gpg-checks --non-interactive install geos=3.11.0 + - zypper --no-gpg-checks --non-interactive install proj=9.1.0 + - zypper --no-gpg-checks --non-interactive install geos=3.11.1 - zypper --no-gpg-checks --non-interactive install geos-devel=3.9.1 - - zypper --no-gpg-checks --non-interactive install libproj22=8.2.1 + # - zypper --no-gpg-checks --non-interactive install libproj22=8.2.1 - zypper --no-gpg-checks --non-interactive install binutils libproj-devel gdal-devel - pip install -r requirements.txt - chmod +x ./CI/run_pytest.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 0418eed5422d00f288879f5e9d8128558118c401..d489da6b31362241a71779435bb6b57e535261bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,30 @@ # Changelog All notable changes to this project will be documented in this file. +## v2.3.0 - 2022-11-25 - new models and plots + +### general: +* new model classes for ResNet and U-Net +* new plots and variations of existing plots + +### new features: +* new model classes: ResNet (#419), U-Net (#423) +* seasonal mse stack plot (#422) +* new aggregated and line versions of Time Evolution Plot (#424, #427) +* box-and-whisker plots are created for all error metrics (#431) +* new split and frequency distribution versions of box-and-whisker plots for error metrics (#425, #434) +* new evaluation metric: mean error / bias (#430) +* conditional quantiles are now available for all competitors too (#435) +* new map plot showing mse at locations (#432) + +### technical: +* speed-up in model setup (#421) +* bugfix for boundary trim in FIR filter (#418) +* persistence is now calculated only on demand (#426) +* block mse values are stored locally in a file (#428) +* fix issue with boolean variables not recognized by argparse (#417) +* renaming of ahead labels (#436) + ## v2.2.0 - 2022-08-16 - new data sources and python3.9 ### general: diff --git a/CI/Dockerfile b/CI/Dockerfile index f3b99b2f8b78129d3fff4d49d88be54613bf5929..6146387f179f53142e0710dfd56a1e8f466cd982 100644 --- a/CI/Dockerfile +++ b/CI/Dockerfile @@ -53,10 +53,10 @@ FROM base AS mlair # install geo packages RUN zypper --no-gpg-checks addrepo https://download.opensuse.org/repositories/Application:Geo/15.4/Application:Geo.repo RUN zypper --no-gpg-checks refresh -RUN zypper --no-gpg-checks --non-interactive install proj=8.2.1 -RUN zypper --no-gpg-checks --non-interactive install geos=3.10.3 +RUN zypper --no-gpg-checks --non-interactive install proj=9.1.0 +RUN zypper --no-gpg-checks --non-interactive install geos=3.11.1 RUN zypper --no-gpg-checks --non-interactive install geos-devel=3.9.1 -RUN zypper --no-gpg-checks --non-interactive install libproj22=8.2.1 +# RUN zypper --no-gpg-checks --non-interactive install libproj22=8.2.1 RUN zypper --no-gpg-checks --non-interactive install binutils libproj-devel gdal-devel # install requirements diff --git a/HPC_setup/requirements_HDFML_additionals.txt b/HPC_setup/requirements_HDFML_additionals.txt index 1a9e8524906115e02338dcf80137081ab7165697..9f19c839e7d364647c609a223eb047a7f7fab6a8 100644 ---
a/HPC_setup/requirements_HDFML_additionals.txt +++ b/HPC_setup/requirements_HDFML_additionals.txt @@ -11,7 +11,7 @@ packaging==21.3 timezonefinder==5.2.0 patsy==0.5.2 statsmodels==0.13.2 -seaborn==0.11.2 +seaborn==0.12.0 xarray==0.16.2 tabulate==0.8.10 wget==3.2 diff --git a/HPC_setup/requirements_JUWELS_additionals.txt b/HPC_setup/requirements_JUWELS_additionals.txt index 1a9e8524906115e02338dcf80137081ab7165697..9f19c839e7d364647c609a223eb047a7f7fab6a8 100644 --- a/HPC_setup/requirements_JUWELS_additionals.txt +++ b/HPC_setup/requirements_JUWELS_additionals.txt @@ -11,7 +11,7 @@ packaging==21.3 timezonefinder==5.2.0 patsy==0.5.2 statsmodels==0.13.2 -seaborn==0.11.2 +seaborn==0.12.0 xarray==0.16.2 tabulate==0.8.10 wget==3.2 diff --git a/mlair/__init__.py b/mlair/__init__.py index 20388a18ac8ebdf37c4e17aa462839bb5b6b8e11..e55918caf0a84391ba912d31fc18c2e166c1317b 100644 --- a/mlair/__init__.py +++ b/mlair/__init__.py @@ -1,6 +1,6 @@ __version_info__ = { 'major': 2, - 'minor': 2, + 'minor': 3, 'micro': 0, } diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index 9bb15068ce3a5ad934f7b0251b84cb19f37702f6..d0b3592905c32a4c7af875014532a5539805dd3a 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -48,7 +48,7 @@ DEFAULT_TEST_END = "2017-12-31" DEFAULT_TEST_MIN_LENGTH = 90 DEFAULT_TRAIN_VAL_MIN_LENGTH = 180 DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True -DEFAULT_COMPETITORS = ["ols"] +DEFAULT_COMPETITORS = ["ols", "persi"] DEFAULT_DO_UNCERTAINTY_ESTIMATE = True DEFAULT_UNCERTAINTY_ESTIMATE_BLOCK_LENGTH = "1m" DEFAULT_UNCERTAINTY_ESTIMATE_EVALUATE_COMPETITORS = True diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py index 5fc3df951ed5dec9e94ed7d34d8dc02bafddf262..097eaf97e6ef8023abdc59e2b9e77c4d56aa25df 100644 --- a/mlair/helpers/filter.py +++ b/mlair/helpers/filter.py @@ -118,7 +118,11 @@ class FIRFilter: filt = xr.apply_ufunc(fir_filter_convolve, d, input_core_dims=[[time_dim]], output_core_dims=[[time_dim]], vectorize=True, kwargs={"h": h}, output_dtypes=[d.dtype]) - coll.append(filt) + # trim data to valid range + ext_len = int((len(h) + 1) / 2) + valid_range = filt.coords[time_dim].values[ext_len:-ext_len] + trimmed = filt.sel(**{time_dim: valid_range}) + coll.append(trimmed) filtered = xr.concat(coll, var_dim) # create result array with same shape like input data, gaps are filled by nans diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py index ca69f28557c6386f021b137e5861660f40b867d9..0b97f826ee34a35dc62313ed9350919a94931e62 100644 --- a/mlair/helpers/helpers.py +++ b/mlair/helpers/helpers.py @@ -4,6 +4,7 @@ __date__ = '2019-10-21' import inspect import math +import argparse import numpy as np import xarray as xr @@ -122,7 +123,7 @@ def float_round(number: float, decimals: int = 0, round_type: Callable = math.ce return round_type(number * multiplier) / multiplier -def relative_round(x: float, sig: int) -> float: +def relative_round(x: float, sig: int, ceil=False, floor=False) -> float: """ Round small numbers according to given "significance". 
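A minimal usage sketch for the two helpers this file gains (`relative_round` with the new `ceil`/`floor` options above, and `str2bool` further below); the numeric values and the argparse flag name are illustrative only, not taken from the diff:

```python
import argparse
from mlair.helpers.helpers import relative_round, str2bool

# keep 2 significant digits; ceil/floor force the last kept digit up or down
relative_round(0.03112, 2)              # -> 0.031 (standard rounding)
relative_round(0.03112, 2, ceil=True)   # -> 0.032 (rounded towards +inf)
relative_round(0.03112, 2, floor=True)  # -> 0.031 (rounded towards -inf)

# str2bool lets argparse accept strings like "yes"/"no"/"1"/"0" as booleans (#417)
parser = argparse.ArgumentParser()
parser.add_argument("--plot", type=str2bool, default=True)  # hypothetical flag
assert parser.parse_args(["--plot", "no"]).plot is False
```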
@@ -134,7 +135,26 @@ def relative_round(x: float, sig: int) -> float: :return: rounded number """ assert sig >= 1 - return round(x, sig-int(np.floor(np.log10(abs(x))))-1) + assert not (ceil and floor) + if x == 0: + return 0 + else: + rounded_number = round(x, sig-get_order(x)-1) + if floor is True and rounded_number > round(x, sig-get_order(x)): + res = rounded_number - 10 ** (get_order(x) - sig + 1) + elif ceil is True and rounded_number < round(x, sig-get_order(x)): + res = rounded_number + 10 ** (get_order(x) - sig + 1) + else: + res = rounded_number + return round(res, sig-get_order(res)-1) + + +def get_order(x: float): + """Get the order of magnitude of a number (as a power of 10).""" + if x == 0: + return -np.inf + else: + return int(np.floor(np.log10(abs(x)))) def remove_items(obj: Union[List, Dict, Tuple], items: Any): @@ -266,6 +286,28 @@ def filter_dict_by_value(dictionary: dict, filter_val: Any, filter_cond: bool) - return dict(filter(lambda x: (x[1] == filter_val) is filter_cond, dictionary.items())) +def str2bool(v): + if isinstance(v, bool): + return v + elif isinstance(v, str): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + +def squeeze_coords(d): + """Look for unused coords and remove them. Only works for xarray DataArrays.""" + try: + return d.drop(set(d.coords.keys()).difference(d.dims)) + except Exception: + return d + + # def convert_size(size_bytes): # if size_bytes == 0: # return "0B" diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py index 5f3aa45161530ff7d425ccbc7625dd7e081d8839..9415e0fcb0557f792d1cc0679868caff97e316d3 100644 --- a/mlair/helpers/statistics.py +++ b/mlair/helpers/statistics.py @@ -12,6 +12,7 @@ from typing import Union, Tuple, Dict, List import itertools from collections import OrderedDict from mlair.helpers import to_list +from mlair.helpers.helpers import squeeze_coords Data = Union[xr.DataArray, pd.DataFrame] @@ -213,6 +214,11 @@ def mean_absolute_error(a, b, dim=None): return np.abs(a - b).mean(dim) +def mean_error(a, b, dim=None): + """Calculate mean error where a is forecast and b the reference (e.g. observation).""" + return a.mean(dim) - b.mean(dim) + + def index_of_agreement(a, b, dim=None): """Calculate index of agreement (IOA) where a is the forecast and b the reference (e.g. observation).""" num = (np.square(b - a)).sum(dim) @@ -234,7 +240,7 @@ def modified_normalized_mean_bias(a, b, dim=None): def calculate_error_metrics(a, b, dim): - """Calculate MSE, RMSE, MAE, IOA, and MNMB. Additionally, return number of used values for calculation. + """Calculate MSE, ME, RMSE, MAE, IOA, and MNMB. Additionally, return number of used values for calculation. :param a: forecast data to calculate metrics for :param b: reference (e.g.
observation) @@ -243,12 +249,25 @@ def calculate_error_metrics(a, b, dim): :returns: dict with results for all metrics indicated by lowercase metric short name """ mse = mean_squared_error(a, b, dim) + me = mean_error(a, b, dim) rmse = np.sqrt(mse) mae = mean_absolute_error(a, b, dim) ioa = index_of_agreement(a, b, dim) mnmb = modified_normalized_mean_bias(a, b, dim) n = (a - b).notnull().sum(dim) - return {"mse": mse, "rmse": rmse, "mae": mae, "ioa": ioa, "mnmb": mnmb, "n": n} + results = {"mse": mse, "me": me, "rmse": rmse, "mae": mae, "ioa": ioa, "mnmb": mnmb, "n": n} + return {k: squeeze_coords(v) for k, v in results.items()} + + +def get_error_metrics_units(base_unit): + return {"mse": f"{base_unit}$^2$", "me": base_unit, "rmse": base_unit, "mae": base_unit, "ioa": None, "mnmb": None, + "n": None} + + +def get_error_metrics_long_name(): + return {"mse": "mean squared error", "me": "mean error", "rmse": "root mean squared error", + "mae": "mean absolute error", "ioa": "index of agreement", "mnmb": "modified normalized mean bias", + "n": "count"} def mann_whitney_u_test(data: pd.DataFrame, reference_col_name: str, **kwargs): diff --git a/mlair/model_modules/branched_input_networks.py b/mlair/model_modules/branched_input_networks.py index af3a8bffa3169556d55af94192915e3a27f89cc1..acb83967b783a806e4221738c1e7144f6850c593 100644 --- a/mlair/model_modules/branched_input_networks.py +++ b/mlair/model_modules/branched_input_networks.py @@ -1,3 +1,4 @@ +import logging from functools import partial, reduce import copy from typing import Union @@ -9,6 +10,8 @@ from mlair.helpers import select_from_dict, to_list from mlair.model_modules.loss import var_loss from mlair.model_modules.recurrent_networks import RNN from mlair.model_modules.convolutional_networks import CNNfromConfig +from mlair.model_modules.residual_networks import ResNet +from mlair.model_modules.u_networks import UNet class BranchedInputCNN(CNNfromConfig): # pragma: no cover @@ -367,3 +370,189 @@ class BranchedInputFCN(AbstractModelClass): # pragma: no cover "metrics": ["mse", "mae", var_loss]} # self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss], loss_weights=[2, 1])], # "metrics": ["mse", "mae", var_loss]} + + +class BranchedInputUNet(UNet, BranchedInputCNN): # pragma: no cover + """ + A U-net neural network with multiple input branches. 
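+ Each branch applies the shared layer configuration to its own input; "blocksave" and "ConcatenateUNet" entries realise the U-Net skip connections within a branch. At the "Concatenate" layer all branches are merged, and the remaining (tail) layers are applied once to the merged tensor, as the configuration below illustrates.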
+ + ```python + + input_shape = [(72,1,9),(72,1,9),] + output_shape = [(4, )] + + # model + layer_configuration=[ + + # 1st block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 2nd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 3rd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 4th block (final down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + + # 5th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 64, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + + # 6th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 32, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + + # 7th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 16, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + + # Tail + {"type": "Concatenate"}, + {"type": "Flatten"}, + {"type": "Dense", "units": 128, "activation": "relu"} + ] + + model = BranchedInputUNet(input_shape, output_shape, layer_configuration) + ``` + + """ + + def __init__(self, input_shape, output_shape, layer_configuration: list, optimizer="adam", **kwargs): + + super(BranchedInputUNet, self).__init__(input_shape, output_shape, layer_configuration, optimizer=optimizer, **kwargs) + + def set_model(self): + + x_input = [] + x_in = [] + stop_pos = None + block_save = [] + + for branch in range(len(self._input_shape)): + print(branch) + block_save = [] + shape_b = self._input_shape[branch] + x_input_b = keras.layers.Input(shape=shape_b, name=f"input_branch{branch + 1}") + x_input.append(x_input_b) + x_in_b = x_input_b + b_conf = copy.deepcopy(self.conf) + + for pos, 
layer_opts in enumerate(b_conf): + print(layer_opts) + if layer_opts.get("type") == "Concatenate": + if stop_pos is None: + stop_pos = pos + else: + assert pos == stop_pos + break + layer, layer_kwargs, follow_up_layer = self._extract_layer_conf(layer_opts) + if layer == "blocksave": + block_save.append(x_in_b) + continue + layer_name = self._get_layer_name(layer, layer_kwargs, pos, branch) + if "Concatenate" in layer_name: + x_in_b = layer(name=layer_name)([x_in_b, block_save.pop(-1)]) + self._layer_save.append({"layer": layer, "follow_up_layer": follow_up_layer}) + continue + x_in_b = layer(**layer_kwargs, name=layer_name)(x_in_b) + if follow_up_layer is not None: + for follow_up in to_list(follow_up_layer): + layer_name = self._get_layer_name(follow_up, None, pos, branch) + x_in_b = follow_up(name=layer_name)(x_in_b) + self._layer_save.append({"layer": layer, **layer_kwargs, "follow_up_layer": follow_up_layer, + "branch": branch}) + x_in.append(x_in_b) + + print("concat") + x_concat = keras.layers.Concatenate()(x_in) + if len(block_save) > 0: + logging.warning(f"Branches of BranchedInputUNet are concatenated before last upsampling block is applied.") + block_save = [] + + if stop_pos is not None: + for pos, layer_opts in enumerate(self.conf[stop_pos + 1:]): + print(layer_opts) + layer, layer_kwargs, follow_up_layer = self._extract_layer_conf(layer_opts) + if layer == "blocksave": + block_save.append(x_concat) + continue + layer_name = self._get_layer_name(layer, layer_kwargs, pos, None) + if "Concatenate" in layer_name: + x_concat = layer(name=layer_name)([x_concat, block_save.pop(-1)]) + self._layer_save.append({"layer": layer, "follow_up_layer": follow_up_layer}) + continue + x_concat = layer(**layer_kwargs, name=layer_name)(x_concat) + if follow_up_layer is not None: + for follow_up in to_list(follow_up_layer): + layer_name = self._get_layer_name(follow_up, None, pos + stop_pos, None) + x_concat = follow_up(name=layer_name)(x_concat) + self._layer_save.append({"layer": layer, **layer_kwargs, "follow_up_layer": follow_up_layer, + "branch": "concat"}) + + x_concat = keras.layers.Dense(self._output_shape)(x_concat) + out = self.activation_output(name=f"{self.activation_output_name}_output")(x_concat) + self.model = keras.Model(inputs=x_input, outputs=[out]) + print(self.model.summary()) + + +class BranchedInputResNet(ResNet, BranchedInputCNN): # pragma: no cover + """ + A convolutional neural network with multiple input branches and residual blocks (skip connections). 
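+ The class combines the "residual_block" handling of ResNet with the branched model setup inherited from BranchedInputCNN; in the example below the branches are merged at the "Concatenate" layer before the dense tail.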
+ + ```python + input_shape = [(65,1,9), (65,1,9)] + output_shape = [(4, )] + + # model + layer_configuration=[ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"} + ] + + model = BranchedInputResNet(input_shape, output_shape, layer_configuration) + ``` + + """ + + def __init__(self, input_shape: list, output_shape: list, layer_configuration: list, optimizer="adam", **kwargs): + + super().__init__(input_shape, output_shape, layer_configuration, optimizer=optimizer, **kwargs) diff --git a/mlair/model_modules/residual_networks.py b/mlair/model_modules/residual_networks.py new file mode 100644 index 0000000000000000000000000000000000000000..913ed7b89125abffcb3f91a926adc9b2cd1b22a5 --- /dev/null +++ b/mlair/model_modules/residual_networks.py @@ -0,0 +1,95 @@ +__author__ = "Lukas Leufen" +__date__ = "2022-08-23" + +from functools import partial + +from mlair.model_modules.convolutional_networks import CNNfromConfig + +import tensorflow.keras as keras + + +class ResNet(CNNfromConfig): # pragma: no cover + """ + A convolutional neural network with residual blocks (skip connections). 
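+ A "residual_block" entry expands into two convolutions (with batch normalization after the second) plus an additive skip connection; with "use_1x1conv": True the block downsamples with strides of 2 and projects the shortcut through a 1x1 convolution so that both shapes match for the addition.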
+ + ```python + input_shape = [(65,1,9)] + output_shape = [(4, )] + + # model + layer_configuration=[ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Dense", "units": 128, "activation": "relu"} + ] + + model = ResNet(input_shape, output_shape, layer_configuration) + ``` + + """ + + def __init__(self, input_shape: list, output_shape: list, layer_configuration: list, optimizer="adam", **kwargs): + + super().__init__(input_shape, output_shape, layer_configuration, optimizer=optimizer, **kwargs) + + @staticmethod + def residual_block(**layer_kwargs): + layer_name = layer_kwargs.pop("name").split("_") + layer_name = "_".join([*layer_name[0:2], "%s", *layer_name[2:]]) + act = layer_kwargs.pop("activation") + if isinstance(act, partial): + act_name = act.args[0] if act.func.__name__ == "Activation" else act.func.__name__ + else: + act_name = act.__name__ + use_1x1conv = layer_kwargs.pop("use_1x1conv", False) + + def block(x): + layer_kwargs.update({"strides": 2 if use_1x1conv else 1}) + y = keras.layers.Conv2D(**layer_kwargs, padding='same', name=layer_name % "Conv1")(x) + y = act(name=layer_name % f"{act_name}1")(y) + layer_kwargs.update({"strides": 1}) + y = keras.layers.Conv2D(**layer_kwargs, padding='same', name=layer_name % "Conv2")(y) + y = keras.layers.BatchNormalization(name=layer_name % "BN2")(y) + if use_1x1conv is True: + layer_kwargs.update({"strides": 2}) + layer_kwargs.update({"kernel_size": 1}) + x = keras.layers.Conv2D(**layer_kwargs, padding='same', name=layer_name % "Conv1x1")(x) + out = keras.layers.Add(name=layer_name % "Add")([x, y]) + out = act(name=layer_name % f"{act_name}2")(out) + return out + return block + + def _extract_layer_conf(self, layer_opts): + follow_up_layer = None + layer_type = layer_opts.pop("type") + activation_type = layer_opts.pop("activation", None) + if activation_type is not None: + activation = self._activation.get(activation_type) + kernel_initializer = self._initializer.get(activation_type, "glorot_uniform") + layer_opts["kernel_initializer"] = kernel_initializer + follow_up_layer = activation + if self.bn is True and layer_type.lower() != "residual_block": + another_layer = keras.layers.BatchNormalization + if activation_type in ["relu", "linear", "prelu", "leakyrelu"]: + follow_up_layer = (another_layer, follow_up_layer) + else: + follow_up_layer = (follow_up_layer, another_layer) + regularizer_type = layer_opts.pop("kernel_regularizer", 
None) + if regularizer_type is not None: + layer_opts["kernel_regularizer"] = self._set_regularizer(regularizer_type, **self.kwargs) + if layer_type.lower() == "residual_block": + layer = self.residual_block + layer_opts["activation"] = follow_up_layer + follow_up_layer = None + else: + layer = getattr(keras.layers, layer_type, None) + return layer, layer_opts, follow_up_layer diff --git a/mlair/model_modules/u_networks.py b/mlair/model_modules/u_networks.py new file mode 100644 index 0000000000000000000000000000000000000000..2462553d30e84a23ae1f56528bd912d9e25e14b9 --- /dev/null +++ b/mlair/model_modules/u_networks.py @@ -0,0 +1,138 @@ +__author__ = "Lukas Leufen" +__date__ = "2022-08-29" + + +from functools import partial + +from mlair.helpers import select_from_dict, to_list +from mlair.model_modules.convolutional_networks import CNNfromConfig +import tensorflow.keras as keras + + +class UNet(CNNfromConfig): # pragma: no cover + """ + A U-net neural network. Note that "blocksave" must store the tensor before pooling so that the up-sampled tensors match the saved ones at each "ConcatenateUNet", and the time dimension must be divisible by 2 for every pooling step. + + ```python + input_shape = [(64,1,9)] + output_shape = [(4, )] + + # model + layer_configuration=[ + + # 1st block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 2nd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 3rd block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 4th block (down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "blocksave"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + + # 5th block (final down) + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 256, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 256, "padding": "same"}, + + # 6th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 128, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "padding": "same"}, + + # 7th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 64, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation":
"relu", "kernel_size": (3, 1), "filters": 64, "padding": "same"}, + + # 8th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 32, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + + # 8th block (up) + {"type": "Conv2DTranspose", "activation": "relu", "kernel_size": (2, 1), "filters": 16, "strides": (2, 1), + "padding": "same"}, + {"type": "ConcatenateUNet"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "padding": "same"}, + + # Tail + {"type": "Flatten"}, + {"type": "Dense", "units": 128, "activation": "relu"} + ] + + model = UNet(input_shape, output_shape, layer_configuration) + ``` + + + """ + + def __init__(self, input_shape: list, output_shape: list, layer_configuration: list, optimizer="adam", **kwargs): + + super().__init__(input_shape, output_shape, layer_configuration, optimizer=optimizer, **kwargs) + + def _extract_layer_conf(self, layer_opts): + if layer_opts["type"] == "ConcatenateUNet": + layer = getattr(keras.layers, "Concatenate", None) + return layer, None, None + elif layer_opts["type"] == "blocksave": + return "blocksave", None, None + else: + return super()._extract_layer_conf(layer_opts) + + def set_model(self): + x_input = keras.layers.Input(shape=self._input_shape) + x_in = x_input + block_save = [] + + for pos, layer_opts in enumerate(self.conf): + print(layer_opts) + layer, layer_kwargs, follow_up_layer = self._extract_layer_conf(layer_opts) + if layer == "blocksave": + block_save.append(x_in) + continue + layer_name = self._get_layer_name(layer, layer_kwargs, pos) + if "Concatenate" in layer_name: + x_in = layer(name=layer_name)([x_in, block_save.pop(-1)]) + self._layer_save.append({"layer": layer, "follow_up_layer": follow_up_layer}) + continue + x_in = layer(**layer_kwargs, name=layer_name)(x_in) + if follow_up_layer is not None: + for follow_up in to_list(follow_up_layer): + layer_name = self._get_layer_name(follow_up, None, pos) + x_in = follow_up(name=layer_name)(x_in) + self._layer_save.append({"layer": layer, **layer_kwargs, "follow_up_layer": follow_up_layer}) + + x_in = keras.layers.Dense(self._output_shape)(x_in) + out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in) + self.model = keras.Model(inputs=x_input, outputs=[out]) + print(self.model.summary()) diff --git a/mlair/plotting/abstract_plot_class.py b/mlair/plotting/abstract_plot_class.py index a26023bb6cb8772623479491ac8bcc731dd42223..377dc6b7abbbb693c9d18175983101e622063a70 100644 --- a/mlair/plotting/abstract_plot_class.py +++ b/mlair/plotting/abstract_plot_class.py @@ -92,11 +92,10 @@ class AbstractPlotClass: # pragma: no cover plt.rcParams.update(self.rc_params) @staticmethod - def _get_sampling(sampling): - if sampling == "daily": - return "D" - elif sampling == "hourly": - return "h" + def _get_sampling(sampling, pos=1): + sampling = (sampling, sampling) if isinstance(sampling, str) else sampling + sampling_letter = {"hourly": "H", "daily": "d"}.get(sampling[pos], "") + return sampling, sampling_letter @staticmethod def get_dataset_colors(): diff --git a/mlair/plotting/data_insight_plotting.py 
b/mlair/plotting/data_insight_plotting.py index db2b3340e06545f988c81503df2aa27b655095bb..d33f3abb2e2cd04399a2d98f34bf2ed06acfcb6b 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -188,7 +188,7 @@ class PlotAvailability(AbstractPlotClass): # pragma: no cover super().__init__(plot_folder, "data_availability") self.time_dim = time_dimension self.window_dim = window_dimension - self.sampling = self._get_sampling(sampling) + self.sampling = self._get_sampling(sampling)[1] self.linewidth = None if self.sampling == 'h': self.linewidth = 0.001 @@ -321,7 +321,7 @@ class PlotAvailabilityHistogram(AbstractPlotClass): # pragma: no cover def _set_dims_from_datahandler(self, data_handler): self.temporal_dim = data_handler.id_class.time_dim self.target_dim = data_handler.id_class.target_dim - self.freq = self._get_sampling(data_handler.id_class.sampling) + self.freq = self._get_sampling(data_handler.id_class.sampling)[1] @property def allowed_plot_types(self): diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py index c7647ef5bf5b5b6c46eae9318c0fd99b294292c6..66679a22bcf0d6a2da4702bf6d41e0fb61b6f62a 100644 --- a/mlair/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -11,17 +11,21 @@ import itertools import matplotlib import matplotlib.pyplot as plt +import matplotlib.colors as colors import numpy as np import pandas as pd import seaborn as sns import xarray as xr from matplotlib.backends.backend_pdf import PdfPages from matplotlib.offsetbox import AnchoredText +import matplotlib.dates as mdates from scipy.stats import mannwhitneyu +import datetime as dt from mlair import helpers from mlair.data_handler.iterator import DataCollection from mlair.helpers import TimeTrackingWrapper +from mlair.helpers.helpers import relative_round from mlair.plotting.abstract_plot_class import AbstractPlotClass from mlair.helpers.statistics import mann_whitney_u_test, represent_p_values_as_asteriks @@ -131,8 +135,7 @@ class PlotMonthlySummary(AbstractPlotClass): # pragma: no cover """ data = self._data.to_dataset(name='values').to_dask_dataframe() logging.debug("... 
start plotting") - color_palette = [matplotlib.colors.cnames["green"]] + sns.color_palette("Blues_d", - self._window_lead_time).as_hex() + color_palette = [colors.cnames["green"]] + sns.color_palette("Blues_d", self._window_lead_time).as_hex() ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1.5, palette=color_palette, flierprops={'marker': '.', 'markersize': 1}, showmeans=True, meanprops={'markersize': 1, 'markeredgecolor': 'k'}) @@ -175,16 +178,26 @@ class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover warnings.filterwarnings("ignore", message="Attempted to set non-positive bottom ylim on a log-scaled axis.") def __init__(self, stations: List, data_pred_path: str, plot_folder: str = ".", plot_per_seasons=True, - rolling_window: int = 3, forecast_indicator: str = "nn", obs_indicator: str = "obs", **kwargs): + rolling_window: int = 3, forecast_indicator: str = "nn", obs_indicator: str = "obs", + competitors=None, model_type_dim: str = "type", index_dim: str = "index", ahead_dim: str = "ahead", + competitor_path: str = None, sampling: str = "daily", model_name: str = "nn", **kwargs): """Initialise.""" super().__init__(plot_folder, "conditional_quantiles") self._data_pred_path = data_pred_path self._stations = stations self._rolling_window = rolling_window self._forecast_indicator = forecast_indicator + self.model_type_dim = model_type_dim + self.index_dim = index_dim + self.ahead_dim = ahead_dim + self.iter_dim = "station" + self.model_name = model_name self._obs_name = obs_indicator + self._sampling = sampling self._opts = self._get_opts(kwargs) self._seasons = ['DJF', 'MAM', 'JJA', 'SON'] if plot_per_seasons is True else "" + self.competitors = self._correct_persi_name(competitors or []) + self.competitor_path = competitor_path or data_pred_path self._data = self._load_data() self._bins = self._get_bins_from_rage_of_data() self._plot() @@ -209,11 +222,95 @@ class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover for station in self._stations: file = os.path.join(self._data_pred_path, f"forecasts_{station}_test.nc") data_tmp = xr.open_dataarray(file) - data_collector.append(data_tmp.loc[:, :, [self._forecast_indicator, - self._obs_name]].assign_coords(station=station)) - res = xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station') + start = data_tmp.coords[self.index_dim].min().values + end = data_tmp.coords[self.index_dim].max().values + competitor = self.load_competitors(station, start, end) + combined = self._combine_forecasts(data_tmp, competitor, dim=self.model_type_dim) + sel = combined.sel({self.model_type_dim: [self._forecast_indicator, self._obs_name, *self.competitors]}) + data_collector.append(sel.assign_coords({self.iter_dim: station})) + res = xr.concat(data_collector, dim=self.iter_dim).transpose(self.index_dim, self.model_type_dim, + self.ahead_dim, self.iter_dim) + return res + + def _combine_forecasts(self, forecast, competitor, dim=None): + """ + Combine forecast and competitor if both are xarray. If competitor is None, this returns forecasts and vise + versa. + """ + if dim is None: + dim = self.model_type_dim + try: + return xr.concat([forecast, competitor], dim=dim) + except (TypeError, AttributeError): + return forecast if competitor is None else competitor + + def load_competitors(self, station_name: str, start, end) -> xr.DataArray: + """ + Load all requested and available competitors for a given station. 
Forecasts must be available in the competitor + path like `<competitor_path>/<target_var>/forecasts_<station_name>_test.nc`. The naming scheme is identical for all + forecasts of MLAir, so forecasts of a different experiment can easily be copied into the competitor path + without any change. + + :param station_name: station indicator to load competitors for + + :return: a single xarray with all competing forecasts + """ + competing_predictions = [] + for competitor_name in self.competitors: + try: + prediction = self._create_competitor_forecast(station_name, competitor_name, start, end) + competing_predictions.append(prediction) + except (FileNotFoundError, KeyError): + logging.debug(f"No competitor found for combination '{station_name}' and '{competitor_name}'.") + continue + return xr.concat(competing_predictions, self.model_type_dim) if len(competing_predictions) > 0 else None + + @staticmethod + def create_full_time_dim(data, dim, sampling, start, end): + """Ensure the time dimension is equidistant; dates may have been dropped where values were missing.""" + start_data = data.coords[dim].values[0] + freq = {"daily": "1D", "hourly": "1H"}.get(sampling) + _ind = pd.date_range(start, end, freq=freq) # two steps required to include all hours of end interval + datetime_index = pd.DataFrame(index=pd.date_range(_ind.min(), _ind.max() + dt.timedelta(days=1), + closed="left", freq=freq)) + t = data.sel({dim: start_data}, drop=True) + res = xr.DataArray(coords=[datetime_index.index, *[t.coords[c] for c in t.coords]], dims=[dim, *t.coords]) + res = res.transpose(*data.dims) + if data.shape == res.shape: + res.loc[data.coords] = data + else: + _d = data.sel({dim: slice(start, end)}) + res.loc[_d.coords] = _d return res + def _create_competitor_forecast(self, station_name: str, competitor_name: str, start, end) -> xr.DataArray: + """ + Load and format the competing forecast of a distinct model indicated by `competitor_name` for a distinct station + indicated by `station_name`. The name of the competitor is set in the `type` axis as indicator. This method + raises either a `FileNotFoundError` or a `KeyError` if no competitor could be found for the given station: either + there is no file in the expected path, or the forecast file contains no forecast for the given `competitor_name`. + The forecast is trimmed to the start and end of the test subset. + + :param station_name: name of the station to load data for + :param competitor_name: name of the model + :return: the forecast of the given competitor + """ + path = os.path.join(self.competitor_path, competitor_name) + file = os.path.join(path, f"forecasts_{station_name}_test.nc") + with xr.open_dataarray(file) as da: + data = da.load() + if self._forecast_indicator in data.coords[self.model_type_dim]: + forecast = data.sel({self.model_type_dim: [self._forecast_indicator]}) + forecast.coords[self.model_type_dim] = [competitor_name] + else: + forecast = data.sel({self.model_type_dim: [competitor_name]}) + # limit forecast to time range of test subset + return self.create_full_time_dim(forecast, self.index_dim, self._sampling, start, end) + + @staticmethod + def _correct_persi_name(competitors): + return ["persi" if x == "Persistence" else x for x in competitors] + def _segment_data(self, data: xr.DataArray, x_model: str) -> xr.DataArray: """ Segment data into bins. @@ -225,12 +322,13 @@ """ logging.debug("...
segment data") # combine index and station to multi index - data = data.stack(z=['index', 'station']) + data = data.stack(z=[self.index_dim, self.iter_dim]) # replace multi index by simple position index (order is not relevant anymore) data.coords['z'] = range(len(data.coords['z'])) # segment data of x_model into bins - data.loc[x_model, ...] = data.loc[x_model, ...].to_pandas().T.apply(pd.cut, bins=self._bins, - labels=self._bins[1:]).T.values + data_sel = data.sel({self.model_type_dim: x_model}) + data.loc[{self.model_type_dim: x_model}] = data_sel.to_pandas().T.apply(pd.cut, bins=self._bins, + labels=self._bins[1:]).T.values return data @staticmethod @@ -243,7 +341,7 @@ class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover :return: tuple with y and x labels """ - names = (f"forecast concentration (in {data_unit})", f"observed concentration (in {data_unit})") + names = (f"forecasted concentration (in {data_unit})", f"observed concentration (in {data_unit})") if plot_type == "obs": return names else: @@ -271,9 +369,9 @@ class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover # create empty xarray with dims: time steps ahead, quantiles, bin index (numbers create in previous step) quantile_panel = xr.DataArray( np.full([data.ahead.shape[0], len(self._opts["q"]), self._bins[1:].shape[0]], np.nan), - coords=[data.ahead, self._opts["q"], self._bins[1:]], dims=['ahead', 'quantiles', 'categories']) + coords=[data.ahead, self._opts["q"], self._bins[1:]], dims=[self.ahead_dim, 'quantiles', 'categories']) # ensure that the coordinates are in the right order - quantile_panel = quantile_panel.transpose('ahead', 'quantiles', 'categories') + quantile_panel = quantile_panel.transpose(self.ahead_dim, 'quantiles', 'categories') # calculate for each bin of the pred_name data the quantiles of the ref_name data for bin in self._bins[1:]: mask = (data.loc[x_model, ...] 
== bin) @@ -307,28 +405,34 @@ class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover def _plot(self): """Start plotting routines: overall plot and seasonal (if enabled).""" - logging.info( - f"start plotting {self.__class__.__name__}, scheduled number of plots: {(len(self._seasons) + 1) * 2}") - + logging.info(f"start plotting {self.__class__.__name__}, scheduled number of plots: " + f"{(len(self._seasons) + 1) * (len(self.competitors) + 1) * 2}") if len(self._seasons) > 0: self._plot_seasons() self._plot_all() def _plot_seasons(self): """Create seasonal plots.""" - for season in self._seasons: - self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._forecast_indicator, - y_model=self._obs_name, plot_name_affix="cali-ref", season=season) - self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._obs_name, - y_model=self._forecast_indicator, plot_name_affix="like-base", season=season) + for model in [self._forecast_indicator, *self.competitors]: + for season in self._seasons: + self._plot_base(data=self._data.where(self._data[f"{self.index_dim}.season"] == season), + x_model=model, y_model=self._obs_name, plot_name_affix="cali-ref", + season=season, model_name=model) + self._plot_base(data=self._data.where(self._data[f"{self.index_dim}.season"] == season), + x_model=self._obs_name, y_model=model, plot_name_affix="like-base", + season=season, model_name=model) def _plot_all(self): """Plot overall conditional quantiles on full data.""" - self._plot_base(data=self._data, x_model=self._forecast_indicator, y_model=self._obs_name, plot_name_affix="cali-ref") - self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._forecast_indicator, plot_name_affix="like-base") + for model in [self._forecast_indicator, *self.competitors]: + self._plot_base(data=self._data, x_model=model, y_model=self._obs_name, + plot_name_affix="cali-ref", model_name=model) + self._plot_base(data=self._data, x_model=self._obs_name, y_model=model, + plot_name_affix="like-base", model_name=model) @TimeTrackingWrapper - def _plot_base(self, data: xr.DataArray, x_model: str, y_model: str, plot_name_affix: str, season: str = ""): + def _plot_base(self, data: xr.DataArray, x_model: str, y_model: str, plot_name_affix: str, season: str = "", + model_name: str = ""): """ Create conditional quantile plots. @@ -340,7 +444,8 @@ class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover """ segmented_data, quantile_panel = self._prepare_plots(data, x_model, y_model) ylabel, xlabel = self._labels(x_model, self._opts["data_unit"]) - plot_name = f"{self.plot_name}{self.add_affix(season)}{self.add_affix(plot_name_affix)}_plot.pdf" + plot_name = f"{self.plot_name}{self.add_affix(season)}{self.add_affix(plot_name_affix)}" \ + f"{self.add_affix(model_name)}.pdf" plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name) pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) logging.debug(f"... 
plot path is {plot_path}") @@ -378,7 +483,9 @@ class PlotConditionalQuantiles(AbstractPlotClass): # pragma: no cover ax2.set_ylabel(' sample size', fontsize='x-large') ax2.tick_params(axis='y', which='major', labelsize=15) # set title and save current figure - title = f"{d.values} time step(s) ahead{f' ({season})' if len(season) > 0 else ''}" + sampling_letter = {"daily": "D", "hourly": "H"}.get(self._sampling) + model_name = self.model_name if model_name == self._forecast_indicator else model_name + title = f"{model_name} ({sampling_letter}{d.values}{f', {season}' if len(season) > 0 else ''})" plt.title(title) pdf_pages.savefig() # close all open figures / plots @@ -746,19 +853,14 @@ class PlotFeatureImportanceSkillScore(AbstractPlotClass): # pragma: no cover data = data.assign_coords({self._x_name: new_boot_coords}) except NotImplementedError: pass - _, sampling_letter = self._get_target_sampling(sampling, 1) - self._labels = [str(i) + sampling_letter for i in data.coords[self._ahead_dim].values] + _, sampling_letter = self._get_sampling(sampling, 1) + sampling_letter = {"d": "D", "h": "H"}.get(sampling_letter, sampling_letter) + self._labels = [sampling_letter + str(i) for i in data.coords[self._ahead_dim].values] if station_dim not in data.dims: data = data.expand_dims(station_dim) self._number_of_bootstraps = np.unique(data.coords[self._boot_dim].values).shape[0] return data.to_dataframe("data").reset_index(level=np.arange(len(data.dims)).tolist()).dropna() - @staticmethod - def _get_target_sampling(sampling, pos): - sampling = (sampling, sampling) if isinstance(sampling, str) else sampling - sampling_letter = {"hourly": "H", "daily": "d"}.get(sampling[pos], "") - return sampling, sampling_letter - def _return_vars_without_number_tag(self, values, split_by, keep, as_unique=False): arr = np.array([v.split(split_by) for v in values]) num = arr[:, 0] @@ -1037,13 +1139,14 @@ class PlotTimeSeries: # pragma: no cover color = sns.color_palette("Blues_d", self._window_lead_time).as_hex() for ahead in data.coords[self._ahead_dim].values: plot_data = data.sel({"type": self._model_name, self._ahead_dim: ahead}).drop(["type", self._ahead_dim]).squeeze().shift(index=ahead) - label = f"{ahead}{self._sampling}" + sampling_letter = {"d": "D", "h": "H"}.get(self._sampling, self._sampling) + label = f"{sampling_letter}{ahead}" ax.plot(plot_data, color=color[ahead - 1], label=label) def _plot_obs(self, ax, data): ahead = 1 obs_data = data.sel(type="obs", ahead=ahead).shift(index=ahead) - ax.plot(obs_data, color=matplotlib.colors.cnames["green"], label="obs") + ax.plot(obs_data, color=colors.cnames["green"], label="obs") @staticmethod def _get_time_range(data): @@ -1096,8 +1199,9 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type", error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots', block_length: str = None, model_name: str = "NN", model_indicator: str = "nn", - ahead_dim: str = "ahead", sampling: Union[str, Tuple[str]] = "", season_annotation: str = None): - super().__init__(plot_folder, "sample_uncertainty_from_bootstrap") + ahead_dim: str = "ahead", sampling: Union[str, Tuple[str]] = "", season_annotation: str = None, + apply_root: bool = True, plot_name="sample_uncertainty_from_bootstrap"): + super().__init__(plot_folder, plot_name) self.default_plot_name = self.plot_name self.model_type_dim = model_type_dim self.ahead_dim = ahead_dim @@ -1112,16 
+1216,19 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover self.prepare_data(data) # create all combinations to plot (h/v, utest/notest, single/multi) - variants = list(itertools.product(*[["v", "h"], [True, False], ["single", "multi"]])) + variants = list(itertools.product(*[["v", "h"], [True, False], ["single", "multi", "panel"]])) # plot raw metric (mse) for orientation, utest, agg_type in variants: self._plot(orientation=orientation, apply_u_test=utest, agg_type=agg_type, season=_season) + self._plot_kde(agg_type=agg_type, season=_season) - # plot root of metric (rmse) - self._apply_root() - for orientation, utest, agg_type in variants: - self._plot(orientation=orientation, apply_u_test=utest, agg_type=agg_type, tag="_sqrt", season=_season) + if apply_root is True: + # plot root of metric (rmse) + self._apply_root() + for orientation, utest, agg_type in variants: + self._plot(orientation=orientation, apply_u_test=utest, agg_type=agg_type, tag="_sqrt", season=_season) + self._plot_kde(agg_type=agg_type, tag="_sqrt", season=_season) self._data_table = None self._n_boots = None @@ -1150,6 +1257,61 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover self.error_measure = f"root {self.error_measure}" self.error_unit = self.error_unit.replace("$^2$", "") + def _plot_kde(self, agg_type="single", tag="", season=""): + self.plot_name = self.default_plot_name + "_kde" + "_" + agg_type + tag + {"": ""}.get(season, f"_{season}") + data_table = self._data_table + if agg_type == "multi": + return # nothing to do + if self.ahead_dim not in data_table.index.names and agg_type == "panel": + return # nothing to do + n_boots = self._n_boots + error_label = self.error_measure if self.error_unit is None else f"{self.error_measure} (in {self.error_unit})" + sampling_letter = {"d": "D", "h": "H"}.get(self.sampling, self.sampling) + if agg_type == "single": + fig, ax = plt.subplots() + if self.ahead_dim in data_table.index.names: + data_table = data_table.groupby(level=0).mean() + sns.kdeplot(data=data_table, ax=ax) + ylims = list(ax.get_ylim()) + ax.set_ylim([ylims[0], ylims[1]*1.025]) + ax.set_xlabel(error_label) + + text = f"n={n_boots}" + if self.block_length is not None: + text = f"{self.block_length}, {text}" + if len(season) > 0: + text = f"{season}, {text}" + loc = "upper left" + text_box = AnchoredText(text, frameon=True, loc=loc, pad=0.5, bbox_to_anchor=(0., 1.0), + bbox_transform=ax.transAxes) + plt.setp(text_box.patch, edgecolor='k', facecolor='w') + ax.add_artist(text_box) + ax.get_legend().set_title(None) + plt.tight_layout() + else: + g = sns.FacetGrid(data_table.stack(self.model_type_dim).reset_index(), **{"col": self.ahead_dim}, + hue=self.model_type_dim, legend_out=True) + g.map(sns.kdeplot, 0) + g.add_legend(title="") + fig = plt.gcf() + _labels = [sampling_letter + str(i) for i in data_table.index.levels[1].values] + for axi, title in zip(g.axes.flatten(), _labels): + axi.set_title(title) + for axi in g.axes.flatten(): + axi.set_xlabel(None) + fig.supxlabel(error_label) + text = f"n={n_boots}" + if self.block_length is not None: + text = f"{self.block_length}, {text}" + if len(season) > 0: + text = f"{season}, {text}" + loc = "upper right" + text_box = AnchoredText(text, frameon=True, loc=loc) + plt.setp(text_box.patch, edgecolor='k', facecolor='w') + g.axes.flatten()[0].add_artist(text_box) + self._save() + plt.close("all") + def _plot(self, orientation: str = "v", apply_u_test: bool = False, agg_type="single", tag="", 
season=""): self.plot_name = self.default_plot_name + {"v": "_vertical", "h": "_horizontal"}[orientation] + \ {True: "_u_test", False: ""}[apply_u_test] + "_" + agg_type + tag + \ @@ -1157,15 +1319,21 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover if apply_u_test is True and agg_type == "multi": return # not implemented data_table = self._data_table - if self.ahead_dim not in data_table.index.names and agg_type == "multi": + if self.ahead_dim not in data_table.index.names and agg_type in ["multi", "panel"]: + return # nothing to do + if apply_u_test is True and agg_type == "panel": return # nothing to do n_boots = self._n_boots size = len(np.unique(data_table.columns)) asteriks = self.get_asteriks_from_mann_whitney_u_result if apply_u_test is True else None color_palette = sns.color_palette("Blues_d", self._factor).as_hex() + sampling_letter = {"d": "D", "h": "H"}.get(self.sampling, self.sampling) if orientation == "v": figsize, width = (size, 5), 0.4 elif orientation == "h": + if agg_type == "multi": + size *= np.sqrt(len(data_table.index.unique(self.ahead_dim))) + size = max(size, 8) figsize, width = (7, (1+.5*size)), 0.65 else: raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") @@ -1177,43 +1345,82 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass): # pragma: no cover showmeans=True, meanprops={"markersize": 6, "markeredgecolor": "k"}, flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, boxprops={'facecolor': 'none', 'edgecolor': 'k'}, width=width, orient=orientation) - else: + elif agg_type == "multi": xy = {"x": self.model_type_dim, "y": 0} if orientation == "v" else {"x": 0, "y": self.model_type_dim} sns.boxplot(data=data_table.stack(self.model_type_dim).reset_index(), ax=ax, whis=1.5, palette=color_palette, showmeans=True, meanprops={"markersize": 6, "markeredgecolor": "k", "markerfacecolor": "white"}, flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, - boxprops={'edgecolor': 'k'}, width=.9, orient=orientation, **xy, hue=self.ahead_dim) + boxprops={'edgecolor': 'k'}, width=.8, orient=orientation, **xy, hue=self.ahead_dim) - _labels = [str(i) + self.sampling for i in data_table.index.levels[1].values] + _labels = [sampling_letter + str(i) for i in data_table.index.levels[1].values] handles, _ = ax.get_legend_handles_labels() ax.legend(handles, _labels) - - if orientation == "v": - if apply_u_test: - ax = self.set_significance_bars(asteriks, ax, data_table, orientation) - ylims = list(ax.get_ylim()) - ax.set_ylim([ylims[0], ylims[1]*1.025]) - ax.set_ylabel(f"{self.error_measure} (in {self.error_unit})") - ax.set_xticklabels(ax.get_xticklabels(), rotation=45) - elif orientation == "h": - if apply_u_test: - ax = self.set_significance_bars(asteriks, ax, data_table, orientation) - ax.set_xlabel(f"{self.error_measure} (in {self.error_unit})") - xlims = list(ax.get_xlim()) - ax.set_xlim([xlims[0], xlims[1] * 1.015]) else: - raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") - text = f"n={n_boots}" - if self.block_length is not None: - text = f"{self.block_length}, {text}" - if len(season) > 0: - text = f"{season}, {text}" - loc = "lower left" - text_box = AnchoredText(text, frameon=True, loc=loc, pad=0.5, bbox_to_anchor=(0., 1.0), - bbox_transform=ax.transAxes) - plt.setp(text_box.patch, edgecolor='k', facecolor='w') - ax.add_artist(text_box) - plt.setp(ax.lines, color='k') + xy = (self.model_type_dim, 
0) if orientation == "v" else (0, self.model_type_dim) + col_or_row = {"col": self.ahead_dim} if orientation == "v" else {"row": self.ahead_dim} + aspect = figsize[0] / figsize[1] + height = figsize[1] * 0.8 + ax = sns.FacetGrid(data_table.stack(self.model_type_dim).reset_index(), **col_or_row, aspect=aspect, height=height) + ax.map(sns.boxplot, *xy, whis=1.5, color="white", showmeans=True, order=data_table.mean().index.to_list(), + meanprops={"markersize": 6, "markeredgecolor": "k"}, + flierprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none", "markersize": 3}, + boxprops={'facecolor': 'none', 'edgecolor': 'k'}, width=width, orient=orientation) + + _labels = [sampling_letter + str(i) for i in data_table.index.levels[1].values] + for axi, title in zip(ax.axes.flatten(), _labels): + axi.set_title(title) + plt.setp(axi.lines, color='k') + + error_label = self.error_measure if self.error_unit is None else f"{self.error_measure} (in {self.error_unit})" + if agg_type == "panel": + if orientation == "v": + for axi in ax.axes.flatten(): + axi.set_xlabel(None) + axi.set_xticklabels(axi.get_xticklabels(), rotation=45) + ax.set_ylabels(error_label) + loc = "upper left" + else: + for axi in ax.axes.flatten(): + axi.set_ylabel(None) + ax.set_xlabels(error_label) + loc = "upper right" + text = f"n={n_boots}" + if self.block_length is not None: + text = f"{self.block_length}, {text}" + if len(season) > 0: + text = f"{season}, {text}" + text_box = AnchoredText(text, frameon=True, loc=loc) + plt.setp(text_box.patch, edgecolor='k', facecolor='w') + ax.axes.flatten()[0].add_artist(text_box) + else: + if orientation == "v": + if apply_u_test: + ax = self.set_significance_bars(asteriks, ax, data_table, orientation) + ylims = list(ax.get_ylim()) + ax.set_ylim([ylims[0], ylims[1]*1.025]) + ax.set_ylabel(error_label) + ax.set_xticklabels(ax.get_xticklabels(), rotation=45) + ax.set_xlabel(None) + elif orientation == "h": + if apply_u_test: + ax = self.set_significance_bars(asteriks, ax, data_table, orientation) + ax.set_xlabel(error_label) + xlims = list(ax.get_xlim()) + ax.set_xlim([xlims[0], xlims[1] * 1.015]) + ax.set_ylabel(None) + else: + raise ValueError(f"orientation must be `v' or `h' but is: {orientation}") + text = f"n={n_boots}" + if self.block_length is not None: + text = f"{self.block_length}, {text}" + if len(season) > 0: + text = f"{season}, {text}" + loc = "lower left" + text_box = AnchoredText(text, frameon=True, loc=loc, pad=0.5, bbox_to_anchor=(0., 1.0), + bbox_transform=ax.transAxes) + plt.setp(text_box.patch, edgecolor='k', facecolor='w') + ax.add_artist(text_box) + plt.setp(ax.lines, color='k') plt.tight_layout() self._save() plt.close("all") @@ -1253,6 +1460,7 @@ class PlotTimeEvolutionMetric(AbstractPlotClass): vmax = int(data.quantile(0.95)) data = self._prepare_data(data, time_dim, model_type_dim, model_indicator, model_name) + # detailed plot for each model type for t in data[model_type_dim]: # note: could be expanded to create plot per ahead step plot_data = data.sel({model_type_dim: t}).mean(ahead_dim).to_pandas() @@ -1262,6 +1470,24 @@ class PlotTimeEvolutionMetric(AbstractPlotClass): self.plot_name = f"{plot_name}_{t.values}" self._plot(plot_data, years, months, vmin, vmax, str(t.values)) + # aggregated version with all model types + remaining_dim = set(data.dims).difference((model_type_dim, time_dim)) + _data = data.mean(remaining_dim, skipna=True).transpose(model_type_dim, time_dim) + vmin = int(_data.quantile(0.05)) + vmax = int(_data.quantile(0.95)) + 
plot_data = _data.to_pandas()
+        years = plot_data.columns.strftime("%Y").to_list()
+        months = plot_data.columns.strftime("%b").to_list()
+        plot_data.columns = plot_data.columns.strftime("%b %Y")
+        self.plot_name = f"{plot_name}_summary"
+        self._plot(plot_data, years, months, vmin, vmax, None)
+
+        # line plot version
+        y_dim = "error"
+        plot_data = data.to_dataset(name=y_dim).to_dataframe().reset_index()
+        self.plot_name = f"{plot_name}_line_plot"
+        self._plot_summary_line(plot_data, x_dim=time_dim, y_dim=y_dim, hue_dim=model_type_dim)
+
     @staticmethod
     def _find_nan_edge(data, time_dim):
         coll = []
@@ -1304,7 +1530,7 @@ class PlotTimeEvolutionMetric(AbstractPlotClass):

     @staticmethod
     def _aspect_cbar(val):
-        return min(max(1.25 * val + 7.5, 10), 30)
+        return min(max(1.25 * val + 7.5, 5), 30)

     def _plot(self, data, years, months, vmin=None, vmax=None, subtitle=None):
         fig, ax = plt.subplots(figsize=(max(data.shape[1] / 6, 12), max(data.shape[0] / 3.5, 2)))
@@ -1318,10 +1544,386 @@
         plt.tight_layout()
         self._save()

+    def _plot_summary_line(self, data, x_dim, y_dim, hue_dim):
+        data[x_dim] = pd.to_datetime(data[x_dim].dt.strftime('%Y-%m'))  # floor all timestamps to the first of the month
+        n = len(data[hue_dim].unique())
+        ax = sns.lineplot(data=data, x=x_dim, y=y_dim, hue=hue_dim, errorbar=("pi", 50),
+                          palette=sns.color_palette()[:n], style=hue_dim, dashes=False, markers=["X"]*n)
+        ax.set(xlabel=None, ylabel=self.title)
+        ax.get_legend().set_title(None)
+        ax.xaxis.set_major_locator(mdates.YearLocator())
+        ax.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
+        ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=range(1, 13, 3)))
+        ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b'))
+        ax.margins(x=0)
+        plt.tight_layout()
+        self._save()

-if __name__ == "__main__":
-    stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
-    path = "../../testrun_network/forecasts"
-    plt_path = "../../"
-    con_quan_cls = PlotConditionalQuantiles(stations, path, plt_path)

+@TimeTrackingWrapper
+class PlotSeasonalMSEStack(AbstractPlotClass):
+
+    def __init__(self, data, data_path: str, plot_folder: str = ".", boot_dim="boots", ahead_dim="ahead",
+                 sampling: str = "daily", error_measure: str = "MSE", error_unit: str = "ppb$^2$", time_dim="index",
+                 model_type_dim: str = "type", model_name: str = "NN", model_indicator: str = "nn"):
+        """Set attributes and create plot."""
+        super().__init__(plot_folder, "seasonal_mse_stack_plot")
+        self._data_path = data_path
+        self.season_dim = "season"
+        self.time_dim = time_dim
+        self.ahead_dim = ahead_dim
+        self.error_unit = error_unit
+        self.error_measure = error_measure
+        self.dim_order = [self.season_dim, ahead_dim, model_type_dim]
+
+        # mse from monthly blocks
+        self.plot_name_orig = "seasonal_mse_stack_plot"
+        self._data = self._prepare_data(data)
+        for orientation in ["horizontal", "vertical"]:
+            for split_ahead in [True, False]:
+                self._plot(ahead_dim, split_ahead, sampling, orientation)
+                self._save(bbox_inches="tight")
+
+        # mse from resampling
+        self.plot_name_orig = "seasonal_mse_from_uncertainty_stack_plot"
+        self._data = self._prepare_data_from_uncertainty(boot_dim, data_path, model_type_dim, model_indicator,
+                                                         model_name)
+        for orientation in ["horizontal", "vertical"]:
+            for split_ahead in [True, False]:
+                self._plot(ahead_dim, split_ahead, sampling, orientation)
+                self._save(bbox_inches="tight")
+
+    def _prepare_data(self, data):
+        season_mean = data.groupby(f"{self.time_dim}.{self.season_dim}").mean()
+        total_mean = data.mean(self.time_dim)
+        factor = season_mean / season_mean.sum(self.season_dim)
+        season_share = (total_mean * factor).reindex({self.season_dim: ["DJF", "MAM", "JJA", "SON"]})
+        season_share = season_share.mean(set(season_share.dims).difference(self.dim_order))
+        return season_share.sortby(season_share.sum([self.season_dim, self.ahead_dim])).transpose(*self.dim_order)
+
+    def _prepare_data_from_uncertainty(self, boot_dim, data_path, model_type_dim, model_indicator, model_name):
+        season_dim = self.season_dim
+        data = {}
+        for season in ["total", "DJF", "MAM", "JJA", "SON"]:
+            if season == "total":
+                file_name = "uncertainty_estimate_raw_results.nc"
+            else:
+                file_name = f"uncertainty_estimate_raw_results_{season}.nc"
+            with xr.open_dataarray(os.path.join(data_path, file_name)) as d:
+                data[season] = d
+        mean = {}
+        for season in data.keys():
+            mean[season] = data[season].mean(boot_dim)
+        xr_data = xr.Dataset(mean).to_array(season_dim)
+        xr_data[model_type_dim] = [v if v != model_indicator else model_name for v in xr_data[model_type_dim].values]
+        xr_season = xr_data.sel({season_dim: ["DJF", "MAM", "JJA", "SON"]})
+        factor = xr_season / xr_season.sum(season_dim)
+        season_share = xr_data.sel({season_dim: "total"}) * factor
+        return season_share.sortby(season_share.sum([self.season_dim, self.ahead_dim])).transpose(*self.dim_order)
+
+    @staticmethod
+    def _set_bar_label(ax):
+        # annotate each stacked bar segment with its percentage share of the bar's total height
+        opts = {}
+        totals = {}
+        for c in ax.containers:
+            labels = [v for v in c.datavalues]
+            opts[c] = labels
+            totals = {i: totals.get(i, 0) + l for (i, l) in enumerate(labels)}
+        for c, labels in opts.items():
+            _l = [f"{round(100 * labels[i] / totals[i])}%" for i in range(len(labels))]
+            ax.bar_label(c, labels=_l, label_type='center')
+
+    def _plot(self, dim, split_ahead=True, sampling="daily", orientation="vertical"):
+        _, sampling_letter = self._get_sampling(sampling, 1)
+        sampling_letter = {"d": "D", "h": "H"}.get(sampling_letter, sampling_letter)
+        if split_ahead is False:
+            self.plot_name = self.plot_name_orig + "_total_" + orientation
+            data = self._data.mean(dim)
+            if orientation == "vertical":
+                fig, ax = plt.subplots(1, 1)
+                data.to_pandas().T.plot.bar(ax=ax, stacked=True, cmap="Dark2", legend=False)
+                ax.xaxis.label.set_visible(False)
+                ax.set_ylabel(f"{self.error_measure} (in {self.error_unit})")
+                self._set_bar_label(ax)
+            else:
+                m = data.to_pandas().T.shape[0]
+                fig, ax = plt.subplots(1, 1, figsize=(6, m))
+                data.to_pandas().T.plot.barh(ax=ax, stacked=True, cmap="Dark2", legend=False)
+                ax.yaxis.label.set_visible(False)
+                ax.set_xlabel(f"{self.error_measure} (in {self.error_unit})")
+                self._set_bar_label(ax)
+            fig.legend(*ax.get_legend_handles_labels(), loc="upper center", ncol=4)
+            fig.tight_layout(rect=[0, 0, 1, 0.9])
+        else:
+            self.plot_name = self.plot_name_orig + "_" + orientation
+            data = self._data
+            n = len(data.coords[dim])
+            m = data.max(self.season_dim).shape
+            if orientation == "vertical":
+                fig, ax = plt.subplots(1, n, sharey=True, figsize=(np.prod(m) / 0.8, 5))
+                for i, sel in enumerate(data.coords[dim].values):
+                    data.sel({dim: sel}).to_pandas().T.plot.bar(ax=ax[i], stacked=True, cmap="Dark2", legend=False)
+                    label = sampling_letter + str(sel)
+                    ax[i].set_title(label)
+                    ax[i].xaxis.label.set_visible(False)
+                    self._set_bar_label(ax[i])
+                ax[0].set_ylabel(f"{self.error_measure} (in {self.error_unit})")
+                fig.legend(*ax[0].get_legend_handles_labels(), loc="upper center", ncol=4)
+                fig.tight_layout(rect=[0, 0, 1, 0.9])
+            else:
+                fig, ax = plt.subplots(n, 1, sharex=True, figsize=(6, np.prod(m) * 0.6))
+                for 
i, sel in enumerate(data.coords[dim].values): + data.sel({dim: sel}).to_pandas().T.plot.barh(ax=ax[i], stacked=True, cmap="Dark2", legend=False) + label = sampling_letter + str(sel) + ax[i].set_title(label) + ax[i].yaxis.label.set_visible(False) + self._set_bar_label(ax[i]) + ax[-1].set_xlabel(f"{self.error_measure} (in {self.error_unit})") + fig.legend(*ax[0].get_legend_handles_labels(), loc="upper center", ncol=4) + fig.tight_layout(rect=[0, 0, 1, 0.95]) + + +@TimeTrackingWrapper +class PlotErrorsOnMap(AbstractPlotClass): + from mlair.plotting.data_insight_plotting import PlotStationMap + + def __init__(self, data_gen, errors, error_metric, plot_folder: str = ".", iter_dim: str = "station", + model_type_dim: str = "type", ahead_dim: str = "ahead", sampling: str = "daily"): + + super().__init__(plot_folder, f"map_plot_{error_metric}") + plot_path = os.path.join(self.plot_folder, f"{self.plot_name}.pdf") + pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) + error_metric_units = helpers.statistics.get_error_metrics_units("ppb")[error_metric] + error_metric_name = helpers.statistics.get_error_metrics_long_name()[error_metric] + self.sampling = self._get_sampling(sampling, 1)[1] + + coords = self._extract_coords(data_gen) + for split_ahead in [False, True]: + error_data = {} + for model_type in errors.coords[model_type_dim].values: + error_data[model_type] = self._prepare_data(errors, model_type_dim, model_type, ahead_dim, error_metric, + split_ahead=split_ahead) + limits = self._calculate_limits(error_data) + for model_type, error in error_data.items(): + if split_ahead is True: + for ahead in error.index.unique(1).to_list(): + error_ahead = error.query(f"{ahead_dim} == {ahead}").droplevel(1) + plot_data = pd.concat([coords, error_ahead], axis=1) + self.plot(plot_data, error_metric, error_metric_name, error_metric_units, model_type, limits, + ahead=ahead) + pdf_pages.savefig() + else: + plot_data = pd.concat([coords, error], axis=1) + self.plot(plot_data, error_metric, error_metric_name, error_metric_units, model_type, limits) + pdf_pages.savefig() + pdf_pages.close() + plt.close('all') + + @staticmethod + def _calculate_limits(data): + vmin, vmax = np.inf, -np.inf + for v in data.values(): + vmin = min(vmin, v.min().values) + vmax = max(vmax, v.max().values) + return relative_round(float(vmin), 2, floor=True), relative_round(float(vmax), 2, ceil=True) + + @staticmethod + def _set_bounds(limits, ncolors, error_metric): + bound_lims = {"ioa": [0, 1], "mnmb": [-2, 2]}.get(error_metric, limits) + vmin = relative_round(bound_lims[0], 2, floor=True) + vmax = relative_round(bound_lims[1], 2, ceil=True) + interval = relative_round((vmax - vmin) / ncolors, 1, ceil=True) + bounds = np.sort(np.arange(vmax, vmin, -interval)) + return bounds + + @staticmethod + def _get_colorpalette(error_metric): + # cmap = matplotlib.cm.coolwarm + # cmap = sns.color_palette("magma_r", as_cmap=True) + # cmap="Spectral_r", cmap="RdYlBu_r", cmap="coolwarm", + # cmap = sns.cubehelix_palette(8, start=2, rot=0, dark=0, light=.95, as_cmap=True) + if error_metric == "mnmb": + cmap = sns.mpl_palette("coolwarm", as_cmap=True) + elif error_metric == "ioa": + cmap = sns.mpl_palette("coolwarm_r", as_cmap=True) + else: + cmap = sns.color_palette("magma_r", as_cmap=True) + return cmap + + def plot(self, plot_data, error_metric, error_long_name, error_units, model_type, limits, ahead=None): + import cartopy.crs as ccrs + from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER + fig = 
plt.figure(figsize=(10, 5))
+        ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
+        _gl = ax.gridlines(xlocs=range(-180, 180, 5), ylocs=range(-90, 90, 2), draw_labels=True)
+        _gl.xformatter = LONGITUDE_FORMATTER
+        _gl.yformatter = LATITUDE_FORMATTER
+        self._draw_background(ax)
+        cmap = self._get_colorpalette(error_metric)
+        ncolors = 20
+        bounds = self._set_bounds(limits, ncolors, error_metric)
+        norm = colors.BoundaryNorm(bounds, cmap.N, extend='both')
+        cb = ax.scatter(plot_data["lon"], plot_data["lat"], c=plot_data[error_metric], marker='o', s=50,
+                        transform=ccrs.PlateCarree(), zorder=2, cmap=cmap, norm=norm)
+        cbar_label = f"{error_long_name} (in {error_units})" if error_units is not None else error_long_name
+        plt.colorbar(cb, label=cbar_label)
+        self._adjust_extent(ax)
+        sampling_letter = {"d": "D", "h": "H"}.get(self.sampling, self.sampling)
+        title = model_type if ahead is None else f"{model_type} ({sampling_letter}{ahead})"
+        plt.title(title)
+        plt.tight_layout()
+
+    @staticmethod
+    def _adjust_extent(ax):
+        import cartopy.crs as ccrs
+
+        def diff(arr):
+            return arr[1] - arr[0], arr[3] - arr[2]
+
+        def find_ratio(delta, reference=5):
+            return min(max(abs(reference / delta[0]), abs(reference / delta[1])), 5)
+
+        extent = ax.get_extent(crs=ccrs.PlateCarree())
+        ratio = find_ratio(diff(extent))
+        new_extent = extent + np.array([-1, 1, -1, 1]) * ratio
+        ax.set_extent(new_extent, crs=ccrs.PlateCarree())
+
+    @staticmethod
+    def _extract_coords(gen):
+        coll = []
+        for station in gen:
+            coords = station.get_coordinates()
+            coll.append((str(station), coords["lon"], coords["lat"]))
+        return pd.DataFrame(coll, columns=["station", "lon", "lat"]).set_index("station")
+
+    @staticmethod
+    def _prepare_data(errors, model_type_dim, model_type, ahead_dim, error_metric, split_ahead=False):
+        e = errors.sel({model_type_dim: model_type}, drop=True)
+        if split_ahead is False:
+            e = e.mean(ahead_dim)
+        return e.to_dataframe(error_metric)
+
+    @staticmethod
+    def _draw_background(ax):
+        """Draw coastline, lakes, ocean, rivers and country borders as background on the map."""
+        import cartopy.feature as cfeature
+
+        ax.add_feature(cfeature.LAND.with_scale("50m"))
+        ax.natural_earth_shp(resolution='50m')
+        ax.add_feature(cfeature.COASTLINE.with_scale("50m"), edgecolor='black')
+        ax.add_feature(cfeature.LAKES.with_scale("50m"))
+        ax.add_feature(cfeature.OCEAN.with_scale("50m"))
+        ax.add_feature(cfeature.RIVERS.with_scale("50m"))
+        ax.add_feature(cfeature.BORDERS.with_scale("50m"), facecolor='none', edgecolor='black')
+
+    def _plot_individual(self):
+        import cartopy.crs as ccrs
+        from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
+
+        for competitor in self.reference_models:
+            file_name = os.path.join(self.skill_score_report_path,
+                                     f"error_report_skill_score_{self.model_name}_-_{competitor}.csv")
+            df = self.open_data(file_name)
+
+            plot_path = os.path.join(os.path.abspath(self.plot_folder),
+                                     f"{self.plot_name}_{self.model_name}_-_{competitor}.pdf")
+            pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+
+            # colour scale shared by all lead times (same bounds as in _plot)
+            bounds = np.linspace(-1, 1, 100)
+            cmap = mpl.cm.coolwarm
+            norm = colors.BoundaryNorm(bounds, cmap.N, extend='both')
+            ticks = np.arange(norm.vmin, norm.vmax + .2, .2)
+
+            for i, lead_name in enumerate(df.columns[:-2]):  # last two columns are lat and lon
+                fig = plt.figure()
+                self._ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
+                self._ax.scatter(df.lon.values, df.lat.values, c=df[lead_name],
+                                 transform=ccrs.PlateCarree(),
+                                 norm=norm, cmap=cmap)
+                self._gl = self._ax.gridlines(xlocs=range(0, 21, 5), ylocs=range(44, 59, 2), draw_labels=True)
+                self._gl.xformatter = LONGITUDE_FORMATTER
+                self._gl.yformatter = LATITUDE_FORMATTER
+                label = f"Skill Score: {lead_name.replace('-', 'vs.').replace('(t+', ' (').replace(')', 'd)')}"
+                self._cbar = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
+                                          orientation='horizontal', ticks=ticks,
+                                          label=label)
+                pdf_pages.savefig()  # store one page per lead time
+
+            # close the pdf and all open figures / plots
+            pdf_pages.close()
+            plt.close('all')
+
+    def _plot(self, ncol: int = 2):
+        import cartopy.feature as cfeature
+        import cartopy.crs as ccrs
+        from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
+        import string
+        base_plot_name = self.plot_name
+        for competitor in self.reference_models:
+            file_name = os.path.join(self.skill_score_report_path,
+                                     f"error_report_skill_score_{self.model_name}_-_{competitor}.csv")
+
+            self.plot_name = f"{base_plot_name}_{self.model_name}_-_{competitor}"
+            df = self.open_data(file_name)
+
+            nrow = int(np.ceil(len(df.columns[:-2])/ncol))
+            bounds = np.linspace(-1, 1, 100)
+            cmap = mpl.cm.coolwarm
+            norm = colors.BoundaryNorm(bounds, cmap.N, extend='both')
+            ticks = np.arange(norm.vmin, norm.vmax + .2, .2)
+            fig, self._axes = plt.subplots(nrows=nrow, ncols=ncol, subplot_kw={'projection': ccrs.PlateCarree()})
+            for i, ax in enumerate(self._axes.reshape(-1)):
+                if i >= len(df.columns[:-2]):  # hide grid cells without a lead time (last two columns are lat and lon)
+                    ax.set_visible(False)
+                    continue
+                sub_name = f"({string.ascii_lowercase[i]})"
+                lead_name = df.columns[i]
+                ax.add_feature(cfeature.LAND.with_scale("50m"))
+                ax.add_feature(cfeature.COASTLINE.with_scale("50m"), edgecolor='black')
+                ax.add_feature(cfeature.OCEAN.with_scale("50m"))
+                ax.add_feature(cfeature.RIVERS.with_scale("50m"))
+                ax.add_feature(cfeature.BORDERS.with_scale("50m"), facecolor='none', edgecolor='black')
+                ax.scatter(df.lon.values, df.lat.values, c=df[lead_name],
+                           marker='.',
+                           transform=ccrs.PlateCarree(),
+                           norm=norm, cmap=cmap)
+                gl = ax.gridlines(xlocs=range(0, 21, 5), ylocs=range(44, 59, 2), draw_labels=True)
+                gl.xformatter = LONGITUDE_FORMATTER
+                gl.yformatter = LATITUDE_FORMATTER
+                gl.top_labels = False
+                gl.right_labels = False
+                ax.text(0.01, 1.09, f'{sub_name} {lead_name.split("+")[1][:-1]}d',
+                        verticalalignment='top', horizontalalignment='left',
+                        transform=ax.transAxes,
+                        color='black',
+                        )
+                label = f"Skill Score: {lead_name.replace('-', 'vs.').split('(')[0]}"
+
+            fig.subplots_adjust(bottom=0.18)
+            cax = fig.add_axes([0.15, 0.1, 0.7, 0.02])
+            self._cbar = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
+                                      orientation='horizontal',
+                                      ticks=ticks,
+                                      label=label,
+                                      cax=cax
+                                      )
+
+            fig.subplots_adjust(wspace=.001, hspace=.2)
+            self._save(bbox_inches="tight")
+            plt.close('all')
+
+    @staticmethod
+    def get_coords_from_index(name_string: str) -> List[float]:
+        """
+        Extract geographic coordinates from a station index name.
+
+        :param name_string: index string of the form "<station>__<lat>__<lon>", where "_" is used as decimal separator
+        :type name_string: str
+        :return: List of coords [lat, lon]
+        :rtype: List
+        """
+        res = [float(frac.replace("_", ".")) for frac in name_string.split(sep="__")[1:]]
+        return res
diff --git a/mlair/plotting/tracker_plot.py b/mlair/plotting/tracker_plot.py
index 53ec7496e7e04da0f53b1d0ce817793dea732963..5c5887a6e1f9b03ed06ade49a49cd70edc51e820 100644
--- a/mlair/plotting/tracker_plot.py
+++ b/mlair/plotting/tracker_plot.py
@@ -319,8 +319,12 @@ class TrackPlot:
             x_max = None
         if chain.successor is not None:
+            stage_count = 0
             for e in chain.successor:
                 if e.stage == stage:
+                    stage_count += 1
+                    if stage_count > 50:
+                        continue
                     shift = self.width + self.space_intern_x if chain.stage == e.stage else 0
                     x_tmp = self.plot_track_chain(e, y_pos, x_pos + shift, prev=(x, y), stage=stage, sparse_conn_mode=sparse_conn_mode)
diff --git a/mlair/run_modules/model_setup.py 
b/mlair/run_modules/model_setup.py index b51a3f9c76ace4feef72e4c96945b463fb69a673..efeff06260d1df41d4a83592c225f316574e77eb 100644 --- a/mlair/run_modules/model_setup.py +++ b/mlair/run_modules/model_setup.py @@ -74,9 +74,6 @@ class ModelSetup(RunEnvironment): # set channels depending on inputs self._set_shapes() - # set number of training samples (total) - self._set_num_of_training_samples() - # build model graph using settings from my_model_settings() self.build_model() @@ -109,15 +106,11 @@ class ModelSetup(RunEnvironment): def _set_num_of_training_samples(self): """ Set number of training samples - needed for example for Bayesian NNs""" samples = 0 - for s in self.data_store.get("data_collection", "train"): - if isinstance(s.get_Y(), list): - s_sam = s.get_Y()[0].shape[0] - elif isinstance(s.get_Y(), tuple): - s_sam = s.get_Y().shape[0] - else: - s_sam = np.nan - samples += s_sam - self.num_of_training_samples = samples + upsampling = self.data_store.create_args_dict(["upsampling"], "train") + for data in self.data_store.get("data_collection", "train"): + length = data.__len__(**upsampling) + samples += length + return samples def compile_model(self): """ @@ -179,10 +172,11 @@ class ModelSetup(RunEnvironment): model = self.data_store.get("model_class") args_list = model.requirements() if "num_of_training_samples" in args_list: - self.data_store.set("num_of_training_samples", self.num_of_training_samples, scope=self.scope) - logging.info(f"Store number of training samples ({self.num_of_training_samples}) in data_store: " - f"self.data_store.set('num_of_training_samples', {self.num_of_training_samples}, scope='{self.scope}')") - + num_of_training_samples = self._set_num_of_training_samples() + self.data_store.set("num_of_training_samples", num_of_training_samples, scope=self.scope) + logging.info(f"Store number of training samples ({num_of_training_samples}) in data_store: " + f"self.data_store.set('num_of_training_samples', {num_of_training_samples}, scope=" + f"'{self.scope}')") args = self.data_store.create_args_dict(args_list, self.scope) self.model = model(**args) self.get_model_settings() diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index a48a82b25804da34d07807073b0c153408e4e028..e4cc34f66db7426876209fece0dc902341ac6582 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -24,7 +24,8 @@ from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel from mlair.model_modules import AbstractModelClass from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \ PlotCompetitiveSkillScore, PlotTimeSeries, PlotFeatureImportanceSkillScore, PlotConditionalQuantiles, \ - PlotSeparationOfScales, PlotSampleUncertaintyFromBootstrap, PlotTimeEvolutionMetric + PlotSeparationOfScales, PlotSampleUncertaintyFromBootstrap, PlotTimeEvolutionMetric, PlotSeasonalMSEStack, \ + PlotErrorsOnMap from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \ PlotPeriodogram, PlotDataHistogram from mlair.run_modules.run_environment import RunEnvironment @@ -72,6 +73,7 @@ class PostProcessing(RunEnvironment): self.model: AbstractModelClass = self._load_model() self.model_name = self.data_store.get("model_name", "model").rsplit("/", 1)[1].split(".", 1)[0] self.ols_model = None + self.persi_model = True self.batch_size: int = self.data_store.get_default("batch_size", "model", 64) self.test_data = 
self.data_store.get("data_collection", "test") batch_path = self.data_store.get("batch_path", scope="test") @@ -86,10 +88,12 @@ class PostProcessing(RunEnvironment): self._sampling = self.data_store.get("sampling") self.window_lead_time = extract_value(self.data_store.get("output_shape", "model")) self.skill_scores = None + self.errors = None self.feature_importance_skill_scores = None self.uncertainty_estimate = None self.uncertainty_estimate_seasons = {} self.block_mse_per_station = None + self.block_mse = None self.competitor_path = self.data_store.get("competitor_path") self.competitors = to_list(self.data_store.get_default("competitors", default=[])) self.forecast_indicator = "nn" @@ -106,6 +110,9 @@ class PostProcessing(RunEnvironment): # ols model self.train_ols_model() + # persi model + self.setup_persistence() + # forecasts on test data self.make_prediction(self.test_data) self.make_prediction(self.train_val_data) @@ -113,6 +120,10 @@ class PostProcessing(RunEnvironment): # calculate error metrics on test data self.calculate_test_score() + # calculate monthly block mse + self.block_mse, self.block_mse_per_station = self.calculate_block_mse(evaluate_competitors=True, + separate_ahead=True, block_length="1m") + # sample uncertainty if self.data_store.get("do_uncertainty_estimate", "postprocessing"): self.estimate_sample_uncertainty(separate_ahead=True) @@ -136,6 +147,7 @@ class PostProcessing(RunEnvironment): self.report_error_metrics(errors) self.report_error_metrics({self.forecast_indicator: skill_score_climatological}) self.report_error_metrics({"skill_score": skill_score_competitive}) + self.store_errors(errors) # plotting self.plot() @@ -152,10 +164,12 @@ class PostProcessing(RunEnvironment): block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate") evaluate_competitors = self.data_store.get_default("evaluate_competitors", default=True, scope="uncertainty_estimate") - block_mse, block_mse_per_station = self.calculate_block_mse(evaluate_competitors=evaluate_competitors, - separate_ahead=separate_ahead, - block_length=block_length) - self.block_mse_per_station = block_mse_per_station + if evaluate_competitors is True and separate_ahead is True and block_length == "1m": + block_mse, block_mse_per_station = self.block_mse, self.block_mse_per_station + else: + block_mse, block_mse_per_station = self.calculate_block_mse(evaluate_competitors=evaluate_competitors, + separate_ahead=separate_ahead, + block_length=block_length) estimate = statistics.create_n_bootstrap_realizations( block_mse, dim_name_time=self.index_dim, dim_name_model=self.model_type_dim, dim_name_boots=self.uncertainty_estimate_boot_dim, n_boots=n_boots, seasons=["DJF", "MAM", "JJA", "SON"]) @@ -178,6 +192,10 @@ class PostProcessing(RunEnvironment): file_name = os.path.join(report_path, f"uncertainty_estimate_raw_results_{season}.nc") self.uncertainty_estimate_seasons[season].to_netcdf(path=file_name) + # store block mse per station + file_name = os.path.join(report_path, f"block_mse_raw_results.nc") + self.block_mse_per_station.to_netcdf(path=file_name) + # store statistics if percentiles is None: percentiles = [.05, .1, .25, .5, .75, .9, .95] @@ -565,7 +583,10 @@ class PostProcessing(RunEnvironment): if "PlotConditionalQuantiles" in plot_list: PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=self.forecast_path, plot_folder=self.plot_path, forecast_indicator=self.forecast_indicator, - obs_indicator=self.observation_indicator) + 
obs_indicator=self.observation_indicator)
+                                         obs_indicator=self.observation_indicator, competitors=self.competitors,
+                                         model_type_dim=self.model_type_dim, index_dim=self.index_dim,
+                                         ahead_dim=self.ahead_dim, competitor_path=self.competitor_path,
+                                         model_name=self.model_display_name)
         except Exception as e:
             logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error:"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -618,6 +639,25 @@
             logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")

+        try:
+            if "PlotErrorMetrics" in plot_list and self.errors is not None:
+                error_metric_units = statistics.get_error_metrics_units("ppb")
+                error_metric_names = statistics.get_error_metrics_long_name()
+                for error_metric in self.errors.keys():
+                    try:
+                        PlotSampleUncertaintyFromBootstrap(
+                            data=self.errors[error_metric], plot_folder=self.plot_path, model_type_dim=self.model_type_dim,
+                            dim_name_boots="station", error_measure=error_metric_names[error_metric],
+                            error_unit=error_metric_units[error_metric], model_name=self.model_display_name,
+                            model_indicator=self.model_display_name, sampling=self._sampling, apply_root=False,
+                            plot_name=f"error_plot_{error_metric}")
+                    except Exception as e:
+                        logging.error(f"Could not create plot PlotErrorMetrics for {error_metric} due to the following "
+                                      f"error: {e}\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
+        except Exception as e:
+            logging.error(f"Could not create plot PlotErrorMetrics due to the following error: {e}"
+                          f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
+
         try:
             if "PlotStationMap" in plot_list:
                 gens = [(self.train_data, {"marker": 5, "ms": 9}),
@@ -681,6 +721,35 @@
             logging.error(f"Could not create plot PlotTimeEvolutionMetric due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")

+        try:
+            if "PlotSeasonalMSEStack" in plot_list:
+                report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
+                PlotSeasonalMSEStack(data=self.block_mse_per_station, data_path=report_path, plot_folder=self.plot_path,
+                                     boot_dim=self.uncertainty_estimate_boot_dim, ahead_dim=self.ahead_dim,
+                                     sampling=self._sampling, error_measure="Mean Squared Error", error_unit=r"ppb$^2$",
+                                     model_indicator=self.forecast_indicator, model_name=self.model_display_name,
+                                     model_type_dim=self.model_type_dim)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotSeasonalMSEStack due to the following error: {e}"
+                          f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
+
+        try:
+            if "PlotErrorsOnMap" in plot_list and self.errors is not None:
+                for error_metric in self.errors.keys():
+                    try:
+                        PlotErrorsOnMap(self.test_data, self.errors[error_metric], error_metric,
+                                        plot_folder=self.plot_path, sampling=self._sampling)
+                    except Exception as e:
+                        logging.error(f"Could not create plot PlotErrorsOnMap for {error_metric} due to the following "
+                                      f"error: {e}\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
+        except Exception as e:
+            logging.error(f"Could not create plot PlotErrorsOnMap due to the following error: {e}"
+                          f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
+
     @TimeTrackingWrapper
     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
@@ -705,6 +774,12 @@ class PostProcessing(RunEnvironment):
else: logging.info(f"Skip train ols model as it is not present in competitors.") + def setup_persistence(self): + """Check if persistence is requested from competitors and store this information.""" + self.persi_model = any(x in map(str.lower, self.competitors) for x in ["persi", "persistence"]) + if self.persi_model is False: + logging.info(f"Persistence is not calculated as it is not present in competitors.") + @TimeTrackingWrapper def make_prediction(self, subset): """ @@ -738,8 +813,11 @@ class PostProcessing(RunEnvironment): nn_prediction = self._create_nn_forecast(copy.deepcopy(nn_output), nn_prediction, transformation_func, normalised) # persistence - persistence_prediction = self._create_persistence_forecast(observation_data, persistence_prediction, - transformation_func, normalised) + if self.persi_model is True: + persistence_prediction = self._create_persistence_forecast(observation_data, persistence_prediction, + transformation_func, normalised) + else: + persistence_prediction = None # ols if self.ols_model is not None: @@ -1134,3 +1212,18 @@ class PostProcessing(RunEnvironment): file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_').replace('/', '_') tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df) tables.save_to_md(report_path, file_name % "md", df=df) + + def store_errors(self, errors): + metric_collection = {} + error_dim = "error_metric" + station_dim = "station" + for model_type in errors.keys(): + station_collection = {} + for station, station_errors in errors[model_type].items(): + if station == "total": + continue + station_collection[station] = xr.Dataset(station_errors).to_array(error_dim) + metric_collection[model_type] = xr.Dataset(station_collection).to_array(station_dim) + coll = xr.Dataset(metric_collection).to_array(self.model_type_dim) + coll = coll.transpose(station_dim, self.ahead_dim, self.model_type_dim, error_dim) + self.errors = {k: coll.sel({error_dim: k}, drop=True) for k in coll.coords[error_dim].values} diff --git a/mlair/run_modules/run_environment.py b/mlair/run_modules/run_environment.py index 2bc81750bf86d60e0c59bbf0fef68ae9c29138c9..9edb0aa27379cc2477e0240c43e4745ab5db1ce2 100644 --- a/mlair/run_modules/run_environment.py +++ b/mlair/run_modules/run_environment.py @@ -8,6 +8,7 @@ import logging import os import shutil import time +import sys from mlair.helpers.datastore import DataStoreByScope as DataStoreObject from mlair.helpers.datastore import NameNotFoundInDataStore @@ -160,8 +161,12 @@ class RunEnvironment(object): json.dump(tracker, f) def __plot_tracking(self): - plot_folder, plot_name = os.path.split(self.__find_file_pattern("tracking_%03i.pdf")) - TrackPlot(self.tracker_list, sparse_conn_mode=True, plot_folder=plot_folder, plot_name=plot_name) + try: + plot_folder, plot_name = os.path.split(self.__find_file_pattern("tracking_%03i.pdf")) + TrackPlot(self.tracker_list, sparse_conn_mode=True, plot_folder=plot_folder, plot_name=plot_name) + except Exception as e: + logging.info(f"Could not plot tracking plot due to:\n{sys.exc_info()[0]}\n" + f"{sys.exc_info()[1]}\n{sys.exc_info()[2]}") def __find_file_pattern(self, name): counter = 0 diff --git a/requirements.txt b/requirements.txt index f644ae9257c0b5a18492f8a2d0ef27d1246ec0d4..e6578031a037ce89054db061c2582dea4d91fdca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ pytest-html==3.1.1 pytest-lazy-fixture==0.6.3 requests==2.28.1 scipy==1.7.1 -seaborn==0.11.2 +seaborn==0.12.0 setuptools==47.1.0 --no-binary shapely 
Shapely==1.8.0 six==1.15.0 diff --git a/test/test_helpers/test_helpers.py b/test/test_helpers/test_helpers.py index 6f787d5835bd917fcfc55341d93a2d302f2c6e6e..22eaa102544f93511007204e6633de143c3e022c 100644 --- a/test/test_helpers/test_helpers.py +++ b/test/test_helpers/test_helpers.py @@ -16,7 +16,7 @@ from mlair.helpers import to_list, dict_to_xarray, float_round, remove_items, ex sort_like, filter_dict_by_value from mlair.helpers import PyTestRegex, check_nested_equality from mlair.helpers import Logger, TimeTracking -from mlair.helpers.helpers import is_xarray, convert2xrda, relative_round +from mlair.helpers.helpers import is_xarray, convert2xrda, relative_round, get_order class TestToList: @@ -191,6 +191,10 @@ class TestRelativeRound: assert relative_round(0.03112, 1) == 0.03 assert relative_round(0.031126, 4) == 0.03113 + def test_relative_round_zero(self): + assert relative_round(0, 1) == 0 + assert relative_round(0, 4) == 0 + def test_relative_round_negative_numbers(self): assert relative_round(-101.2033, 5) == -101.2 assert relative_round(-106, 2) == -110 @@ -204,6 +208,66 @@ class TestRelativeRound: with pytest.raises(TypeError): relative_round(300, 1.1) + def test_relative_round_floor(self): + assert relative_round(7.5, 1, floor=True) == 7 + assert relative_round(7.7, 1, floor=True) == 7 + assert relative_round(7.9, 1, floor=True) == 7 + assert relative_round(7.993, 2, floor=True) == 7.9 + assert relative_round(17.9312, 1, floor=True) == 10 + assert relative_round(17.9312, 2, floor=True) == 17 + assert relative_round(127.43, 3, floor=True) == 127 + assert relative_round(0.025, 1, floor=True) == 0.02 + + def test_relative_round_floor_neg_numbers(self): + assert relative_round(-7.9, 1, floor=True) == -8 + assert relative_round(-7.4, 1, floor=True) == -8 + assert relative_round(-7.42, 2, floor=True) == -7.5 + assert relative_round(-127.43, 3, floor=True) == -128 + assert relative_round(-127.43, 2, floor=True) == -130 + assert relative_round(-127.43, 1, floor=True) == -200 + + def test_relative_round_ceil(self): + assert relative_round(7.5, 1, ceil=True) == 8 + assert relative_round(7.7, 1, ceil=True) == 8 + assert relative_round(7.2, 1, ceil=True) == 8 + assert relative_round(7.993, 2, ceil=True) == 8 + assert relative_round(17.9312, 1, ceil=True) == 20 + assert relative_round(17.9312, 2, ceil=True) == 18 + assert relative_round(127.43, 3, ceil=True) == 128 + + def test_relative_round_ceil_neg_numbers(self): + assert relative_round(-7.9, 1, ceil=True) == -7 + assert relative_round(-7.4, 1, ceil=True) == -7 + assert relative_round(-7.42, 2, ceil=True) == -7.4 + assert relative_round(-127.43, 3, ceil=True) == -127 + assert relative_round(-127.43, 2, ceil=True) == -120 + assert relative_round(-127.43, 1, ceil=True) == -100 + + def test_relative_round_ceil_floor(self): + with pytest.raises(AssertionError): + relative_round(300, -1, ceil=True, floor=True) + + +class TestGetOrder: + + def test_get_order(self): + assert get_order(10) == 1 + assert get_order(11) == 1 + assert get_order(9.99) == 0 + assert get_order(1000) == 3 + assert get_order(.006) == -3 + + def test_get_order_neg_orders(self): + assert get_order(.006) == -3 + assert np.isinf(get_order(0)) + assert get_order(0.00622) == -3 + assert get_order(0.00022) == -4 + + def test_get_order_neg_numbers(self): + assert get_order(-0.00622) == -3 + assert get_order(-0.1) == -1 + assert get_order(-622) == 2 + class TestSelectFromDict: diff --git a/test/test_helpers/test_statistics.py b/test/test_helpers/test_statistics.py index 
6f1952a9c1df2b16a2e298b433d732d9e69200d4..5ff6f7ee526960a90755f09e5c090735e2b532bb 100644 --- a/test/test_helpers/test_statistics.py +++ b/test/test_helpers/test_statistics.py @@ -375,6 +375,7 @@ class TestCalculateErrorMetrics: x_array1 = xr.DataArray(d1, coords=coords, dims=coords.keys()) x_array2 = xr.DataArray(d2, coords=coords, dims=coords.keys()) expected = {"mse": xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), + "me": xr.DataArray(np.array([-1./3, -2./3, 0, 0, -1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), "rmse": np.sqrt(xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"])), "mae": xr.DataArray(np.array([1, 4./3, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), "ioa": xr.DataArray(np.array([0.3721, 0.4255, 1, 1, 0.4706]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), @@ -383,6 +384,7 @@ class TestCalculateErrorMetrics: assert check_nested_equality(expected, calculate_error_metrics(x_array1, x_array2, "index"), 3) is True expected = {"mse": xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]), + "me": xr.DataArray(np.array([-0.8, -0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]), "rmse": np.sqrt(xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"])), "mae": xr.DataArray(np.array([0.8, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]), "ioa": xr.DataArray(np.array([0.8478, 0.9333, 0.9629]), coords={"index": [0, 1, 2]}, dims=["index"]), diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py index 6646e1a4795756edd1792ef91f535132e8cde61d..9debec9f04583b401698cea2f79f78b523fe7927 100644 --- a/test/test_run_modules/test_pre_processing.py +++ b/test/test_run_modules/test_pre_processing.py @@ -45,14 +45,16 @@ class TestPreProcessing: with PreProcessing(): assert caplog.record_tuples[0] == ('root', 20, 'PreProcessing started') assert caplog.record_tuples[1] == ('root', 20, 'check valid stations started (preprocessing)') - assert caplog.record_tuples[-6] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 3 ' + assert caplog.record_tuples[-7] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 3 ' r'station\(s\). Found 3/3 valid stations.')) - assert caplog.record_tuples[-5] == ('root', 20, "use serial create_info_df (train)") - assert caplog.record_tuples[-4] == ('root', 20, "use serial create_info_df (val)") - assert caplog.record_tuples[-3] == ('root', 20, "use serial create_info_df (test)") - assert caplog.record_tuples[-2] == ('root', 20, "Searching for competitors to be prepared for use.") - assert caplog.record_tuples[-1] == ('root', 20, "No preparation required for competitor ols as no specific " + assert caplog.record_tuples[-6] == ('root', 20, "use serial create_info_df (train)") + assert caplog.record_tuples[-5] == ('root', 20, "use serial create_info_df (val)") + assert caplog.record_tuples[-4] == ('root', 20, "use serial create_info_df (test)") + assert caplog.record_tuples[-3] == ('root', 20, "Searching for competitors to be prepared for use.") + assert caplog.record_tuples[-2] == ('root', 20, "No preparation required for competitor ols as no specific " "instruction is provided.") + assert caplog.record_tuples[-1] == ('root', 20, "No preparation required for competitor persi as no " + "specific instruction is provided.") RunEnvironment().__del__() def test_run(self, obj_with_exp_setup):