From 010cf989058d4fb383401f3fd53e864d617ba69f Mon Sep 17 00:00:00 2001 From: Michael <m.langguth@fz-juelich.de> Date: Thu, 29 Jun 2023 15:24:11 +0200 Subject: [PATCH] Add example runscripts for MLAir with IFS forecasts to repo incl. various comments in respective Python-file. --- ...run_german_background_stations_O3ResNet.py | 449 ++++++++++++++++++ ...run_hdfml_batch_ifs_forecast_O3ResNet.bash | 17 + .../run_ifs_forecast_O3ResNet.py | 224 +++++++++ mlair/helpers/data_sources/ifs.py | 6 +- 4 files changed, 695 insertions(+), 1 deletion(-) create mode 100644 example_runscripts_ifs/run_german_background_stations_O3ResNet.py create mode 100644 example_runscripts_ifs/run_hdfml_batch_ifs_forecast_O3ResNet.bash create mode 100644 example_runscripts_ifs/run_ifs_forecast_O3ResNet.py diff --git a/example_runscripts_ifs/run_german_background_stations_O3ResNet.py b/example_runscripts_ifs/run_german_background_stations_O3ResNet.py new file mode 100644 index 0000000..06116b6 --- /dev/null +++ b/example_runscripts_ifs/run_german_background_stations_O3ResNet.py @@ -0,0 +1,449 @@ +__author__ = "Lukas Leufen" +__date__ = '2023-05-04' +__note__ = "Use O3ResNet model from Leufen et al (2023) and run on all german background stations in any type of area (rural, suburban, urban)." 
+ +import argparse +import sys +sys.path.append("/p/home/jusers/leufen1/juwels/intelliaq/mlair_tf2/mlair") +import logging + +from mlair.workflows import DefaultWorkflow +from mlair.helpers import remove_items, to_list +from mlair.helpers.helpers import str2bool +from mlair.configuration.defaults import DEFAULT_PLOT_LIST, DEFAULT_TRAIN_END +from mlair.model_modules.branched_input_networks import BranchedInputResNet as NN +import os +from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSamplingWithClimateAndFirFilter + + +def load_stations(station_type_of_area=None): + import json + try: + filename = '/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/supplement/station_lists/germany_background_all_area_type.json' + with open(filename, 'r') as jfile: + stations_raw = json.load(jfile) + stations = [] + if station_type_of_area is None: + station_type_of_area = list(stations_raw.keys()) + station_type_of_area = to_list(station_type_of_area) + for type_of_area in station_type_of_area: + stations.extend(stations_raw[type_of_area]) + except FileNotFoundError: + stations = None + return stations + + + + +def load_layer_configuration(config_name="resnet_v08", activation=None, dropout=None): + config_dict = {"resnet_v01": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), 
"kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"}, ], + "resnet_v02": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": 
True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"}, ], + "resnet_v03": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"}, ], + "resnet_v04": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (9, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 
1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"},], + "resnet_v05": [ + {"type": "Conv2D", "activation": "relu", 
"kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"}, ], + "resnet_v06": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same", "strides": (2, 1)}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same", "strides": (2, 1)}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), 
"kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"}, ], + "resnet_v07": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 
128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, ], + "resnet_v08": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", 
"activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, ], + "resnet_v09": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"},], + "resnet_v10": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (9, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), 
"kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 128, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"},], + "resnet_v11": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same"}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), 
"kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, ], + "resnet_v12": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same", "strides": (2, 1)}, + {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same", "strides": (2, 1)}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, 
"strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"},], + "resnet_v13": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 16, "padding": "same", "strides": (2, 1)}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), 
"kernel_regularizer": "l2"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"},], + "resnet_v14": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 16, "padding": "same", "strides": (2, 1)}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), 
"kernel_regularizer": "l2"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"},], + "resnet_v15": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 16, "padding": "same", "strides": (2, 1)}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"},], + "resnet_v16": [ + {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 16, "padding": "same", "strides": (2, 1)}, + {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 16, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 
32, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True}, + {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"}, + {"type": "Dropout", "rate": 0.25}, + {"type": "Flatten"}, + {"type": "Concatenate"}, + {"type": "Dense", "units": 128, "activation": "relu"},] + } + + if config_name not in config_dict.keys(): + try: + return [int(item) for item in config_name.split(',')] + except: + return config_name + else: + config = config_dict[config_name] + if activation is not None or dropout is not None: + for c in config: + if "activation" in c.keys() and activation is not None: + c.update({"activation": activation}) + if "rate" in c.keys() and dropout is not None: + c.update({"rate": dropout}) + print(config) + return config + + +STATS = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', + 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values', + 'pblheight': 'maximum', 'press': 'average_values'} + + +DATA_ORIGIN = {"no": ["UBA", "EEA", "AIRBASE", "EMEP", "GAW"], "no2": ["UBA", "EEA", "AIRBASE", "EMEP", "GAW"], "o3": ["UBA", "EEA", "AIRBASE", "EMEP", "GAW"], + "cloudcover": "era5", "pblheight": "era5", "relhum": "era5", + "temp": "era5", "u": "era5", "v": "era5", "press": "era5"} + + +def main(parser_args): + + parser_dict = parser_args.__dict__ + parser_dict.update({'lr': 0.0003, 'decay': 0.0, 'batch_size': 1024, 'layer_configuration': 'resnet_v06', 'activation': 'prelu', 'activation_output': 'linear', 'kernel_regularizer': 'l1_l2', 'l2': 0.12, 'l1': 0.095, 'dropout': 0.59, 'batch_normalization': False}) + layer_configuration = parser_dict.pop("layer_configuration", None) + dropout = parser_dict.pop("dropout", None) + activation = parser_dict.pop("activation", None) + + args = 
dict( + sampling=("hourly", "daily"), + stations=load_stations(["rural", "suburban", "urban"]), + variables=["o3", "no", "no2", "cloudcover", "pblheight", "relhum", "temp", "u", "v", "press"], + statistics_per_var=STATS, + + start="2000-01-01", + end="2021-12-31", + train_start="2000-01-01", + train_end="2015-12-31", + val_start="2016-01-01", + val_end="2018-12-31", + test_start="2019-03-01", + test_end="2022-12-31", + + data_origin=DATA_ORIGIN, + data_handler=DataHandlerMixedSamplingWithClimateAndFirFilter, + filter_cutoff_period=[21], + filter_order=[42], + filter_window_type=("kaiser", 5), + filter_add_unfiltered=False, + apriori_sel_opts=slice("2015-12-31"), + apriori_type="residuum_stats", + apriori_diurnal=True, + use_filter_branches=True, + + # only past obs and input + # window_history_size=3 * 24 - 1, + # window_history_offset={"chem": 24, "meteo": 24}, + # window_history_end={"chem": 0, "meteo": 0}, + # extend_length_opts={"chem": 0, "meteo": 0}, # no forecast for meteo (similar to MBFCN) + # extend_length_opts = {"chem": 0}, # inf forecast for meteo + + # future obs and input for meteo + window_history_size={"chem":3 * 24 - 1, "meteo": 3 * 24 - 1 + 4 * 24}, + window_history_offset={"chem": 24, "meteo": 24}, + window_history_end={"chem": 0, "meteo": 4 * 24}, + extend_length_opts = {"chem": 0}, # inf forecast for meteo + + # extend_length_opts={"chem": 0, "meteo": 24}, # 1d forecast for meteo + # extend_length_opts={"chem": 0, "meteo": 3*24}, # 3d forecast for meteo + # extend_length_opts={"chem": 0, "meteo": 4*24}, # 4d forecast for meteo + # extend_length_opts={"chem": 0, "meteo": 7*24}, # 7d meteo forecast + # extend_length_opts={"chem": 0, "meteo": 14*24}, # 14d meteo forecast + + window_lead_time=4, # T1D + target_var="o3", # T1D + interpolation_limit=(24, 2), # T1F + transformation={ + "o3": {"method": "standardise"}, + "no": {"method": "standardise"}, + "no2": {"method": "standardise"}, + "cloudcover": {"method": "min_max", "feature_range": [-1, 
1]}, + "pblheight": {"method": "standardise"}, + "press": {"method": "standardise"}, + "relhum": {"method": "min_max", "feature_range": [-1, 1]}, + "temp": {"method": "standardise"}, + "u": {"method": "standardise"}, + "v": {"method": "standardise"}, }, + # transformation_file=os.path.join("/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/MB-FCN-LT_ST-c+0d_m+inf_past_network_daily", "data", "transformation", "transformation.pickle"), + # calculate_fresh_transformation=False, + + # train_model=True, create_new_model=True, + train_model=False, create_new_model=False, + epochs=100, + model=NN, + model_display_name="O3ResNet", + layer_configuration = load_layer_configuration(layer_configuration, activation, dropout), + evaluate_feature_importance=False, + feature_importance_bootstrap_type=["singleinput", "branch", "variable"], + feature_importance_bootstrap_method=["zero_mean"], + feature_importance_create_new_bootstraps=True, + feature_importance_n_boots=100, + plot_list = ["PlotMonthlySummary", "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotFeatureImportanceSkillScore", "PlotConditionalQuantiles", "PlotSampleUncertaintyFromBootstrap", "PlotTimeEvolutionMetric", "PlotSeasonalMSEStack", "PlotErrorMetrics", "PlotErrorsOnMap", "PlotDataMonthlyDistribution"], + # competitors=["MBFCN-cEU-m+0d_past", "MBFCN-cEU-m+inf_fut", "MBCNN-cEU-m+inf_fut", "MBRNN-cEU-m+inf_fut", "MBUNet-cEU-m+inf_fut"], + # competitors=["MBFCN-cEU-m+inf_fut", "MBCNN-cEU-m+inf_fut", "MBRNN-cEU-m+inf_fut", "MBUNet-cEU-m+inf_fut"], + competitors=["CAMS"], + cams_interp_method=["linear", "nearest"], + cams_data_path="/p/project/deepacf/intelliaq/leufen1/DATA/CAMS_dma8eu", + data_path="/p/project/deepacf/intelliaq/leufen1/DATA/GERMANY/", + overwrite_local_data=False, + overwrite_lazy_data=False, + lazy_preprocessing=True, + use_multiprocessing=True, + max_number_multiprocessing=16, + + # 
snapshot_load_path="/p/project/deepacf/intelliaq/leufen1/MLAir_snapshots/EU_c+0d_m+inf_fut/snapshot_preprocessing_jay.pickle", ## test start 2019-01-01 + # snapshot_load_path="/p/project/deepacf/intelliaq/leufen1/MLAir_snapshots/EU_c+0d_m+inf_fut/snapshot_preprocessing_rooster.pickle", # test start 2019-03-01 + + # create_snapshot=True, + # snapshot_path="/p/project/deepacf/intelliaq/leufen1/MLAir_snapshots/EU_c+0d_m+inf_fut", + experiment_path=os.path.abspath(os.getcwd()), + **parser_dict + ) + print(parser_args.__dict__) + workflow = DefaultWorkflow(**args, start_script=__file__) + workflow.run() + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default="testrun", + help="set experiment date as string") + parser.add_argument('--lr', default=argparse.SUPPRESS, type=float) + parser.add_argument('--decay', default=argparse.SUPPRESS, type=float) + parser.add_argument('--n_layer', default=argparse.SUPPRESS, type=int) + parser.add_argument('--n_hidden', default=argparse.SUPPRESS, type=int) + parser.add_argument('--activation', default=argparse.SUPPRESS, type=str) + parser.add_argument('--activation_output', default=argparse.SUPPRESS, type=str) + parser.add_argument('--batch_size', default=argparse.SUPPRESS, type=int) + parser.add_argument('--regularizer', default=argparse.SUPPRESS, type=str) + parser.add_argument('--l1', default=argparse.SUPPRESS, type=float) + parser.add_argument('--l2', default=argparse.SUPPRESS, type=float) + parser.add_argument('--dropout', default=argparse.SUPPRESS, type=float) + parser.add_argument('--dropout_rnn', default=argparse.SUPPRESS, type=float) + parser.add_argument('--layer_configuration', default=argparse.SUPPRESS, type=lambda s: [int(item) for item in s.split(',')]) + parser.add_argument('--kernel_size', default=argparse.SUPPRESS, type=int) + parser.add_argument('--batch_normalization', default=argparse.SUPPRESS, type=lambda s: str2bool(s)) + 
parser.add_argument('--exponent_last_layer', default=argparse.SUPPRESS, type=int)
+    parser.add_argument('--loss_type', default=argparse.SUPPRESS, type=str)
+    parser.add_argument('--loss_weights', default=argparse.SUPPRESS, type=lambda s: [int(item) for item in s.split(',')])
+
+    args = parser.parse_args()
+    main(args)
diff --git a/example_runscripts_ifs/run_hdfml_batch_ifs_forecast_O3ResNet.bash b/example_runscripts_ifs/run_hdfml_batch_ifs_forecast_O3ResNet.bash
new file mode 100644
index 0000000..ccf1b7a
--- /dev/null
+++ b/example_runscripts_ifs/run_hdfml_batch_ifs_forecast_O3ResNet.bash
@@ -0,0 +1,17 @@
+#!/bin/bash -x
+#SBATCH --account=deepacf
+#SBATCH --nodes=1
+#SBATCH --output=HPC_logging/mlt-out.%j
+#SBATCH --error=HPC_logging/mlt-err.%j
+#SBATCH --time=23:59:00
+#SBATCH --gres=gpu:4
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=l.leufen@fz-juelich.de
+
+source /p/home/jusers/leufen1/hdfml/intelliaq/mlair_tf2/mlair/HPC_setup/mlt_modules_hdfml.sh
+source /p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/bin/activate
+
+export PYTHONPATH=/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tf2/mlair:${PYTHONPATH}  # prepend the MLAir checkout to the module search path
+
+RUNFILE="run_ifs_forecast_O3ResNet.py"  # relative path, i.e. resolved against the working directory of the job
+srun --cpu-bind=none python "${RUNFILE}" --experiment_date="O3ResNet_ifs_forecast_no_train"
diff --git a/example_runscripts_ifs/run_ifs_forecast_O3ResNet.py b/example_runscripts_ifs/run_ifs_forecast_O3ResNet.py
new file mode 100644
index 0000000..9fded3c
--- /dev/null
+++ b/example_runscripts_ifs/run_ifs_forecast_O3ResNet.py
@@ -0,0 +1,224 @@
+__author__ = "Lukas Leufen"
+__date__ = '2023-05-04'
+__note__ = "Use O3ResNet model from Leufen et al (2023) and apply model to IFS forecast data (only 24h forecast)."
+
+import argparse
+import sys
+sys.path.append("/p/home/jusers/leufen1/juwels/intelliaq/mlair_tf2/mlair")
+import logging
+
+from mlair.workflows import DefaultWorkflow
+from mlair.helpers import remove_items, to_list
+from mlair.helpers.helpers import str2bool
+from mlair.configuration.defaults import DEFAULT_PLOT_LIST, DEFAULT_TRAIN_END
+from mlair.model_modules.branched_input_networks import BranchedInputResNet as NN
+import os
+from mlair.data_handler.data_handler_mixed_sampling import DataHandlerIFS
+
+
+def load_stations(station_type_of_area=None):
+    """
+    Load station IDs from the hard-coded JSON station list.
+
+    The JSON file maps a type of area to a list of station IDs. Keeping the IDs in a file avoids passing long
+    station lists around (ML).
+
+    :param station_type_of_area: type of area or list of types of area to select; None selects every key that is
+        present in the file
+    :return: flat list of station IDs, or None if the station file does not exist
+    :raises KeyError: if a requested type of area is not a key of the station file
+    """
+    import json
+    filename = '/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/supplement/station_lists/central_eu_background_stations_reduced.json'
+    try:
+        # only opening the file can raise FileNotFoundError, so nothing else is guarded
+        with open(filename, 'r') as jfile:
+            stations_raw = json.load(jfile)
+    except FileNotFoundError:
+        # best effort: without the station list, None is handed on as stations
+        return None
+    if station_type_of_area is None:
+        station_type_of_area = list(stations_raw.keys())
+    stations = []
+    for type_of_area in to_list(station_type_of_area):
+        stations.extend(stations_raw[type_of_area])
+    return stations
+
+
+def load_layer_configuration(config_name="resnet_v08", activation=None, dropout=None):
+    """
+    Return the layer configuration for the given configuration name.
+
+    Known names are looked up in the table below; resnet_v06 is the model architecture that was identified as
+    best suited in Lukas' study (ML). Any other string is interpreted as comma-separated integers
+    (e.g. "64,32" -> [64, 32]) and is returned unchanged if it cannot be parsed that way. Note that the default
+    "resnet_v08" is not part of the table and therefore takes this fallback. Values that are not strings
+    (e.g. an already parsed list of integers) are returned as they are.
+
+    :param config_name: name of a configuration in the table, comma-separated integers or an already parsed value
+    :param activation: if not None, replaces the "activation" entry of every layer that has one
+    :param dropout: if not None, replaces the "rate" entry of every layer that has one
+    :return: list of layer dictionaries for a known name, otherwise the parsed or unchanged config_name
+    """
+    config_dict = {"resnet_v06": [
+        {"type": "Conv2D", "activation": "relu", "kernel_size": (7, 1), "filters": 32, "padding": "same", "strides": (2, 1)},
+        {"type": "Conv2D", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "padding": "same", "strides": (2, 1)},
+        {"type": "MaxPooling2D", "pool_size": (2, 1), "strides": (2, 1)},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 32, "strides": (1, 1), "kernel_regularizer": "l2"},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2", "use_1x1conv": True},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"},
+        {"type": "residual_block", "activation": "relu", "kernel_size": (3, 1), "filters": 64, "strides": (1, 1), "kernel_regularizer": "l2"},
+        {"type": "Dropout", "rate": 0.25},
+        {"type": "Flatten"},
+        {"type": "Concatenate"},
+        {"type": "Dense", "units": 128, "activation": "relu"}, ],
+    }
+
+    if not isinstance(config_name, str):
+        # lists are unhashable and would raise a TypeError in the dictionary lookup below
+        return config_name
+    if config_name not in config_dict:
+        try:
+            return [int(item) for item in config_name.split(',')]
+        except ValueError:
+            # neither a known name nor a list of integers: hand the value on untouched
+            return config_name
+
+    config = config_dict[config_name]
+    if activation is not None or dropout is not None:
+        for layer in config:
+            if activation is not None and "activation" in layer:
+                layer["activation"] = activation
+            if dropout is not None and "rate" in layer:
+                layer["rate"] = dropout
+    print(config)
+    return config
+
+
+STATS = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
+         'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
+         'pblheight': 'maximum', 'press': 'average_values'}
+
+
+# ML: Here, we specify that the meteorological parameters are obtained from IFS forecasts.
+# The chemical quantities are collected from different observation networks, e.g. UBA, EEA, ...
+# This should also match the variables entry of the args dictionary defined in main() below.
+DATA_ORIGIN = {"no": ["UBA", "EEA", "AIRBASE", "EMEP", "GAW"], "no2": ["UBA", "EEA", "AIRBASE", "EMEP", "GAW"], "o3": ["UBA", "EEA", "AIRBASE", "EMEP", "GAW"],
+               "cloudcover": "ifs", "pblheight": "ifs", "relhum": "ifs",
+               "temp": "ifs", "u": "ifs", "v": "ifs", "press": "ifs"}
+
+
+def main(parser_args):
+    """
+    Set up the MLAir workflow that applies O3ResNet to IFS forecast data and run it.
+
+    The hyperparameters of the resnet_v06 setup are hard-coded and take precedence over values given on the
+    command line; all remaining command line options are passed on to the workflow unchanged.
+
+    :param parser_args: namespace returned by the argument parser (it is not modified)
+    """
+    # work on a copy so that the hard-coded values and the pops below do not alter the caller's namespace
+    parser_dict = dict(vars(parser_args))
+    # ML: hard-coded configuration to enforce resnet_v06 (best suited model, see above)
+    parser_dict.update({
+        'lr': 0.0003, 'decay': 0.0, 'batch_size': 1024, 'layer_configuration': 'resnet_v06',
+        'activation': 'prelu', 'activation_output': 'linear', 'kernel_regularizer': 'l1_l2',
+        'l2': 0.12, 'l1': 0.095, 'dropout': 0.59, 'batch_normalization': False,
+    })
+    # these three are consumed by load_layer_configuration and must not reach the workflow as keywords
+    layer_configuration = parser_dict.pop("layer_configuration", None)
+    dropout = parser_dict.pop("dropout", None)
+    activation = parser_dict.pop("activation", None)
+
+    args = dict(
+        sampling=("hourly", "daily"),
+        # stations=load_stations(["rural", "suburban", "urban"]),
+        stations=load_stations(["rural", "suburban"]),
+        variables=["o3", "no", "no2", "cloudcover", "pblheight", "relhum", "temp", "u", "v", "press"],
+        statistics_per_var=STATS,
+
+        # ML: definition of time window to obtain data (to be adapted when actual set of available IFS forecasts is known)
+        start="2000-01-01",
+        end="2021-12-31",
+        train_start="2000-01-01",
+        train_end="2015-12-31",
+        val_start="2016-01-01",
+        val_end="2018-12-31",
+        test_start="2019-03-01",
+        test_end="2023-12-31",
+        # ML: Used to be 90 to ensure that the time series are sufficiently long to allow training
+        train_min_length=0,
+        val_min_length=0,
+        test_min_length=0,
+
+        data_origin=DATA_ORIGIN,
+        # ML: Sets IFS data handler
+        data_handler=DataHandlerIFS,
+        # ML: filter configurations (to be kept as they are)
+        filter_cutoff_period=[21],
+        filter_order=[42],
+        filter_window_type=("kaiser", 5),
+        filter_add_unfiltered=False,
+        # apriori_sel_opts=slice("2015-12-31"),
+        apriori_type="residuum_stats",
+        apriori_diurnal=True,
+        use_filter_branches=True,
+        # apriori_file=os.path.join("/p/project/deepacf/intelliaq/leufen1/demystify-temporal-components/experiment_IFS_forecast_O3ResNet", "apriori", "apriori.pickle"),
+
+
+        # future obs and input for meteo
+        window_history_size={"chem": 3 * 24 - 1, "meteo": 3 * 24 - 1 + 4 * 24},  # ML: total length of the respective windows (number of time steps)
+        window_history_offset={"chem": 25, "meteo": 25},  # ML: t0 is offset by one day, set to 12/13 to include 12 UTC-run
+        window_history_end={"chem": 0, "meteo": 4 * 24},  # ML: When do the windows defined above end with respect to t0 (-> meteo ends at four day lead time, chem ends at 0)
+        extend_length_opts={"chem": 0, "meteo": 4 * 24},  # ML: four day forecast is used for meteo
+        # ML: (e.g. if set to 2 days, 2 days of climate data would be used given
+        # ML: that window_history_end is still {... "meteo": 4*24})
+
+        # extend_length_opts={"chem": 0, "meteo": 24}, # 1d forecast for meteo
+        # extend_length_opts={"chem": 0, "meteo": 3*24}, # 3d forecast for meteo
+        # extend_length_opts={"chem": 0, "meteo": 4*24}, # 4d forecast for meteo
+        # extend_length_opts={"chem": 0, "meteo": 7*24}, # 7d meteo forecast
+        # extend_length_opts={"chem": 0, "meteo": 14*24}, # 14d meteo forecast
+
+        window_lead_time=4,  # T1D # ML: Four day lead time
+        target_var="o3",  # T1D # ML: ozone as target variable
+        interpolation_limit=(24, 2),  # T1F
+        transformation={
+            "o3": {"method": "standardise"},
+            "no": {"method": "standardise"},
+            "no2": {"method": "standardise"},
+            "cloudcover": {"method": "min_max", "feature_range": [-1, 1]},
+            "pblheight": {"method": "standardise"},
+            "press": {"method": "standardise"},
+            "relhum": {"method": "min_max", "feature_range": [-1, 1]},
+            "temp": {"method": "standardise"},
+            "u": {"method": "standardise"},
+            "v": {"method": "standardise"}, },
+        transformation_file=os.path.join("/p/project/deepacf/intelliaq/leufen1/demystify-temporal-components/experiment_IFS_forecast_O3ResNet", "transformation", "transformation.pickle"),
+        calculate_fresh_transformation=False,
+
+        # ML: To re-start training (e.g. finetuning of trained network with IFS forecasts), make use of the following combination:
+        # train_model=True, create_new_model=False, # this triggers loading of an existing model and continues training
+        # ML: Ensure that the checkpoint-directory is empty, whereas the pretrained model must be provided via the model_path-key
+
+        ## train_model=True, create_new_model=True,
+        train_model=False, create_new_model=False,  # currently used: no training
+        epochs=100,
+        model=NN,
+        model_display_name="O3ResNet",  # ML: internally used model name
+        layer_configuration=load_layer_configuration(layer_configuration, activation, dropout),
+        evaluate_feature_importance=False,
+        competitors=["CAMS"],  # ML: defines competitor model (empty list would put linear regression as competitor)
+        cams_interp_method=["linear", "nearest"],
+        cams_data_path="/p/project/deepacf/intelliaq/leufen1/DATA/CAMS_dma8eu",  # ML: to be adapted if relevant (cf. previous comment)
+        data_path="/p/project/deepacf/intelliaq/leufen1/DATA/IFS_EXPERIMENT/",  # ML: data path to IFS forecast -> to be adapted
+        overwrite_local_data=False,
+        overwrite_lazy_data=False,
+        lazy_preprocessing=True,  # ML: re-uses downloaded data (preprocessing time: few hours)
+        use_multiprocessing=True,
+        max_number_multiprocessing=10,
+        # ML: only use snapshot if you are absolutely sure that the snapshotted state of MLAir after preprocessing is proper (preprocessing time: few seconds)
+        # log_level_stream=logging.DEBUG,
+
+        # snapshot_load_path="/p/project/deepacf/intelliaq/leufen1/MLAir_snapshots/EU_c+0d_m+inf_fut/snapshot_preprocessing_jay.pickle", ## test start 2019-01-01
+        # snapshot_load_path="/p/project/deepacf/intelliaq/leufen1/MLAir_snapshots/EU_c+0d_m+inf_fut/snapshot_preprocessing_rooster.pickle", # test start 2019-03-01
+
+        # create_snapshot=True,
+        # snapshot_path="/p/project/deepacf/intelliaq/leufen1/MLAir_snapshots/EU_c+0d_m+inf_fut",
+        experiment_path=os.path.abspath(os.getcwd()),
+        **parser_dict
+    )
+    # shows the options that are forwarded to the workflow (hard-coded values included, popped keys excluded)
+    print(parser_dict)
+    workflow = DefaultWorkflow(**args, start_script=__file__)
+    workflow.run()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default="testrun",
+                        help="set experiment date as string")
+    # options without a value on the command line are suppressed, i.e. they do not show up in the namespace at all
+    parser.add_argument('--lr', default=argparse.SUPPRESS, type=float)
+    parser.add_argument('--decay', default=argparse.SUPPRESS, type=float)
+    parser.add_argument('--n_layer', default=argparse.SUPPRESS, type=int)
+    parser.add_argument('--n_hidden', default=argparse.SUPPRESS, type=int)
+    parser.add_argument('--activation', default=argparse.SUPPRESS, type=str)
+    parser.add_argument('--activation_output', default=argparse.SUPPRESS, type=str)
+    parser.add_argument('--batch_size', default=argparse.SUPPRESS, type=int)
+    parser.add_argument('--regularizer', default=argparse.SUPPRESS, type=str)
+    parser.add_argument('--l1', default=argparse.SUPPRESS, type=float)
+    parser.add_argument('--l2', default=argparse.SUPPRESS, type=float)
+    parser.add_argument('--dropout', default=argparse.SUPPRESS, type=float)
+    parser.add_argument('--dropout_rnn', default=argparse.SUPPRESS, type=float)
+    parser.add_argument('--layer_configuration', default=argparse.SUPPRESS, type=lambda s: [int(item) for item in s.split(',')])
+    parser.add_argument('--kernel_size', default=argparse.SUPPRESS, type=int)
+    parser.add_argument('--batch_normalization', default=argparse.SUPPRESS, type=str2bool)
+    parser.add_argument('--exponent_last_layer', default=argparse.SUPPRESS, type=int)
+    parser.add_argument('--loss_type', default=argparse.SUPPRESS, type=str)
+    parser.add_argument('--loss_weights', default=argparse.SUPPRESS, type=lambda s: [int(item) for item in s.split(',')])
+
+    args = parser.parse_args()
+    main(args)
diff --git a/mlair/helpers/data_sources/ifs.py b/mlair/helpers/data_sources/ifs.py
index eba40dc..99b6664 100644
--- a/mlair/helpers/data_sources/ifs.py
+++ b/mlair/helpers/data_sources/ifs.py
@@ -1,5 +1,5 @@
 """Methods to load ifs data."""
-__author__ = "Lukas Leufen, Michael Langgut"
+__author__ = "Lukas Leufen, Michael Langguth"
 __date__ = "2023-06-07"
 
 import logging
@@ -113,6 +113,10 @@ def _emulate_meta_data(variables):
 
 
 def _rename_ifs_variables(ifs_names):
+    """
+    This is used to match the IFS variable names with the variable names used in the TOAR database.
+    :param ifs_names: list of IFS variables to be matched
+    """
     mapper = {"sp": "press", "u10": "u", "v10": "v", "t2m": "temp", "d2m": "dew", "blh": "pblheight",
               "tcc": "cloudcover", "rhw": "relhum"}
     ifs_names = list(ifs_names)
-- 
GitLab