diff --git a/src/configuration/defaults.py b/src/configuration/defaults.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ce96cfce515e7f32d98444e6a9542c9fbd7b4f4
--- /dev/null
+++ b/src/configuration/defaults.py
@@ -0,0 +1,64 @@
+__author__ = "Lukas Leufen"
+__date__ = '2020-06-25'
+
+
+DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004',
+                    'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039', 'DEBW038', 'DEBW081',
+                    'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042', 'DEBW039', 'DEBY001',
+                    'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010',
+                    'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084', 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063',
+                    'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034', 'DEBY088', ]
+DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
+                        'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
+                        'pblheight': 'maximum'}
+DEFAULT_NETWORK = "AIRBASE"
+DEFAULT_STATION_TYPE = "background"
+DEFAULT_VARIABLES = list(DEFAULT_VAR_ALL_DICT.keys())  # real list: a dict_keys view is live and cannot be pickled
+DEFAULT_START = "1997-01-01"
+DEFAULT_END = "2017-12-31"
+DEFAULT_WINDOW_HISTORY_SIZE = 13
+DEFAULT_OVERWRITE_LOCAL_DATA = False
+DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"}
+DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"]  # ju[wels], hdfmll(ogin)
+DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"]  # first part of node names for Juwels (jw[comp]), hdfmlc(ompute)
+DEFAULT_CREATE_NEW_MODEL = True
+DEFAULT_TRAINABLE = True
+DEFAULT_FRACTION_OF_TRAINING = 0.8
+DEFAULT_EXTREME_VALUES = None
+DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY = False
+DEFAULT_PERMUTE_DATA = False
+DEFAULT_BATCH_SIZE = int(256 * 2)
+DEFAULT_EPOCHS = 20
+DEFAULT_TARGET_VAR = "o3"
+DEFAULT_TARGET_DIM = "variables"
+DEFAULT_WINDOW_LEAD_TIME = 3
+DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]}
+DEFAULT_INTERPOLATE_DIM = "datetime"
+DEFAULT_INTERPOLATE_METHOD = "linear"
+DEFAULT_LIMIT_NAN_FILL = 1
+DEFAULT_TRAIN_START = "1997-01-01"
+DEFAULT_TRAIN_END = "2007-12-31"
+DEFAULT_TRAIN_MIN_LENGTH = 90
+DEFAULT_VAL_START = "2008-01-01"
+DEFAULT_VAL_END = "2009-12-31"
+DEFAULT_VAL_MIN_LENGTH = 90
+DEFAULT_TEST_START = "2010-01-01"
+DEFAULT_TEST_END = "2017-12-31"
+DEFAULT_TEST_MIN_LENGTH = 90
+DEFAULT_TRAIN_VAL_MIN_LENGTH = 180
+DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True
+DEFAULT_EVALUATE_BOOTSTRAPS = True
+DEFAULT_CREATE_NEW_BOOTSTRAPS = False
+DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
+DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
+                     "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
+                     "PlotAvailability"]
+
+
+def get_defaults():
+    """Return a dict mapping every module-level name that starts with 'DEFAULT' to its value."""
+    return {key: value for key, value in globals().items() if key.startswith('DEFAULT')}
+
+
+if __name__ == "__main__":
+    print(get_defaults())
\ No newline at end of file
diff --git a/src/run.py b/src/run.py index 11029817a978b872d0f99954a50ab5f5b93aa012..cd97217efd74b81bcaf79b4b2351c4c063efcbf0 100644 --- a/src/run.py +++ b/src/run.py @@ -4,33 +4,33 @@ import argparse import inspect -def run(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], - station_type='background', - trainable=False, create_new_model=True, - window_history_size=6, +def run(stations=None, + station_type=None, + trainable=None, create_new_model=None, + window_history_size=None, 
experiment_date="testrun", network=None, variables=None, statistics_per_var=None, start=None, end=None, - target_var="o3", target_dim=None, + target_var=None, target_dim=None, window_lead_time=None, dimensions=None, interpolate_method=None, interpolate_dim=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, - use_all_stations_on_all_data_sets=True, fraction_of_train=None, + use_all_stations_on_all_data_sets=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None, - sampling="daily", - permute_data_on_training=False, extreme_values=None, extremes_on_right_tail_only=None, + sampling=None, + permute_data_on_training=None, extreme_values=None, extremes_on_right_tail_only=None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, - evaluate_bootstraps=True, number_of_bootstraps=None, create_new_bootstraps=False, + evaluate_bootstraps=None, number_of_bootstraps=None, create_new_bootstraps=None, plot_list=None, model=None, batch_size=None, epochs=None): params = inspect.getfullargspec(ExperimentSetup).args - kwargs = {k: v for k, v in locals().items() if k in params} + kwargs = {k: v for k, v in locals().items() if k in params and v is not None} parser = argparse.ArgumentParser() parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default="testrun", diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py index ff6fec842714d599696b8726e9d25aa22e55583f..e89926a67ea4d63847fdc3eab71d56c45a99f6b3 100644 --- a/src/run_modules/experiment_setup.py +++ b/src/run_modules/experiment_setup.py @@ -8,25 +8,19 @@ from typing import Union, Dict, Any, List from src.configuration import path_config from src import helpers +from src.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_NETWORK, DEFAULT_STATION_TYPE, \ + DEFAULT_START, 
DEFAULT_END, DEFAULT_WINDOW_HISTORY_SIZE, DEFAULT_OVERWRITE_LOCAL_DATA, DEFAULT_TRANSFORMATION, \ + DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \ + DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \ + DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \ + DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATE_DIM, DEFAULT_INTERPOLATE_METHOD, DEFAULT_LIMIT_NAN_FILL, \ + DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ + DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ + DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ + DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST from src.run_modules.run_environment import RunEnvironment from src.model_modules.model_class import MyLittleModel as VanillaModel -DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004', - 'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039', 'DEBW038', 'DEBW081', - 'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042', 'DEBW039', 'DEBY001', - 'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010', - 'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084', 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063', - 'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034', 'DEBY088', ] -DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', - 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values', - 'pblheight': 'maximum'} -DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"} -DEFAULT_PLOT_LIST = ["PlotMonthlySummary", 
"PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries", - "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles", - "PlotAvailability"] -DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"] # ju[wels} #hdfmll(ogin) -DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"] # first part of node names for Juwels (jw[comp], hdfmlc(ompute). - class ExperimentSetup(RunEnvironment): """ @@ -228,11 +222,11 @@ class ExperimentSetup(RunEnvironment): interpolate_dim=None, interpolate_method=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, - test_end=None, use_all_stations_on_all_data_sets=True, trainable: bool = None, fraction_of_train: float = None, - experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data: bool = None, sampling: str = "daily", - create_new_model: bool = None, bootstrap_path=None, permute_data_on_training: bool = None, transformation=None, + test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None, + experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily", + create_new_model = None, bootstrap_path=None, permute_data_on_training = None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, extreme_values: list = None, - extremes_on_right_tail_only: bool = None, evaluate_bootstraps=True, plot_list=None, number_of_bootstraps=None, + extremes_on_right_tail_only: bool = None, evaluate_bootstraps=None, plot_list=None, number_of_bootstraps=None, create_new_bootstraps=None, data_path: str = None, login_nodes=None, hpc_hosts=None, model=None, batch_size=None, epochs=None): @@ -244,22 +238,23 @@ class ExperimentSetup(RunEnvironment): self._set_param("hostname", path_config.get_host()) self._set_param("hpc_hosts", hpc_hosts, default=DEFAULT_HPC_HOST_LIST + DEFAULT_HPC_LOGIN_LIST) 
self._set_param("login_nodes", login_nodes, default=DEFAULT_HPC_LOGIN_LIST) - self._set_param("create_new_model", create_new_model, default=True) + self._set_param("create_new_model", create_new_model, default=DEFAULT_CREATE_NEW_MODEL) if self.data_store.get("create_new_model"): trainable = True data_path = self.data_store.get("data_path") bootstrap_path = path_config.set_bootstrap_path(bootstrap_path, data_path, sampling) self._set_param("bootstrap_path", bootstrap_path) - self._set_param("trainable", trainable, default=True) - self._set_param("fraction_of_training", fraction_of_train, default=0.8) - self._set_param("extreme_values", extreme_values, default=None, scope="train") - self._set_param("extremes_on_right_tail_only", extremes_on_right_tail_only, default=False, scope="train") + self._set_param("trainable", trainable, default=DEFAULT_TRAINABLE) + self._set_param("fraction_of_training", fraction_of_train, default=DEFAULT_FRACTION_OF_TRAINING) + self._set_param("extreme_values", extreme_values, default=DEFAULT_EXTREME_VALUES, scope="train") + self._set_param("extremes_on_right_tail_only", extremes_on_right_tail_only, + default=DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, scope="train") self._set_param("upsampling", extreme_values is not None, scope="train") upsampling = self.data_store.get("upsampling", "train") - permute_data = False if permute_data_on_training is None else permute_data_on_training + permute_data = DEFAULT_PERMUTE_DATA if permute_data_on_training is None else permute_data_on_training self._set_param("permute_data", permute_data or upsampling, scope="train") - self._set_param("batch_size", batch_size, default=int(256 * 2)) - self._set_param("epochs", epochs, default=20) + self._set_param("batch_size", batch_size, default=DEFAULT_BATCH_SIZE) + self._set_param("epochs", epochs, default=DEFAULT_EPOCHS) # set experiment name exp_date = self._get_parser_args(parser_args).get("experiment_date") @@ -290,58 +285,63 @@ class ExperimentSetup(RunEnvironment): # 
setup for data self._set_param("stations", stations, default=DEFAULT_STATIONS) - self._set_param("network", network, default="AIRBASE") - self._set_param("station_type", station_type, default=None) + self._set_param("network", network, default=DEFAULT_NETWORK) + self._set_param("station_type", station_type, default=DEFAULT_STATION_TYPE) self._set_param("statistics_per_var", statistics_per_var, default=DEFAULT_VAR_ALL_DICT) self._set_param("variables", variables, default=list(self.data_store.get("statistics_per_var").keys())) - self._set_param("start", start, default="1997-01-01") - self._set_param("end", end, default="2017-12-31") - self._set_param("window_history_size", window_history_size, default=13) - self._set_param("overwrite_local_data", overwrite_local_data, default=False, scope="preprocessing") + self._set_param("start", start, default=DEFAULT_START) + self._set_param("end", end, default=DEFAULT_END) + self._set_param("window_history_size", window_history_size, default=DEFAULT_WINDOW_HISTORY_SIZE) + self._set_param("overwrite_local_data", overwrite_local_data, default=DEFAULT_OVERWRITE_LOCAL_DATA, + scope="preprocessing") self._set_param("sampling", sampling) self._set_param("transformation", transformation, default=DEFAULT_TRANSFORMATION) self._set_param("transformation", None, scope="preprocessing") # target - self._set_param("target_var", target_var, default="o3") - self._set_param("target_dim", target_dim, default='variables') - self._set_param("window_lead_time", window_lead_time, default=3) + self._set_param("target_var", target_var, default=DEFAULT_TARGET_VAR) + self._set_param("target_dim", target_dim, default=DEFAULT_TARGET_DIM) + self._set_param("window_lead_time", window_lead_time, default=DEFAULT_WINDOW_LEAD_TIME) # interpolation - self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']}) - self._set_param("interpolate_dim", interpolate_dim, default='datetime') - self._set_param("interpolate_method", 
interpolate_method, default='linear') - self._set_param("limit_nan_fill", limit_nan_fill, default=1) + self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS) + self._set_param("interpolate_dim", interpolate_dim, default=DEFAULT_INTERPOLATE_DIM) + self._set_param("interpolate_method", interpolate_method, default=DEFAULT_INTERPOLATE_METHOD) + self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL) # train set parameters - self._set_param("start", train_start, default="1997-01-01", scope="train") - self._set_param("end", train_end, default="2007-12-31", scope="train") - self._set_param("min_length", train_min_length, default=90, scope="train") + self._set_param("start", train_start, default=DEFAULT_TRAIN_START, scope="train") + self._set_param("end", train_end, default=DEFAULT_TRAIN_END, scope="train") + self._set_param("min_length", train_min_length, default=DEFAULT_TRAIN_MIN_LENGTH, scope="train") # validation set parameters - self._set_param("start", val_start, default="2008-01-01", scope="val") - self._set_param("end", val_end, default="2009-12-31", scope="val") - self._set_param("min_length", val_min_length, default=90, scope="val") + self._set_param("start", val_start, default=DEFAULT_VAL_START, scope="val") + self._set_param("end", val_end, default=DEFAULT_VAL_END, scope="val") + self._set_param("min_length", val_min_length, default=DEFAULT_VAL_MIN_LENGTH, scope="val") # test set parameters - self._set_param("start", test_start, default="2010-01-01", scope="test") - self._set_param("end", test_end, default="2017-12-31", scope="test") - self._set_param("min_length", test_min_length, default=90, scope="test") + self._set_param("start", test_start, default=DEFAULT_TEST_START, scope="test") + self._set_param("end", test_end, default=DEFAULT_TEST_END, scope="test") + self._set_param("min_length", test_min_length, default=DEFAULT_TEST_MIN_LENGTH, scope="test") # train_val set parameters self._set_param("start", 
self.data_store.get("start", "train"), scope="train_val") self._set_param("end", self.data_store.get("end", "val"), scope="train_val") train_val_min_length = sum([self.data_store.get("min_length", s) for s in ["train", "val"]]) - self._set_param("min_length", train_val_min_length, default=180, scope="train_val") + self._set_param("min_length", train_val_min_length, default=DEFAULT_TRAIN_VAL_MIN_LENGTH, scope="train_val") # use all stations on all data sets (train, val, test) - self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True) + self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, + default=DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS) # set post-processing instructions - self._set_param("evaluate_bootstraps", evaluate_bootstraps, scope="general.postprocessing") - create_new_bootstraps = max([self.data_store.get("trainable", "general"), create_new_bootstraps or False]) + self._set_param("evaluate_bootstraps", evaluate_bootstraps, default=DEFAULT_EVALUATE_BOOTSTRAPS, + scope="general.postprocessing") + create_new_bootstraps = max([self.data_store.get("trainable", "general"), + create_new_bootstraps or DEFAULT_CREATE_NEW_BOOTSTRAPS]) self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing") - self._set_param("number_of_bootstraps", number_of_bootstraps, default=20, scope="general.postprocessing") + self._set_param("number_of_bootstraps", number_of_bootstraps, default=DEFAULT_NUMBER_OF_BOOTSTRAPS, + scope="general.postprocessing") self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing") # check variables, statistics and target variable