diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dec6a0fb2ab9eb51fef737c50d5beab4270c8942..32d8f138020e175ed4e17077713ed7cb26c5c533 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -198,4 +198,4 @@ pages: - public/badges/ - public/coverage/ - public/test/ - - public/webpage/ + - public/docs/ diff --git a/run.py b/run.py index 572e59e2f75c4068bc63ecb6bb54a687bafebf4b..2395064c25fcaf5fed1ede6a919b06bbc62f27e9 100644 --- a/run.py +++ b/run.py @@ -13,10 +13,11 @@ from src.run_modules.training import Training def main(parser_args): + experiment_date = parser_args.experiment_date with RunEnvironment(): - ExperimentSetup(parser_args, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], + ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], station_type='background', trainable=False, create_new_model=True, window_history_size=6, - create_new_bootstraps=False) + experiment_date=experiment_date, create_new_bootstraps=False) PreProcessing() PartitionCheck() diff --git a/run_hourly.py b/run_hourly.py index 559bf1a1056928f55f9ff3527805da121091d830..df3266405bc195cbe4c3546b4c7fd0c6b2925a84 100644 --- a/run_hourly.py +++ b/run_hourly.py @@ -12,9 +12,11 @@ from src.run_modules.training import Training def main(parser_args): + experiment_date = parser_args.experiment_date with RunEnvironment(): - ExperimentSetup(parser_args, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], - station_type='background', trainable=True, sampling="hourly", window_history_size=48) + ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], + station_type='background', trainable=True, sampling="hourly", window_history_size=48, + experiment_date=experiment_date) PreProcessing() ModelSetup() diff --git a/run_zam347.py b/run_zam347.py index d95067bb84a91230b0877f7a2b3d0cac5dc495e1..69b3cd6fee0e60212de9c74c2dd29b720bc81b81 100644 --- a/run_zam347.py +++ b/run_zam347.py @@ -29,9 +29,10 @@ def load_stations(): def main(parser_args): + experiment_date = parser_args.experiment_date with RunEnvironment(): - ExperimentSetup(parser_args, stations=load_stations(), station_type='background', trainable=False, - create_new_model=True) + ExperimentSetup(stations=load_stations(), station_type='background', trainable=False, create_new_model=True, + experiment_date=experiment_date) PreProcessing() ModelSetup() diff --git a/src/configuration/__init__.py b/src/configuration/__init__.py index 48d9f38ce01d168dc8ed3019e98f48e9ab19bf72..a14a815b49fa536c0a223fbc55d136680c764eab 100644 --- a/src/configuration/__init__.py +++ b/src/configuration/__init__.py @@ -1,2 +1,3 @@ """Collection of configuration functions, paths and classes.""" -from .path_config import ROOT_PATH, prepare_host, set_experiment_name, set_bootstrap_path, check_path_and_create, get_host \ No newline at end of file +from .path_config import ROOT_PATH, prepare_host, set_experiment_name, set_bootstrap_path, check_path_and_create, \ + get_host, set_experiment_path \ No newline at end of file diff --git a/src/configuration/defaults.py b/src/configuration/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..7ce96cfce515e7f32d98444e6a9542c9fbd7b4f4 --- /dev/null +++ b/src/configuration/defaults.py @@ -0,0 +1,64 @@ +__author__ = "Lukas Leufen" +__date__ = '2020-06-25' + + +DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004', + 'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039', 'DEBW038', 'DEBW081', + 'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042', 'DEBW039', 'DEBY001', + 'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010', + 'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084', 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063', + 'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034', 'DEBY088', ] +DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', + 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values', + 'pblheight': 'maximum'} +DEFAULT_NETWORK = "AIRBASE" +DEFAULT_STATION_TYPE = "background" +DEFAULT_VARIABLES = DEFAULT_VAR_ALL_DICT.keys() +DEFAULT_START = "1997-01-01" +DEFAULT_END = "2017-12-31" +DEFAULT_WINDOW_HISTORY_SIZE = 13 +DEFAULT_OVERWRITE_LOCAL_DATA = False +DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"} +DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"] # ju[wels} #hdfmll(ogin) +DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"] # first part of node names for Juwels (jw[comp], hdfmlc(ompute). +DEFAULT_CREATE_NEW_MODEL = True +DEFAULT_TRAINABLE = True +DEFAULT_FRACTION_OF_TRAINING = 0.8 +DEFAULT_EXTREME_VALUES = None +DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY = False +DEFAULT_PERMUTE_DATA = False +DEFAULT_BATCH_SIZE = int(256 * 2) +DEFAULT_EPOCHS = 20 +DEFAULT_TARGET_VAR = "o3" +DEFAULT_TARGET_DIM = "variables" +DEFAULT_WINDOW_LEAD_TIME = 3 +DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]} +DEFAULT_INTERPOLATE_DIM = "datetime" +DEFAULT_INTERPOLATE_METHOD = "linear" +DEFAULT_LIMIT_NAN_FILL = 1 +DEFAULT_TRAIN_START = "1997-01-01" +DEFAULT_TRAIN_END = "2007-12-31" +DEFAULT_TRAIN_MIN_LENGTH = 90 +DEFAULT_VAL_START = "2008-01-01" +DEFAULT_VAL_END = "2009-12-31" +DEFAULT_VAL_MIN_LENGTH = 90 +DEFAULT_TEST_START = "2010-01-01" +DEFAULT_TEST_END = "2017-12-31" +DEFAULT_TEST_MIN_LENGTH = 90 +DEFAULT_TRAIN_VAL_MIN_LENGTH = 180 +DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True +DEFAULT_EVALUATE_BOOTSTRAPS = True +DEFAULT_CREATE_NEW_BOOTSTRAPS = False +DEFAULT_NUMBER_OF_BOOTSTRAPS = 20 +DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries", + "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles", + "PlotAvailability"] + + +def get_defaults(): + """Return all default parameters set in defaults.py""" + return {key: value for key, value in globals().items() if key.startswith('DEFAULT')} + + +if __name__ == "__main__": + print(get_defaults()) \ No newline at end of file diff --git a/src/configuration/path_config.py b/src/configuration/path_config.py index 85a536ccf2e0711eea119072122b7a852de5fa90..7af25875eea58de081012fc6040a76a04f001d54 100644 --- a/src/configuration/path_config.py +++ b/src/configuration/path_config.py @@ -54,31 +54,43 @@ def prepare_host(create_new=True, data_path=None, sampling="daily") -> str: return path -def set_experiment_name(experiment_name=None, experiment_path=None, sampling=None) -> Tuple[str, str]: +def set_experiment_path(name: str, path: str = None) -> str: """ Set name of experiment and its path. - * Experiment name is set to `TestExperiment` if not provided in kwargs. If a name is given, this string is expanded + * Experiment path is set to `<experiment_path>/<exp_name>` if provided or `ROOT_PATH/<exp_name>` otherwise + + :param name: custom experiment name + :param path: custom experiment path + + :return: full experiment path + """ + if path is None: + experiment_path = os.path.abspath(os.path.join(ROOT_PATH, name)) + else: + experiment_path = os.path.join(os.path.abspath(path), name) + return experiment_path + + +def set_experiment_name(name: str = None, sampling: str = None) -> str: + """ + Set name of experiment and its path. + + * Experiment name is set to `TestExperiment` if not provided. If a name is given, this string is expanded by suffix `_network`. Experiment name is always expanded by `_<sampling>` as ending suffix if sampling is given. - * Experiment path is set to `ROOT_PATH/<exp_name>` if not provided or otherwise use `<experiment_path>/<exp_name>` - :param experiment_name: custom experiment name - :param experiment_path: custom experiment path + :param name: custom experiment name :param sampling: sampling rate as string to add to experiment name - :return: experiment name and full experiment path + :return: experiment name """ - if experiment_name is None: + if name is None: experiment_name = "TestExperiment" else: - experiment_name = f"{experiment_name}_network" + experiment_name = f"{name}_network" if sampling is not None: experiment_name += f"_{sampling}" - if experiment_path is None: - experiment_path = os.path.abspath(os.path.join(ROOT_PATH, experiment_name)) - else: - experiment_path = os.path.join(os.path.abspath(experiment_path), experiment_name) - return experiment_name, experiment_path + return experiment_name def set_bootstrap_path(bootstrap_path: str, data_path: str, sampling: str) -> str: diff --git a/src/run.py b/src/run.py index 11029817a978b872d0f99954a50ab5f5b93aa012..900ebb47bc868b1364d10021e6b4fe8dc7186c7d 100644 --- a/src/run.py +++ b/src/run.py @@ -4,41 +4,36 @@ import argparse import inspect -def run(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], - station_type='background', - trainable=False, create_new_model=True, - window_history_size=6, +def run(stations=None, + station_type=None, + trainable=None, create_new_model=None, + window_history_size=None, experiment_date="testrun", network=None, variables=None, statistics_per_var=None, start=None, end=None, - target_var="o3", target_dim=None, + target_var=None, target_dim=None, window_lead_time=None, dimensions=None, interpolate_method=None, interpolate_dim=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, - use_all_stations_on_all_data_sets=True, fraction_of_train=None, + use_all_stations_on_all_data_sets=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None, - sampling="daily", - permute_data_on_training=False, extreme_values=None, extremes_on_right_tail_only=None, + sampling=None, + permute_data_on_training=None, extreme_values=None, extremes_on_right_tail_only=None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, - evaluate_bootstraps=True, number_of_bootstraps=None, create_new_bootstraps=False, + evaluate_bootstraps=None, number_of_bootstraps=None, create_new_bootstraps=None, plot_list=None, model=None, batch_size=None, epochs=None): params = inspect.getfullargspec(ExperimentSetup).args - kwargs = {k: v for k, v in locals().items() if k in params} - - parser = argparse.ArgumentParser() - parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default="testrun", - help="set experiment date as string") - args = parser.parse_args() + kwargs = {k: v for k, v in locals().items() if k in params and v is not None} with RunEnvironment(): - ExperimentSetup(args, **kwargs) + ExperimentSetup(**kwargs) PreProcessing() PartitionCheck() ModelSetup() diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py index ff6fec842714d599696b8726e9d25aa22e55583f..a93fe403e11b6483832df417b0e97462bc252a4a 100644 --- a/src/run_modules/experiment_setup.py +++ b/src/run_modules/experiment_setup.py @@ -8,25 +8,19 @@ from typing import Union, Dict, Any, List from src.configuration import path_config from src import helpers +from src.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_NETWORK, DEFAULT_STATION_TYPE, \ + DEFAULT_START, DEFAULT_END, DEFAULT_WINDOW_HISTORY_SIZE, DEFAULT_OVERWRITE_LOCAL_DATA, DEFAULT_TRANSFORMATION, \ + DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \ + DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \ + DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \ + DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATE_DIM, DEFAULT_INTERPOLATE_METHOD, DEFAULT_LIMIT_NAN_FILL, \ + DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ + DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ + DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ + DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST from src.run_modules.run_environment import RunEnvironment from src.model_modules.model_class import MyLittleModel as VanillaModel -DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004', - 'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039', 'DEBW038', 'DEBW081', - 'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042', 'DEBW039', 'DEBY001', - 'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010', - 'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084', 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063', - 'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034', 'DEBY088', ] -DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', - 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values', - 'pblheight': 'maximum'} -DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"} -DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries", - "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles", - "PlotAvailability"] -DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"] # ju[wels} #hdfmll(ogin) -DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"] # first part of node names for Juwels (jw[comp], hdfmlc(ompute). - class ExperimentSetup(RunEnvironment): """ @@ -212,7 +206,7 @@ class ExperimentSetup(RunEnvironment): """ def __init__(self, - parser_args=None, + experiment_date=None, stations: Union[str, List[str]] = None, network: str = None, station_type: str = None, @@ -228,11 +222,11 @@ class ExperimentSetup(RunEnvironment): interpolate_dim=None, interpolate_method=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, - test_end=None, use_all_stations_on_all_data_sets=True, trainable: bool = None, fraction_of_train: float = None, - experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data: bool = None, sampling: str = "daily", - create_new_model: bool = None, bootstrap_path=None, permute_data_on_training: bool = None, transformation=None, + test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None, + experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily", + create_new_model = None, bootstrap_path=None, permute_data_on_training = None, transformation=None, train_min_length=None, val_min_length=None, test_min_length=None, extreme_values: list = None, - extremes_on_right_tail_only: bool = None, evaluate_bootstraps=True, plot_list=None, number_of_bootstraps=None, + extremes_on_right_tail_only: bool = None, evaluate_bootstraps=None, plot_list=None, number_of_bootstraps=None, create_new_bootstraps=None, data_path: str = None, login_nodes=None, hpc_hosts=None, model=None, batch_size=None, epochs=None): @@ -244,104 +238,109 @@ class ExperimentSetup(RunEnvironment): self._set_param("hostname", path_config.get_host()) self._set_param("hpc_hosts", hpc_hosts, default=DEFAULT_HPC_HOST_LIST + DEFAULT_HPC_LOGIN_LIST) self._set_param("login_nodes", login_nodes, default=DEFAULT_HPC_LOGIN_LIST) - self._set_param("create_new_model", create_new_model, default=True) + self._set_param("create_new_model", create_new_model, default=DEFAULT_CREATE_NEW_MODEL) if self.data_store.get("create_new_model"): trainable = True data_path = self.data_store.get("data_path") bootstrap_path = path_config.set_bootstrap_path(bootstrap_path, data_path, sampling) self._set_param("bootstrap_path", bootstrap_path) - self._set_param("trainable", trainable, default=True) - self._set_param("fraction_of_training", fraction_of_train, default=0.8) - self._set_param("extreme_values", extreme_values, default=None, scope="train") - self._set_param("extremes_on_right_tail_only", extremes_on_right_tail_only, default=False, scope="train") + self._set_param("trainable", trainable, default=DEFAULT_TRAINABLE) + self._set_param("fraction_of_training", fraction_of_train, default=DEFAULT_FRACTION_OF_TRAINING) + self._set_param("extreme_values", extreme_values, default=DEFAULT_EXTREME_VALUES, scope="train") + self._set_param("extremes_on_right_tail_only", extremes_on_right_tail_only, + default=DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, scope="train") self._set_param("upsampling", extreme_values is not None, scope="train") upsampling = self.data_store.get("upsampling", "train") - permute_data = False if permute_data_on_training is None else permute_data_on_training + permute_data = DEFAULT_PERMUTE_DATA if permute_data_on_training is None else permute_data_on_training self._set_param("permute_data", permute_data or upsampling, scope="train") - self._set_param("batch_size", batch_size, default=int(256 * 2)) - self._set_param("epochs", epochs, default=20) + self._set_param("batch_size", batch_size, default=DEFAULT_BATCH_SIZE) + self._set_param("epochs", epochs, default=DEFAULT_EPOCHS) # set experiment name - exp_date = self._get_parser_args(parser_args).get("experiment_date") - exp_name, exp_path = path_config.set_experiment_name(experiment_name=exp_date, experiment_path=experiment_path, - sampling=sampling) - self._set_param("experiment_name", exp_name) - self._set_param("experiment_path", exp_path) - logging.info(f"Experiment path is: {exp_path}") + experiment_name = path_config.set_experiment_name(name=experiment_date, sampling=sampling) + experiment_path = path_config.set_experiment_path(name=experiment_name, path=experiment_path) + self._set_param("experiment_name", experiment_name) + self._set_param("experiment_path", experiment_path) + logging.info(f"Experiment path is: {experiment_path}") path_config.check_path_and_create(self.data_store.get("experiment_path")) # set model path - self._set_param("model_path", None, os.path.join(exp_path, "model")) + self._set_param("model_path", None, os.path.join(experiment_path, "model")) path_config.check_path_and_create(self.data_store.get("model_path")) # set plot path - default_plot_path = os.path.join(exp_path, "plots") + default_plot_path = os.path.join(experiment_path, "plots") self._set_param("plot_path", plot_path, default=default_plot_path) path_config.check_path_and_create(self.data_store.get("plot_path")) # set results path - default_forecast_path = os.path.join(exp_path, "forecasts") + default_forecast_path = os.path.join(experiment_path, "forecasts") self._set_param("forecast_path", forecast_path, default_forecast_path) path_config.check_path_and_create(self.data_store.get("forecast_path")) # set logging path - self._set_param("logging_path", None, os.path.join(exp_path, "logging")) + self._set_param("logging_path", None, os.path.join(experiment_path, "logging")) path_config.check_path_and_create(self.data_store.get("logging_path")) # setup for data self._set_param("stations", stations, default=DEFAULT_STATIONS) - self._set_param("network", network, default="AIRBASE") - self._set_param("station_type", station_type, default=None) + self._set_param("network", network, default=DEFAULT_NETWORK) + self._set_param("station_type", station_type, default=DEFAULT_STATION_TYPE) self._set_param("statistics_per_var", statistics_per_var, default=DEFAULT_VAR_ALL_DICT) self._set_param("variables", variables, default=list(self.data_store.get("statistics_per_var").keys())) - self._set_param("start", start, default="1997-01-01") - self._set_param("end", end, default="2017-12-31") - self._set_param("window_history_size", window_history_size, default=13) - self._set_param("overwrite_local_data", overwrite_local_data, default=False, scope="preprocessing") + self._set_param("start", start, default=DEFAULT_START) + self._set_param("end", end, default=DEFAULT_END) + self._set_param("window_history_size", window_history_size, default=DEFAULT_WINDOW_HISTORY_SIZE) + self._set_param("overwrite_local_data", overwrite_local_data, default=DEFAULT_OVERWRITE_LOCAL_DATA, + scope="preprocessing") self._set_param("sampling", sampling) self._set_param("transformation", transformation, default=DEFAULT_TRANSFORMATION) self._set_param("transformation", None, scope="preprocessing") # target - self._set_param("target_var", target_var, default="o3") - self._set_param("target_dim", target_dim, default='variables') - self._set_param("window_lead_time", window_lead_time, default=3) + self._set_param("target_var", target_var, default=DEFAULT_TARGET_VAR) + self._set_param("target_dim", target_dim, default=DEFAULT_TARGET_DIM) + self._set_param("window_lead_time", window_lead_time, default=DEFAULT_WINDOW_LEAD_TIME) # interpolation - self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']}) - self._set_param("interpolate_dim", interpolate_dim, default='datetime') - self._set_param("interpolate_method", interpolate_method, default='linear') - self._set_param("limit_nan_fill", limit_nan_fill, default=1) + self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS) + self._set_param("interpolate_dim", interpolate_dim, default=DEFAULT_INTERPOLATE_DIM) + self._set_param("interpolate_method", interpolate_method, default=DEFAULT_INTERPOLATE_METHOD) + self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL) # train set parameters - self._set_param("start", train_start, default="1997-01-01", scope="train") - self._set_param("end", train_end, default="2007-12-31", scope="train") - self._set_param("min_length", train_min_length, default=90, scope="train") + self._set_param("start", train_start, default=DEFAULT_TRAIN_START, scope="train") + self._set_param("end", train_end, default=DEFAULT_TRAIN_END, scope="train") + self._set_param("min_length", train_min_length, default=DEFAULT_TRAIN_MIN_LENGTH, scope="train") # validation set parameters - self._set_param("start", val_start, default="2008-01-01", scope="val") - self._set_param("end", val_end, default="2009-12-31", scope="val") - self._set_param("min_length", val_min_length, default=90, scope="val") + self._set_param("start", val_start, default=DEFAULT_VAL_START, scope="val") + self._set_param("end", val_end, default=DEFAULT_VAL_END, scope="val") + self._set_param("min_length", val_min_length, default=DEFAULT_VAL_MIN_LENGTH, scope="val") # test set parameters - self._set_param("start", test_start, default="2010-01-01", scope="test") - self._set_param("end", test_end, default="2017-12-31", scope="test") - self._set_param("min_length", test_min_length, default=90, scope="test") + self._set_param("start", test_start, default=DEFAULT_TEST_START, scope="test") + self._set_param("end", test_end, default=DEFAULT_TEST_END, scope="test") + self._set_param("min_length", test_min_length, default=DEFAULT_TEST_MIN_LENGTH, scope="test") # train_val set parameters self._set_param("start", self.data_store.get("start", "train"), scope="train_val") self._set_param("end", self.data_store.get("end", "val"), scope="train_val") train_val_min_length = sum([self.data_store.get("min_length", s) for s in ["train", "val"]]) - self._set_param("min_length", train_val_min_length, default=180, scope="train_val") + self._set_param("min_length", train_val_min_length, default=DEFAULT_TRAIN_VAL_MIN_LENGTH, scope="train_val") # use all stations on all data sets (train, val, test) - self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True) + self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, + default=DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS) # set post-processing instructions - self._set_param("evaluate_bootstraps", evaluate_bootstraps, scope="general.postprocessing") - create_new_bootstraps = max([self.data_store.get("trainable", "general"), create_new_bootstraps or False]) + self._set_param("evaluate_bootstraps", evaluate_bootstraps, default=DEFAULT_EVALUATE_BOOTSTRAPS, + scope="general.postprocessing") + create_new_bootstraps = max([self.data_store.get("trainable", "general"), + create_new_bootstraps or DEFAULT_CREATE_NEW_BOOTSTRAPS]) self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing") - self._set_param("number_of_bootstraps", number_of_bootstraps, default=20, scope="general.postprocessing") + self._set_param("number_of_bootstraps", number_of_bootstraps, default=DEFAULT_NUMBER_OF_BOOTSTRAPS, + scope="general.postprocessing") self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing") # check variables, statistics and target variable @@ -358,22 +357,6 @@ class ExperimentSetup(RunEnvironment): self.data_store.set(param, value, scope) logging.debug(f"set experiment attribute: {param}({scope})={value}") - @staticmethod - def _get_parser_args(args: Union[Dict, argparse.Namespace]) -> Dict: - """ - Transform args to dict if given as argparse.Namespace. - - :param args: either a dictionary or an argument parser instance - - :return: dictionary with all arguments - """ - if isinstance(args, argparse.Namespace): - return args.__dict__ - elif isinstance(args, dict): - return args - else: - return {} - def _compare_variables_and_statistics(self): """ Compare variables and statistics. diff --git a/test/test_configuration/test_path_config.py b/test/test_configuration/test_path_config.py index 557e59d7c9cd1ed45e2fc80c93ede020976e2976..acb43676cb86ca76aded88aa0d46f62dd78d9992 100644 --- a/test/test_configuration/test_path_config.py +++ b/test/test_configuration/test_path_config.py @@ -4,7 +4,8 @@ import os import mock import pytest -from src.configuration import prepare_host, set_experiment_name, set_bootstrap_path, check_path_and_create +from src.configuration import prepare_host, set_experiment_name, set_bootstrap_path, check_path_and_create, \ + set_experiment_path, ROOT_PATH from src.helpers import PyTestRegex @@ -54,23 +55,27 @@ class TestPrepareHost: class TestSetExperimentName: - def test_set_experiment(self): - exp_name, exp_path = set_experiment_name() + def test_set_experiment_name(self): + exp_name = set_experiment_name() assert exp_name == "TestExperiment" - assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "TestExperiment")) - exp_name, exp_path = set_experiment_name(experiment_name="2019-11-14", experiment_path=os.path.join( - os.path.dirname(__file__), "test2")) + exp_name = set_experiment_name(name="2019-11-14") assert exp_name == "2019-11-14_network" - assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "test2", exp_name)) - def test_set_experiment_from_sys(self): - exp_name, _ = set_experiment_name(experiment_name="2019-11-14") - assert exp_name == "2019-11-14_network" - - def test_set_experiment_hourly(self): - exp_name, exp_path = set_experiment_name(sampling="hourly") + def test_set_experiment_name_sampling(self): + exp_name = set_experiment_name(sampling="hourly") assert exp_name == "TestExperiment_hourly" - assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "TestExperiment_hourly")) + exp_name = set_experiment_name(sampling="daily") + assert exp_name == "TestExperiment_daily" + + def test_set_experiment_path(self): + exp_path = set_experiment_path("TestExperiment") + assert exp_path == os.path.abspath(os.path.join(ROOT_PATH, "TestExperiment")) + exp_path = set_experiment_path(name="2019-11-14_network", path=os.path.join(os.path.dirname(__file__), "test2")) + assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "test2", "2019-11-14_network")) + + def test_set_experiment_path_given_path(self): + exp_path = set_experiment_path("TestExperiment", path=os.path.dirname(__file__)) + assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "TestExperiment")) class TestSetBootstrapPath: diff --git a/test/test_modules/test_experiment_setup.py b/test/test_modules/test_experiment_setup.py index e06ba6c0ce5b9abb169e20016342b2a0dfb47d0f..2ecd78f0c7e2b14c0b9c64192c06014e2a0da75f 100644 --- a/test/test_modules/test_experiment_setup.py +++ b/test/test_modules/test_experiment_setup.py @@ -33,16 +33,6 @@ class TestExperimentSetup: empty_obj._set_param("AnotherNoneTester", None) assert empty_obj.data_store.get("AnotherNoneTester", "general") is None - def test_get_parser_args_from_dict(self, empty_obj): - res = empty_obj._get_parser_args({'test2': 2, 'test10str': "10"}) - assert res == {'test2': 2, 'test10str': "10"} - - def test_get_parser_args_from_parse_args(self, empty_obj): - parser = argparse.ArgumentParser() - parser.add_argument('--experiment_date', type=str) - parser_args = parser.parse_args(["--experiment_date", "TOMORROW"]) - assert empty_obj._get_parser_args(parser_args) == {"experiment_date": "TOMORROW"} - def test_init_default(self): exp_setup = ExperimentSetup() data_store = exp_setup.data_store @@ -68,7 +58,7 @@ class TestExperimentSetup: 'DEBW052', 'DEBW034', 'DEBY088', ] assert data_store.get("stations", "general") == default_stations assert data_store.get("network", "general") == "AIRBASE" - assert data_store.get("station_type", "general") is None + assert data_store.get("station_type", "general") == "background" assert data_store.get("variables", "general") == list(default_statistics_per_var.keys()) assert data_store.get("statistics_per_var", "general") == default_statistics_per_var assert data_store.get("start", "general") == "1997-01-01" @@ -104,7 +94,7 @@ class TestExperimentSetup: def test_init_no_default(self): experiment_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "testExperimentFolder")) - kwargs = dict(parser_args={"experiment_date": "TODAY"}, + kwargs = dict(experiment_date= "TODAY", statistics_per_var={'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum'}, stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background", variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4, @@ -183,7 +173,7 @@ class TestExperimentSetup: def test_compare_variables_and_statistics(self): experiment_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "testExperimentFolder")) - kwargs = dict(parser_args={"experiment_date": "TODAY"}, + kwargs = dict(experiment_date="TODAY", statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, stations=['DEBY053', 'DEBW059', 'DEBW027'], variables=["o3", "relhum"], experiment_path=experiment_path) diff --git a/test/test_modules/test_partition_check.py b/test/test_modules/test_partition_check.py index d862b9e879c1121c78bc72ee1c9b8ba08f73a239..b04e01d13e9e160553f8ff66af8d97f65aa24bf0 100644 --- a/test/test_modules/test_partition_check.py +++ b/test/test_modules/test_partition_check.py @@ -24,7 +24,7 @@ class TestPartitionCheck: @mock.patch("os.path.exists", return_value=False) @mock.patch("os.makedirs", side_effect=None) def obj_with_exp_setup_login(self, mock_host, mock_user, mock_path, mock_check): - ExperimentSetup(parser_args={}, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], + ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background") pre = object.__new__(PartitionCheck) super(PartitionCheck, pre).__init__() @@ -37,7 +37,7 @@ class TestPartitionCheck: @mock.patch("os.path.exists", return_value=False) @mock.patch("os.makedirs", side_effect=None) def obj_with_exp_setup_compute(self, mock_host, mock_user, mock_path, mock_check): - ExperimentSetup(parser_args={}, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], + ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background") pre = object.__new__(PartitionCheck) super(PartitionCheck, pre).__init__() @@ -45,7 +45,7 @@ class TestPartitionCheck: RunEnvironment().__del__() def test_init(self, caplog): - ExperimentSetup(parser_args={}, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'], + ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) caplog.clear() caplog.set_level(logging.INFO) diff --git a/test/test_modules/test_pre_processing.py b/test/test_modules/test_pre_processing.py index 6abc722273613a1f4d6727396b114939b4d6a552..29811fb04789f32a1e2cc1b3affb6f8d4ae99730 100644 --- a/test/test_modules/test_pre_processing.py +++ b/test/test_modules/test_pre_processing.py @@ -26,7 +26,7 @@ class TestPreProcessing: @pytest.fixture def obj_with_exp_setup(self): - ExperimentSetup(parser_args={}, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], + ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background") pre = object.__new__(PreProcessing) super(PreProcessing, pre).__init__() @@ -34,7 +34,7 @@ class TestPreProcessing: RunEnvironment().__del__() def test_init(self, caplog): - ExperimentSetup(parser_args={}, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'], + ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) caplog.clear() caplog.set_level(logging.INFO)