Skip to content
Snippets Groups Projects
Commit cf3a557b authored by lukas leufen's avatar lukas leufen
Browse files

all defaults are now centralised in defaults.py, run scripts are partly adjusted

parent 76aaa9a7
No related branches found
No related tags found
3 merge requests!125Release v0.10.0,!124Update Master to new version v0.10.0,!106Resolve "redefine default experiment settings"
Pipeline #39699 failed
__author__ = "Lukas Leufen"
__date__ = '2020-06-25'

# Central collection of all default experiment settings. Every constant whose name starts with ``DEFAULT`` is picked
# up by ``get_defaults()``.

# station ids used if no stations are given
DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004',
                    'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039', 'DEBW038', 'DEBW081',
                    'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042', 'DEBW039', 'DEBY001',
                    'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010',
                    'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084', 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063',
                    'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034', 'DEBY088', ]
# mapping of variable name -> statistic applied to this variable
DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
                        'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
                        'pblheight': 'maximum'}
DEFAULT_NETWORK = "AIRBASE"
DEFAULT_STATION_TYPE = "background"
# Materialise the keys as a real list: a bare ``dict.keys()`` is a live view bound to DEFAULT_VAR_ALL_DICT that
# cannot be indexed or pickled and would silently change whenever the dictionary is modified.
DEFAULT_VARIABLES = list(DEFAULT_VAR_ALL_DICT.keys())
DEFAULT_START = "1997-01-01"
DEFAULT_END = "2017-12-31"
DEFAULT_WINDOW_HISTORY_SIZE = 13
DEFAULT_OVERWRITE_LOCAL_DATA = False
DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"}
DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"]  # first part of login node names: ju[wels], hdfmll(ogin)
DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"]  # first part of node names for Juwels: jw[comp], hdfmlc(ompute)
DEFAULT_CREATE_NEW_MODEL = True
DEFAULT_TRAINABLE = True
DEFAULT_FRACTION_OF_TRAINING = 0.8
DEFAULT_EXTREME_VALUES = None
DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY = False
DEFAULT_PERMUTE_DATA = False
DEFAULT_BATCH_SIZE = int(256 * 2)
DEFAULT_EPOCHS = 20
DEFAULT_TARGET_VAR = "o3"
DEFAULT_TARGET_DIM = "variables"
DEFAULT_WINDOW_LEAD_TIME = 3
DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]}
DEFAULT_INTERPOLATE_DIM = "datetime"
DEFAULT_INTERPOLATE_METHOD = "linear"
DEFAULT_LIMIT_NAN_FILL = 1
# train / validation / test periods and minimum lengths
DEFAULT_TRAIN_START = "1997-01-01"
DEFAULT_TRAIN_END = "2007-12-31"
DEFAULT_TRAIN_MIN_LENGTH = 90
DEFAULT_VAL_START = "2008-01-01"
DEFAULT_VAL_END = "2009-12-31"
DEFAULT_VAL_MIN_LENGTH = 90
DEFAULT_TEST_START = "2010-01-01"
DEFAULT_TEST_END = "2017-12-31"
DEFAULT_TEST_MIN_LENGTH = 90
DEFAULT_TRAIN_VAL_MIN_LENGTH = 180
DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS = True
# post-processing
DEFAULT_EVALUATE_BOOTSTRAPS = True
DEFAULT_CREATE_NEW_BOOTSTRAPS = False
DEFAULT_NUMBER_OF_BOOTSTRAPS = 20
DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
                     "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
                     "PlotAvailability"]
def get_defaults():
    """
    Collect every default setting defined in this module.

    :return: dictionary mapping each module-level name that starts with ``DEFAULT`` to its current value
    """
    collected = {}
    for name, setting in globals().items():
        # only names carrying the DEFAULT prefix are considered as default parameters
        if not name.startswith('DEFAULT'):
            continue
        collected[name] = setting
    return collected


if __name__ == "__main__":
    print(get_defaults())
\ No newline at end of file
......@@ -4,33 +4,33 @@ import argparse
import inspect
def run(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
station_type='background',
trainable=False, create_new_model=True,
window_history_size=6,
def run(stations=None,
station_type=None,
trainable=None, create_new_model=None,
window_history_size=None,
experiment_date="testrun",
network=None,
variables=None, statistics_per_var=None,
start=None, end=None,
target_var="o3", target_dim=None,
target_var=None, target_dim=None,
window_lead_time=None,
dimensions=None,
interpolate_method=None, interpolate_dim=None, limit_nan_fill=None,
train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None,
use_all_stations_on_all_data_sets=True, fraction_of_train=None,
use_all_stations_on_all_data_sets=None, fraction_of_train=None,
experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None,
sampling="daily",
permute_data_on_training=False, extreme_values=None, extremes_on_right_tail_only=None,
sampling=None,
permute_data_on_training=None, extreme_values=None, extremes_on_right_tail_only=None,
transformation=None,
train_min_length=None, val_min_length=None, test_min_length=None,
evaluate_bootstraps=True, number_of_bootstraps=None, create_new_bootstraps=False,
evaluate_bootstraps=None, number_of_bootstraps=None, create_new_bootstraps=None,
plot_list=None,
model=None,
batch_size=None,
epochs=None):
params = inspect.getfullargspec(ExperimentSetup).args
kwargs = {k: v for k, v in locals().items() if k in params}
kwargs = {k: v for k, v in locals().items() if k in params and v is not None}
parser = argparse.ArgumentParser()
parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default="testrun",
......
......@@ -8,25 +8,19 @@ from typing import Union, Dict, Any, List
from src.configuration import path_config
from src import helpers
from src.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_NETWORK, DEFAULT_STATION_TYPE, \
DEFAULT_START, DEFAULT_END, DEFAULT_WINDOW_HISTORY_SIZE, DEFAULT_OVERWRITE_LOCAL_DATA, DEFAULT_TRANSFORMATION, \
DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \
DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \
DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \
DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATE_DIM, DEFAULT_INTERPOLATE_METHOD, DEFAULT_LIMIT_NAN_FILL, \
DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \
DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST
from src.run_modules.run_environment import RunEnvironment
from src.model_modules.model_class import MyLittleModel as VanillaModel
# NOTE(review): the names assigned below are also imported from src.configuration.defaults at the top of this
# module; these local assignments rebind them - confirm which definition is intended to remain.
# station ids used if no stations are given
DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004',
'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039', 'DEBW038', 'DEBW081',
'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042', 'DEBW039', 'DEBY001',
'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010',
'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084', 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063',
'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034', 'DEBY088', ]
# mapping of variable name -> statistic applied to this variable
DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
'pblheight': 'maximum'}
DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"}
DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
"PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles",
"PlotAvailability"]
DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"] # first part of login node names: ju[wels], hdfmll(ogin)
DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"] # first part of node names for Juwels: jw[comp], hdfmlc(ompute)
class ExperimentSetup(RunEnvironment):
"""
......@@ -228,11 +222,11 @@ class ExperimentSetup(RunEnvironment):
interpolate_dim=None,
interpolate_method=None,
limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None,
test_end=None, use_all_stations_on_all_data_sets=True, trainable: bool = None, fraction_of_train: float = None,
experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data: bool = None, sampling: str = "daily",
create_new_model: bool = None, bootstrap_path=None, permute_data_on_training: bool = None, transformation=None,
test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None,
experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily",
create_new_model = None, bootstrap_path=None, permute_data_on_training = None, transformation=None,
train_min_length=None, val_min_length=None, test_min_length=None, extreme_values: list = None,
extremes_on_right_tail_only: bool = None, evaluate_bootstraps=True, plot_list=None, number_of_bootstraps=None,
extremes_on_right_tail_only: bool = None, evaluate_bootstraps=None, plot_list=None, number_of_bootstraps=None,
create_new_bootstraps=None, data_path: str = None, login_nodes=None, hpc_hosts=None, model=None,
batch_size=None, epochs=None):
......@@ -244,22 +238,23 @@ class ExperimentSetup(RunEnvironment):
self._set_param("hostname", path_config.get_host())
self._set_param("hpc_hosts", hpc_hosts, default=DEFAULT_HPC_HOST_LIST + DEFAULT_HPC_LOGIN_LIST)
self._set_param("login_nodes", login_nodes, default=DEFAULT_HPC_LOGIN_LIST)
self._set_param("create_new_model", create_new_model, default=True)
self._set_param("create_new_model", create_new_model, default=DEFAULT_CREATE_NEW_MODEL)
if self.data_store.get("create_new_model"):
trainable = True
data_path = self.data_store.get("data_path")
bootstrap_path = path_config.set_bootstrap_path(bootstrap_path, data_path, sampling)
self._set_param("bootstrap_path", bootstrap_path)
self._set_param("trainable", trainable, default=True)
self._set_param("fraction_of_training", fraction_of_train, default=0.8)
self._set_param("extreme_values", extreme_values, default=None, scope="train")
self._set_param("extremes_on_right_tail_only", extremes_on_right_tail_only, default=False, scope="train")
self._set_param("trainable", trainable, default=DEFAULT_TRAINABLE)
self._set_param("fraction_of_training", fraction_of_train, default=DEFAULT_FRACTION_OF_TRAINING)
self._set_param("extreme_values", extreme_values, default=DEFAULT_EXTREME_VALUES, scope="train")
self._set_param("extremes_on_right_tail_only", extremes_on_right_tail_only,
default=DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, scope="train")
self._set_param("upsampling", extreme_values is not None, scope="train")
upsampling = self.data_store.get("upsampling", "train")
permute_data = False if permute_data_on_training is None else permute_data_on_training
permute_data = DEFAULT_PERMUTE_DATA if permute_data_on_training is None else permute_data_on_training
self._set_param("permute_data", permute_data or upsampling, scope="train")
self._set_param("batch_size", batch_size, default=int(256 * 2))
self._set_param("epochs", epochs, default=20)
self._set_param("batch_size", batch_size, default=DEFAULT_BATCH_SIZE)
self._set_param("epochs", epochs, default=DEFAULT_EPOCHS)
# set experiment name
exp_date = self._get_parser_args(parser_args).get("experiment_date")
......@@ -290,58 +285,63 @@ class ExperimentSetup(RunEnvironment):
# setup for data
self._set_param("stations", stations, default=DEFAULT_STATIONS)
self._set_param("network", network, default="AIRBASE")
self._set_param("station_type", station_type, default=None)
self._set_param("network", network, default=DEFAULT_NETWORK)
self._set_param("station_type", station_type, default=DEFAULT_STATION_TYPE)
self._set_param("statistics_per_var", statistics_per_var, default=DEFAULT_VAR_ALL_DICT)
self._set_param("variables", variables, default=list(self.data_store.get("statistics_per_var").keys()))
self._set_param("start", start, default="1997-01-01")
self._set_param("end", end, default="2017-12-31")
self._set_param("window_history_size", window_history_size, default=13)
self._set_param("overwrite_local_data", overwrite_local_data, default=False, scope="preprocessing")
self._set_param("start", start, default=DEFAULT_START)
self._set_param("end", end, default=DEFAULT_END)
self._set_param("window_history_size", window_history_size, default=DEFAULT_WINDOW_HISTORY_SIZE)
self._set_param("overwrite_local_data", overwrite_local_data, default=DEFAULT_OVERWRITE_LOCAL_DATA,
scope="preprocessing")
self._set_param("sampling", sampling)
self._set_param("transformation", transformation, default=DEFAULT_TRANSFORMATION)
self._set_param("transformation", None, scope="preprocessing")
# target
self._set_param("target_var", target_var, default="o3")
self._set_param("target_dim", target_dim, default='variables')
self._set_param("window_lead_time", window_lead_time, default=3)
self._set_param("target_var", target_var, default=DEFAULT_TARGET_VAR)
self._set_param("target_dim", target_dim, default=DEFAULT_TARGET_DIM)
self._set_param("window_lead_time", window_lead_time, default=DEFAULT_WINDOW_LEAD_TIME)
# interpolation
self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']})
self._set_param("interpolate_dim", interpolate_dim, default='datetime')
self._set_param("interpolate_method", interpolate_method, default='linear')
self._set_param("limit_nan_fill", limit_nan_fill, default=1)
self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS)
self._set_param("interpolate_dim", interpolate_dim, default=DEFAULT_INTERPOLATE_DIM)
self._set_param("interpolate_method", interpolate_method, default=DEFAULT_INTERPOLATE_METHOD)
self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL)
# train set parameters
self._set_param("start", train_start, default="1997-01-01", scope="train")
self._set_param("end", train_end, default="2007-12-31", scope="train")
self._set_param("min_length", train_min_length, default=90, scope="train")
self._set_param("start", train_start, default=DEFAULT_TRAIN_START, scope="train")
self._set_param("end", train_end, default=DEFAULT_TRAIN_END, scope="train")
self._set_param("min_length", train_min_length, default=DEFAULT_TRAIN_MIN_LENGTH, scope="train")
# validation set parameters
self._set_param("start", val_start, default="2008-01-01", scope="val")
self._set_param("end", val_end, default="2009-12-31", scope="val")
self._set_param("min_length", val_min_length, default=90, scope="val")
self._set_param("start", val_start, default=DEFAULT_VAL_START, scope="val")
self._set_param("end", val_end, default=DEFAULT_VAL_END, scope="val")
self._set_param("min_length", val_min_length, default=DEFAULT_VAL_MIN_LENGTH, scope="val")
# test set parameters
self._set_param("start", test_start, default="2010-01-01", scope="test")
self._set_param("end", test_end, default="2017-12-31", scope="test")
self._set_param("min_length", test_min_length, default=90, scope="test")
self._set_param("start", test_start, default=DEFAULT_TEST_START, scope="test")
self._set_param("end", test_end, default=DEFAULT_TEST_END, scope="test")
self._set_param("min_length", test_min_length, default=DEFAULT_TEST_MIN_LENGTH, scope="test")
# train_val set parameters
self._set_param("start", self.data_store.get("start", "train"), scope="train_val")
self._set_param("end", self.data_store.get("end", "val"), scope="train_val")
train_val_min_length = sum([self.data_store.get("min_length", s) for s in ["train", "val"]])
self._set_param("min_length", train_val_min_length, default=180, scope="train_val")
self._set_param("min_length", train_val_min_length, default=DEFAULT_TRAIN_VAL_MIN_LENGTH, scope="train_val")
# use all stations on all data sets (train, val, test)
self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True)
self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets,
default=DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS)
# set post-processing instructions
self._set_param("evaluate_bootstraps", evaluate_bootstraps, scope="general.postprocessing")
create_new_bootstraps = max([self.data_store.get("trainable", "general"), create_new_bootstraps or False])
self._set_param("evaluate_bootstraps", evaluate_bootstraps, default=DEFAULT_EVALUATE_BOOTSTRAPS,
scope="general.postprocessing")
create_new_bootstraps = max([self.data_store.get("trainable", "general"),
create_new_bootstraps or DEFAULT_CREATE_NEW_BOOTSTRAPS])
self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing")
self._set_param("number_of_bootstraps", number_of_bootstraps, default=20, scope="general.postprocessing")
self._set_param("number_of_bootstraps", number_of_bootstraps, default=DEFAULT_NUMBER_OF_BOOTSTRAPS,
scope="general.postprocessing")
self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
# check variables, statistics and target variable
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment