diff --git a/run.py b/run.py
index 5e09269807aa239f21a3b59e3373ddc355dd667f..eaedb838135344df8a6b28339fa81660dfc85458 100644
--- a/run.py
+++ b/run.py
@@ -30,4 +30,6 @@ if __name__ == "__main__":
                         help="set experiment date as string")
     args = parser.parse_args()
 
-    main()
+    experiment = ExperimentSetup(args, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'])
+    a = 1
+    # main()
diff --git a/src/experiment_setup.py b/src/experiment_setup.py
index 4fc145732ecbce653e617060a9b38edc359c7a43..ce43b38ef631c3dfbb176d4301c0a28349abe1f5 100644
--- a/src/experiment_setup.py
+++ b/src/experiment_setup.py
@@ -1,94 +1,90 @@
 __author__ = "Lukas Leufen"
 __date__ = '2019-11-15'
 
-
-from src import helpers
+from src import helpers, modules
 import logging
 import argparse
 
+DEFAULT_STATIONS = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004',
+                    'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039', 'DEBW038', 'DEBW081',
+                    'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042', 'DEBW039', 'DEBY001',
+                    'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010',
+                    'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084', 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063',
+                    'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034', 'DEBY088', ]
+DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
+                        'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
+                        'pblheight': 'maximum'}
+
 
-class ExperimentSetup(object):
+class ExperimentSetup(modules.run):
     """
     params:
     trainable: Train new model if true, otherwise try to load existing model
     """
 
-    def __init__(self, parser_args, **kwargs):
-        self.args = self._set_parser_args(parser_args)
-        self.data_path = None
-        self.experiment_path = None
-        self.experiment_name = None
-        self.trainable = None
-        self.fraction_of_train = None
-        self.use_all_stations_on_all_data_sets = None
-        self.network = None
-        self.var_all_dict = None
-        self.stations = None
-        self.variables = None
-        self.dimensions = None
-        self.interpolate_dim = None
-        self.target_dim = None
-        self.target_var = None
-        self.train_kwargs = None
-        self.val_kwargs = None
-        self.test_kwargs = None
-        self.setup_experiment(**kwargs)
-
-    def _set_param(self, param, value, default=None):
-        if default is not None:
-            value = value.get(param, default)
-        setattr(self, param, value)
-        logging.debug(f"set experiment attribute: {param}={value}")
-
-    @staticmethod
-    def _set_parser_args(args):
-        """
-        Transform args to dict if given as argparse.Namespace
-        :param args:
-        :return:
-        """
-        if isinstance(args, argparse.Namespace):
-            return args.__dict__
-        return args
+    def __init__(self, parser_args, var_all_dict=None, stations=None, network=None, variables=None, target_var="o3",
+                 target_dim=None, dimensions=None, interpolate_dim=None, train_start=None, train_end=None,
+                 val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=True,
+                 trainable=False, fraction_of_train=None):
 
-    def setup_experiment(self, **kwargs):
+        # create run framework
+        super().__init__()
 
-        # set data path of this experiment
+        # experiment setup
         self._set_param("data_path", helpers.prepare_host())
+        self._set_param("trainable", trainable, default=False)
+        self._set_param("fraction_of_train", fraction_of_train, default=0.8)
 
         # set experiment name
-        exp_date = self.args.get("experiment_date")
+        exp_date = self._get_parser_args(parser_args).get("experiment_date")
         exp_name, exp_path = helpers.set_experiment_name(experiment_date=exp_date)
         self._set_param("experiment_name", exp_name)
         self._set_param("experiment_path", exp_path)
-        helpers.check_path_and_create(self.experiment_path)
+        helpers.check_path_and_create(self.data_store.get("experiment_path", "general"))
+
+        # setup for data
+        self._set_param("var_all_dict", var_all_dict, default=DEFAULT_VAR_ALL_DICT)
+        self._set_param("stations", stations, default=DEFAULT_STATIONS)
+        self._set_param("network", network, default="AIRBASE")
+        self._set_param("variables", variables, default=list(self.data_store.get("var_all_dict", "general").keys()))
+
+        # target
+        self._set_param("target_var", target_var, default="o3")
+        self._set_param("target_dim", target_dim, default='variables')
 
-        # set if model is trainable
-        self._set_param("trainable", kwargs, default=True)
+        # interpolation
+        self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']})
+        self._set_param("interpolate_dim", interpolate_dim, default='datetime')
 
-        # set fraction of train
-        self._set_param("fraction_of_train", kwargs, default=0.8)
+        # train parameters
+        self._set_param("start", train_start, default="1997-01-01", scope="general.train")
+        self._set_param("end", train_end, default="2007-12-31", scope="general.train")
+
+        # validation parameters
+        self._set_param("start", val_start, default="2008-01-01", scope="general.val")
+        self._set_param("end", val_end, default="2009-12-31", scope="general.val")
+
+        # test parameters
+        self._set_param("start", test_start, default="2010-01-01", scope="general.test")
+        self._set_param("end", test_end, default="2017-12-31", scope="general.test")
 
         # use all stations on all data sets (train, val, test)
-        self._set_param("use_all_stations_on_all_data_sets", kwargs, default=True)
-        self._set_param("network", kwargs, default="AIRBASE")
-        self._set_param("var_all_dict", kwargs,
-                        default={'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
-                                 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
-                                 'pblheight': 'maximum'})
-        self._set_param("stations", kwargs,
-                        default=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBY052', 'DEBY032', 'DEBW022',
-                                 'DEBY004', 'DEBY020', 'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073', 'DEBY039',
-                                 'DEBW038', 'DEBW081', 'DEBY075', 'DEBW040', 'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072',
-                                 'DEBW042', 'DEBW039', 'DEBY001', 'DEBY113', 'DEBY089', 'DEBW024', 'DEBW004', 'DEBY037',
-                                 'DEBW056', 'DEBW029', 'DEBY068', 'DEBW010', 'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084',
-                                 'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063', 'DEBY005', 'DEBW046', 'DEBW103',
-                                 'DEBW052', 'DEBW034', 'DEBY088', ])
-        self._set_param("variables", kwargs, default=list(self.var_all_dict.keys()))
-        self._set_param("dimensions", kwargs, default={'new_index': ['datetime', 'Stations']})
-        self._set_param("interpolate_dim", kwargs, default='datetime')
-        self._set_param("target_dim", kwargs, default='variables')
-        self._set_param("target_var", kwargs, default="o3")
-        self._set_param("train_kwargs", kwargs, default={"start": "1997-01-01", "end": "2007-12-31"})
-        self._set_param("val_kwargs", kwargs, default={"start": "2008-01-01", "end": "2009-12-31"})
-        self._set_param("test_kwargs", kwargs, default={"start": "2010-01-01", "end": "2017-12-31"})
+        self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True)
+
+    def _set_param(self, param, value, default=None, scope="general"):
+        if value is None and default is not None:
+            value = default
+        self.data_store.put(param, value, scope)
+        # setattr(self, param, value)
+        logging.debug(f"set experiment attribute: {param}({scope})={value}")
+
+    @staticmethod
+    def _get_parser_args(args):
+        """
+        Transform args to dict if given as argparse.Namespace
+        :param args:
+        :return:
+        """
+        if isinstance(args, argparse.Namespace):
+            return args.__dict__
+        return args
diff --git a/src/modules.py b/src/modules.py
index 01f7ed677c426ba7a6a0c180aeaef5de6257ae77..8e1b821e5851f8b00284949f454ef809fb1ad78d 100644
--- a/src/modules.py
+++ b/src/modules.py
@@ -2,7 +2,8 @@ from src.helpers import TimeTracking
 import logging
 import time
 from src.data_generator import DataGenerator
-from src.experiment_setup import ExperimentSetup
+# from src.experiment_setup import ExperimentSetup
+from src.datastore import DataStoreByScope as DataStoreObject
 import argparse
 from typing import Dict, List, Any, Tuple
 
@@ -14,6 +15,7 @@ class run(object):
     """
 
     del_by_exit = False
+    data_store = DataStoreObject()
 
     def __init__(self):
         """
@@ -52,7 +54,7 @@ class PreProcessing(run):
     testing subsets.
     """
 
-    def __init__(self, experiment_setup: ExperimentSetup):
+    def __init__(self, experiment_setup: Any):
         super().__init__()
         self.setup = experiment_setup
         self.kwargs = None
@@ -190,6 +192,6 @@ if __name__ == "__main__":
     parser.add_argument('--experiment_date', metavar='--exp_date', type=str, nargs=1, default=None,
                         help="set experiment date as string")
     parser_args = parser.parse_args()
-    with run():
-        setup = ExperimentSetup(parser_args, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'])
-        PreProcessing(setup)
+    # with run():
+    #     setup = ExperimentSetup(parser_args, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'])
+    #     PreProcessing(setup)