From a1815b4921c20f9fa885c8900cdf937a17704ae5 Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz-juelich.de> Date: Wed, 31 Mar 2021 11:11:27 +0200 Subject: [PATCH] updated docs --- docs/_source/defaults.rst | 2 + mlair/run_modules/experiment_setup.py | 72 +++++++++++---------------- 2 files changed, 30 insertions(+), 44 deletions(-) diff --git a/docs/_source/defaults.rst b/docs/_source/defaults.rst index 775134f5..e95cf10e 100644 --- a/docs/_source/defaults.rst +++ b/docs/_source/defaults.rst @@ -17,6 +17,7 @@ create_new_model data_handler data_origin data_path +debug - MLAir checks if it is running in debug mode and stores this dimensions end epochs @@ -57,6 +58,7 @@ train_start transformation :py:`{}` implement all further transformation functionality inside your custom data handler use_all_stations_on_all_data_sets +use_multiprocessing :py:`True` is set to False if MLAir is running in debug mode upsampling val_end val_min_length diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index f51cee8a..c777bcc4 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -64,48 +64,6 @@ class ExperimentSetup(RunEnvironment): * `target_dim` [.] * `window_lead_time` [.] 
- # interpolation - self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']}) - self._set_param("time_dim", time_dim, default='datetime') - self._set_param("interpolation_method", interpolation_method, default='linear') - self._set_param("limit_nan_fill", limit_nan_fill, default=1) - - # train set parameters - self._set_param("start", train_start, default="1997-01-01", scope="train") - self._set_param("end", train_end, default="2007-12-31", scope="train") - self._set_param("min_length", train_min_length, default=90, scope="train") - - # validation set parameters - self._set_param("start", val_start, default="2008-01-01", scope="val") - self._set_param("end", val_end, default="2009-12-31", scope="val") - self._set_param("min_length", val_min_length, default=90, scope="val") - - # test set parameters - self._set_param("start", test_start, default="2010-01-01", scope="test") - self._set_param("end", test_end, default="2017-12-31", scope="test") - self._set_param("min_length", test_min_length, default=90, scope="test") - - # train_val set parameters - self._set_param("start", self.data_store.get("start", "train"), scope="train_val") - self._set_param("end", self.data_store.get("end", "val"), scope="train_val") - train_val_min_length = sum([self.data_store.get("min_length", s) for s in ["train", "val"]]) - self._set_param("min_length", train_val_min_length, default=180, scope="train_val") - - # use all stations on all data sets (train, val, test) - self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True) - - # set post-processing instructions - self._set_param("evaluate_bootstraps", evaluate_bootstraps, scope="general.postprocessing") - create_new_bootstraps = max([self.data_store.get("train_model", "general"), create_new_bootstraps or False]) - self._set_param("create_new_bootstraps", create_new_bootstraps, scope="general.postprocessing") - self._set_param("number_of_bootstraps", 
number_of_bootstraps, default=20, scope="general.postprocessing") - self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing") - - # check variables, statistics and target variable - self._check_target_var() - self._compare_variables_and_statistics() - - Creates * plot of model architecture in `<model_name>.pdf` @@ -137,8 +95,11 @@ class ExperimentSetup(RunEnvironment): predicted. :param dimensions: :param time_dim: - :param interpolation_method: - :param limit_nan_fill: + :param interpolation_method: The method to use for interpolation. + :param interpolation_limit: The maximum number of subsequent time steps in a gap to fill by interpolation. If the + gap exceeds this number, the gap is not filled by interpolation at all. The value of time steps is an arbitrary + number that is applied depending on the `sampling` frequency. A limit of 2 means that either 2 hours or 2 days + are allowed to be interpolated depending on the set sampling rate. :param train_start: :param train_end: :param val_start: @@ -199,6 +160,29 @@ class ExperimentSetup(RunEnvironment): :param data_path: path to find and store meteorological and environmental / air quality data. Leave this parameter empty, if your host system is known and a suitable path was already hardcoded in the program (see :py:func:`prepare host <src.configuration.path_config.prepare_host>`). + :param experiment_date: + :param window_dim: "Temporal" dimension of the input and target data, that is provided for each sample. The number + of samples provided in this dimension can be set using `window_history_size` for inputs and `window_lead_time` + on the target side. + :param iter_dim: + :param batch_path: + :param login_nodes: + :param hpc_hosts: + :param model: + :param batch_size: + :param epochs: Number of epochs used in training. If a training is resumed and the number of epochs of the already + (partly) trained model is lower than this parameter, training is continued. 
In case this number is higher than + the given epochs parameter, no training is resumed. Epochs is set to 20 by default, but this value is just a + placeholder that should be adjusted for a meaningful training. + :param data_handler: + :param data_origin: + :param competitors: Provide names of reference models trained by MLAir that can be found in the `competitor_path`. + These models will be used in the postprocessing for comparison. + :param competitor_path: The path where MLAir can find competing models. If not provided, this path is assumed to be + in the `data_path` directory as a subdirectory called `competitors` (default). + :param use_multiprocessing: Enable parallel preprocessing (postprocessing not implemented yet) by setting this + parameter to `True` (default). If set to `False` the computation is performed in a serial approach. + Multiprocessing is disabled when running in debug mode and cannot be switched on. """ -- GitLab