diff --git a/docs/_source/defaults.rst b/docs/_source/defaults.rst index e95cf10eb8b53e776a2607dafba52fd1edad98ca..360c9ad1269213efe42a252fc901b36eb223e2a5 100644 --- a/docs/_source/defaults.rst +++ b/docs/_source/defaults.rst @@ -4,66 +4,185 @@ Defaults In this section, we explain which parameters are set by MLAir during the :py:`ExperimentSetup` if not specified by the user. This is important information for example if a new :ref:`Custom Data Handler` is implemented. -================================= =============== ============================================================ -parameter default comment -================================= =============== ============================================================ -batch_path -batch_size -bootstrap_path -competitor_path -competitors -create_new_bootstraps -create_new_model -data_handler -data_origin -data_path -debug - MLAir checks if it is running in debug mode and stores this -dimensions -end -epochs -evaluate_bootstraps -experiment_name -experiment_path -extreme_values -extremes_on_right_tail_only -forecast_path -fraction_of_training -hostname -hpc_hosts -interpolation_limit -interpolation_method -logging_path -login_nodes -model_class -model_path -neighbors -number_of_bootstraps -overwrite_local_data -permute_data -plot_list -plot_path -start -stations -statistics_per_var -target_dim -target_var -test_start -test_end -test_min_length -time_dim -train_model -train_end -train_min_length -train_start -transformation :py:`{}` implement all further transformation functionality - inside your custom data handler -use_all_stations_on_all_data_sets -use_multiprocessing :py:`True` is set to False if MLAir is running in debug mode -upsampling -val_end -val_min_length -val_start -variables -window_history_size -window_lead_time -================================= =============== ============================================================ + +.. 
list-table:: Defaults Overview + :widths: 25 25 50 + :header-rows: 1 + + * - parameter + - default + - comment + * - batch_path + - + - + * - batch_size + - :py:`512` + - + * - bootstrap_path + - + - + * - competitor_path + - + - + * - competitors + - :py:`[]` + - + * - create_new_bootstraps + - :py:`False` + - + * - create_new_model + - :py:`True` + - + * - data_handler + - :py:`DefaultDataHandler` + - + * - data_origin + - + - + * - data_path + - + - + * - debug + - ``-`` + - MLAir checks if it is running in debug mode and stores this + * - dimensions + - + - + * - end + - :py:`"2017-12-31"` + - + * - epochs + - :py:`20` + - This is just a placeholder to prevent unintended longish training + * - evaluate_bootstraps + - :py:`True` + - Bootstrapping may take some time. + * - experiment_name + - + - + * - experiment_path + - + - + * - extreme_values + - :py:`None` + - + * - extremes_on_right_tail_only + - :py:`False` + - Could be used for skewed distributions + * - forecast_path + - + - + * - fraction_of_training + - + - + * - hostname + - + - + * - hpc_hosts + - + - + * - interpolation_limit + - :py:`1` + - + * - interpolation_method + - :py:`"linear"` + - + * - logging_path + - + - + * - login_nodes + - + - + * - model_class + - + - + * - model_path + - + - + * - neighbors + - + - + * - number_of_bootstraps + - + - + * - overwrite_local_data + - + - + * - permute_data + - :py:`False` + - + * - plot_list + - + - + * - plot_path + - + - + * - start + - :py:`"1997-01-01"` + - + * - stations + - + - + * - statistics_per_var + - + - + * - target_dim + - + - + * - target_var + - + - + * - test_start + - + - + * - test_end + - + - + * - test_min_length + - + - + * - time_dim + - + - + * - train_model + - + - + * - train_end + - + - + * - train_min_length + - + - + * - train_start + - + - + * - transformation + - :py:`{}` + - implement all further transformation functionality inside your custom data handler + * - use_all_stations_on_all_data_sets + - + - + * - use_multiprocessing + -
:py:`True` + - is used if MLAir is not running in debug mode + * - use_multiprocessing_on_debug + - :py:`False` + - is used if MLAir is running in debug mode + * - upsampling + - + - + * - val_end + - + - + * - val_min_length + - + - + * - val_start + - + - + * - variables + - + - + * - window_history_size + - :py:`13` + - + * - window_lead_time + - :py:`3` + - diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index 8805acfc99d2064b656e3fc80c95a6de198acf29..a874611a42cbfb4ce4e663f3acad6fc4eed04607 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -54,6 +54,7 @@ DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "", "pm10": "", "so2": ""} DEFAULT_USE_MULTIPROCESSING = True +DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False def get_defaults(): diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index c777bcc4d568862485d733ca42f2ea38c52799eb..24fedaa82615f93941ee737f13981e0c334259a9 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -19,7 +19,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \ - DEFAULT_USE_MULTIPROCESSING + DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG from mlair.data_handler import DefaultDataHandler from mlair.run_modules.run_environment import RunEnvironment from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel @@ -215,7 +215,7 @@ class ExperimentSetup(RunEnvironment): 
create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None, hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None, data_origin: Dict = None, competitors: list = None, competitor_path: str = None, - use_multiprocessing: bool = None, **kwargs): + use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, **kwargs): # create run framework super().__init__() @@ -255,8 +255,11 @@ class ExperimentSetup(RunEnvironment): # host system setup debug_mode = sys.gettrace() is not None self._set_param("debug_mode", debug_mode) - use_multiprocessing = False if debug_mode is True else use_multiprocessing - self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING) + if debug_mode is True: + self._set_param("use_multiprocessing", use_multiprocessing_on_debug, + default=DEFAULT_USE_MULTIPROCESSING_ON_DEBUG) + else: + self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING) # batch path (temporary) self._set_param("batch_path", batch_path, default=os.path.join(experiment_path, "batch_data")) diff --git a/test/test_run_modules/test_experiment_setup.py b/test/test_run_modules/test_experiment_setup.py index 7c63d3d101176a40749ce903f569263b9c884d5e..d7cd08879c7b480650d9d1b3f337cc7a33955ad3 100644 --- a/test/test_run_modules/test_experiment_setup.py +++ b/test/test_run_modules/test_experiment_setup.py @@ -3,6 +3,7 @@ import logging import os import pytest +import mock from mlair.helpers import TimeTracking, to_list from mlair.configuration.path_config import prepare_host @@ -185,3 +186,20 @@ class TestExperimentSetup: kwargs["variables"] = ["o3", "temp"] assert ExperimentSetup(**kwargs) is not None + + def test_multiprocessing_no_debug(self): + # no debug mode, parallel + exp_setup = ExperimentSetup(use_multiprocessing_on_debug=False) + assert exp_setup.data_store.get("use_multiprocessing") is True + # no debug mode, serial + exp_setup = 
ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True) + assert exp_setup.data_store.get("use_multiprocessing") is False + + @mock.patch("sys.gettrace", return_value="dummy_not_null") + def test_multiprocessing_debug(self, mock_gettrace): + # debug mode, parallel + exp_setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True) + assert exp_setup.data_store.get("use_multiprocessing") is True + # debug mode, serial + exp_setup = ExperimentSetup(use_multiprocessing=True) + assert exp_setup.data_store.get("use_multiprocessing") is False