From 88c70c578a4f7c59d90147a70acf9c9e73ea3841 Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz-juelich.de> Date: Wed, 7 Apr 2021 10:34:32 +0200 Subject: [PATCH] multiprocessing can now be enabled in debug mode --- docs/_source/defaults.rst | 67 ++++++++++++------- mlair/configuration/defaults.py | 1 + mlair/run_modules/experiment_setup.py | 11 +-- .../test_run_modules/test_experiment_setup.py | 18 +++++ 4 files changed, 69 insertions(+), 28 deletions(-) diff --git a/docs/_source/defaults.rst b/docs/_source/defaults.rst index e95cf10e..3a25ae61 100644 --- a/docs/_source/defaults.rst +++ b/docs/_source/defaults.rst @@ -4,34 +4,34 @@ Defaults In this section, we explain which parameters are set by MLAir during the :py:`ExperimentSetup` if not specified by the user. This is important information for example if a new :ref:`Custom Data Handler` is implemented. -================================= =============== ============================================================ -parameter default comment -================================= =============== ============================================================ +================================= ======================== ============================================================ +parameter default comment +================================= ======================== ============================================================ batch_path -batch_size +batch_size :py:`512` bootstrap_path competitor_path -competitors -create_new_bootstraps -create_new_model -data_handler +competitors :py:`[]` +create_new_bootstraps :py:`False` +create_new_model :py:`True` +data_handler :py:`DefaultDataHandler` data_origin data_path -debug - MLAir checks if it is running in debug mode and stores this +debug ``-`` MLAir checks if it is running in debug mode and stores this dimensions -end -epochs -evaluate_bootstraps +end :py:`"2017-12-31"` +epochs :py:`20` This is just a placeholder to prevent unintended longish training +evaluate_bootstraps 
:py:`True` Bootstrapping can take some time. experiment_name experiment_path -extreme_values -extremes_on_right_tail_only +extreme_values :py:`None` +extremes_on_right_tail_only :py:`False` Could be used for skew distributions forecast_path fraction_of_training hostname hpc_hosts -interpolation_limit -interpolation_method +interpolation_limit :py:`1` +interpolation_method :py:`"linear"` logging_path login_nodes model_class @@ -39,10 +39,10 @@ model_path neighbors number_of_bootstraps overwrite_local_data -permute_data +permute_data :py:`False` plot_list plot_path -start +start ``"1997-01-01"`` stations statistics_per_var target_dim @@ -55,15 +55,34 @@ train_model train_end train_min_length train_start -transformation :py:`{}` implement all further transformation functionality - inside your custom data handler +transformation :py:`{}` implement all further transformation functionality + inside your custom data handler use_all_stations_on_all_data_sets -use_multiprocessing :py:`True` is set to False if MLAir is running in debug mode +use_multiprocessing :py:`True` is used if MLAir is not running in debug mode +use_multiprocessing_on_debug :py:`False` is used if MLAir is running in debug mode upsampling val_end val_min_length val_start variables -window_history_size -window_lead_time -================================= =============== ============================================================ +window_history_size :py:`13` +window_lead_time :py:`3` +================================= ======================== ============================================================ + + + +Test Table + + ++---------+---------+---------+ +| parameter | default | comment | ++---------+---------+---------+ +| batch_path | | +| batch_size | :py:`512` | | +| bootstrap_path | | +| competitor_path | | +| debug | ``-`` | MLAir checks if it is running in debug mode and stores this | +| dimensions | | +| end | :py:`"2017-12-31"` | | +| epochs | :py:`20` | This is just a placeholder to prevent 
unintended longish training | ++---------+---------+---------+ \ No newline at end of file diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index 8805acfc..a874611a 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -54,6 +54,7 @@ DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "", "pm10": "", "so2": ""} DEFAULT_USE_MULTIPROCESSING = True +DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False def get_defaults(): diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index c777bcc4..24fedaa8 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -19,7 +19,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \ - DEFAULT_USE_MULTIPROCESSING + DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG from mlair.data_handler import DefaultDataHandler from mlair.run_modules.run_environment import RunEnvironment from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel @@ -215,7 +215,7 @@ class ExperimentSetup(RunEnvironment): create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None, hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None, data_origin: Dict = None, competitors: list = None, competitor_path: str = None, - use_multiprocessing: bool = None, **kwargs): + use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, **kwargs): # create run framework 
super().__init__() @@ -255,8 +255,11 @@ class ExperimentSetup(RunEnvironment): # host system setup debug_mode = sys.gettrace() is not None self._set_param("debug_mode", debug_mode) - use_multiprocessing = False if debug_mode is True else use_multiprocessing - self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING) + if debug_mode is True: + self._set_param("use_multiprocessing", use_multiprocessing_on_debug, + default=DEFAULT_USE_MULTIPROCESSING_ON_DEBUG) + else: + self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING) # batch path (temporary) self._set_param("batch_path", batch_path, default=os.path.join(experiment_path, "batch_data")) diff --git a/test/test_run_modules/test_experiment_setup.py b/test/test_run_modules/test_experiment_setup.py index 7c63d3d1..d7cd0887 100644 --- a/test/test_run_modules/test_experiment_setup.py +++ b/test/test_run_modules/test_experiment_setup.py @@ -3,6 +3,7 @@ import logging import os import pytest +import mock from mlair.helpers import TimeTracking, to_list from mlair.configuration.path_config import prepare_host @@ -185,3 +186,20 @@ class TestExperimentSetup: kwargs["variables"] = ["o3", "temp"] assert ExperimentSetup(**kwargs) is not None + + def test_multiprocessing_no_debug(self): + # no debug mode, parallel + exp_setup = ExperimentSetup(use_multiprocessing_on_debug=False) + assert exp_setup.data_store.get("use_multiprocessing") is True + # no debug mode, serial + exp_setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True) + assert exp_setup.data_store.get("use_multiprocessing") is False + + @mock.patch("sys.gettrace", return_value="dummy_not_null") + def test_multiprocessing_debug(self, mock_gettrace): + # debug mode, parallel + exp_setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True) + assert exp_setup.data_store.get("use_multiprocessing") is True + # debug mode, serial + 
exp_setup = ExperimentSetup(use_multiprocessing=True) + assert exp_setup.data_store.get("use_multiprocessing") is False -- GitLab