Skip to content
Snippets Groups Projects
Commit 5c72060a authored by lukas leufen's avatar lukas leufen
Browse files

Merge branch 'lukas_issue297_tech_multiprocessing-switch' into 'develop'

enhanced multiprocessing switch for debugging

See merge request !285
parents 447ade0f 4fa68402
Branches
Tags
5 merge requests!319add all changes of dev into release v1.4.0 branch,!318Resolve "release v1.4.0",!299Draft: Merge default data handler and preprocessing support parameter use_multiprocessing....,!285enhanced multiprocessing switch for debugging,!259Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline #65182 passed
......@@ -4,66 +4,185 @@ Defaults
In this section, we explain which parameters are set by MLAir during the :py:`ExperimentSetup` if not specified by the
user. This is important information for example if a new :ref:`Custom Data Handler` is implemented.
================================= =============== ============================================================
parameter default comment
================================= =============== ============================================================
batch_path
batch_size
bootstrap_path
competitor_path
competitors
create_new_bootstraps
create_new_model
data_handler
data_origin
data_path
debug - MLAir checks if it is running in debug mode and stores this
dimensions
end
epochs
evaluate_bootstraps
experiment_name
experiment_path
extreme_values
extremes_on_right_tail_only
forecast_path
fraction_of_training
hostname
hpc_hosts
interpolation_limit
interpolation_method
logging_path
login_nodes
model_class
model_path
neighbors
number_of_bootstraps
overwrite_local_data
permute_data
plot_list
plot_path
start
stations
statistics_per_var
target_dim
target_var
test_start
test_end
test_min_length
time_dim
train_model
train_end
train_min_length
train_start
transformation :py:`{}` implement all further transformation functionality
inside your custom data handler
use_all_stations_on_all_data_sets
use_multiprocessing :py:`True` is set to False if MLAir is running in debug mode
upsampling
val_end
val_min_length
val_start
variables
window_history_size
window_lead_time
================================= =============== ============================================================
.. list-table:: Defaults Overview
:widths: 25 25 50
:header-rows: 1
* - parameter
- default
- comment
* - batch_path
-
-
* - batch_size
- :py:`512`
-
* - bootstrap_path
-
-
* - competitor_path
-
-
* - competitors
- :py:`[]`
-
* - create_new_bootstraps
- :py:`False`
-
* - create_new_model
- :py:`True`
-
* - data_handler
- :py:`DefaultDataHandler`
-
* - data_origin
-
-
* - data_path
-
-
* - debug
- ``-``
- MLAir checks if it is running in debug mode and stores this
* - dimensions
-
-
* - end
- :py:`"2017-12-31"`
-
* - epochs
- :py:`20`
- This is just a placeholder to prevent unintentionally long training runs
* - evaluate_bootstraps
- :py:`True`
- Bootstrapping may take some time.
* - experiment_name
-
-
* - experiment_path
-
-
* - extreme_values
- :py:`None`
-
* - extremes_on_right_tail_only
- :py:`False`
- Could be used for skewed distributions
* - forecast_path
-
-
* - fraction_of_training
-
-
* - hostname
-
-
* - hpc_hosts
-
-
* - interpolation_limit
- :py:`1`
-
* - interpolation_method
- :py:`"linear"`
-
* - logging_path
-
-
* - login_nodes
-
-
* - model_class
-
-
* - model_path
-
-
* - neighbors
-
-
* - number_of_bootstraps
-
-
* - overwrite_local_data
-
-
* - permute_data
- :py:`False`
-
* - plot_list
-
-
* - plot_path
-
-
* - start
- :py:`"1997-01-01"`
-
* - stations
-
-
* - statistics_per_var
-
-
* - target_dim
-
-
* - target_var
-
-
* - test_start
-
-
* - test_end
-
-
* - test_min_length
-
-
* - time_dim
-
-
* - train_model
-
-
* - train_end
-
-
* - train_min_length
-
-
* - train_start
-
-
* - transformation
- :py:`{}`
- implement all further transformation functionality inside your custom data handler
* - use_all_stations_on_all_data_sets
-
-
* - use_multiprocessing
- :py:`True`
- is used if MLAir is not running in debug mode
* - use_multiprocessing_on_debug
- :py:`False`
- is used if MLAir is running in debug mode
* - upsampling
-
-
* - val_end
-
-
* - val_min_length
-
-
* - val_start
-
-
* - variables
-
-
* - window_history_size
- :py:`13`
-
* - window_lead_time
- :py:`3`
-
......@@ -54,6 +54,7 @@ DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA
"temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
"pm10": "", "so2": ""}
DEFAULT_USE_MULTIPROCESSING = True
DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False
def get_defaults():
......
......@@ -19,7 +19,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \
DEFAULT_USE_MULTIPROCESSING
DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG
from mlair.data_handler import DefaultDataHandler
from mlair.run_modules.run_environment import RunEnvironment
from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
......@@ -215,7 +215,7 @@ class ExperimentSetup(RunEnvironment):
create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
use_multiprocessing: bool = None, **kwargs):
use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, **kwargs):
# create run framework
super().__init__()
......@@ -255,7 +255,10 @@ class ExperimentSetup(RunEnvironment):
# host system setup
debug_mode = sys.gettrace() is not None
self._set_param("debug_mode", debug_mode)
use_multiprocessing = False if debug_mode is True else use_multiprocessing
if debug_mode is True:
self._set_param("use_multiprocessing", use_multiprocessing_on_debug,
default=DEFAULT_USE_MULTIPROCESSING_ON_DEBUG)
else:
self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING)
# batch path (temporary)
......
......@@ -3,6 +3,7 @@ import logging
import os
import pytest
import mock
from mlair.helpers import TimeTracking, to_list
from mlair.configuration.path_config import prepare_host
......@@ -185,3 +186,20 @@ class TestExperimentSetup:
kwargs["variables"] = ["o3", "temp"]
assert ExperimentSetup(**kwargs) is not None
def test_multiprocessing_no_debug(self):
    """Outside debug mode only ``use_multiprocessing`` controls the stored value.

    The ``use_multiprocessing_on_debug`` switch must be ignored here.
    """
    # default use_multiprocessing (True) applies; the on-debug flag has no effect
    setup = ExperimentSetup(use_multiprocessing_on_debug=False)
    assert setup.data_store.get("use_multiprocessing") is True
    # an explicit serial request is honoured regardless of the on-debug flag
    setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True)
    assert setup.data_store.get("use_multiprocessing") is False
@mock.patch("sys.gettrace", return_value="dummy_not_null")
def test_multiprocessing_debug(self, mock_gettrace):
    """In debug mode (sys.gettrace returns non-None) only
    ``use_multiprocessing_on_debug`` controls the stored value."""
    # on-debug flag enables parallelism even though use_multiprocessing is False
    setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True)
    assert setup.data_store.get("use_multiprocessing") is True
    # on-debug default (False) forces serial mode despite use_multiprocessing=True
    setup = ExperimentSetup(use_multiprocessing=True)
    assert setup.data_store.get("use_multiprocessing") is False
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment