Skip to content
Snippets Groups Projects
Commit 5c72060a authored by lukas leufen's avatar lukas leufen
Browse files

Merge branch 'lukas_issue297_tech_multiprocessing-switch' into 'develop'

enhanced multiprocessing switch for debugging

See merge request !285
parents 447ade0f 4fa68402
Branches
Tags
5 merge requests!319add all changes of dev into release v1.4.0 branch,!318Resolve "release v1.4.0",!299Draft: Merge default data handler and preprocessing support parameter use_multiprocessing....,!285enhanced multiprocessing switch for debugging,!259Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline #65182 passed
......@@ -4,66 +4,185 @@ Defaults
In this section, we explain which parameters are set by MLAir during the :py:`ExperimentSetup` if not specified by the
user. This is important information for example if a new :ref:`Custom Data Handler` is implemented.
================================= =============== ============================================================
parameter default comment
================================= =============== ============================================================
batch_path
batch_size
bootstrap_path
competitor_path
competitors
create_new_bootstraps
create_new_model
data_handler
data_origin
data_path
debug - MLAir checks if it is running in debug mode and stores this
dimensions
end
epochs
evaluate_bootstraps
experiment_name
experiment_path
extreme_values
extremes_on_right_tail_only
forecast_path
fraction_of_training
hostname
hpc_hosts
interpolation_limit
interpolation_method
logging_path
login_nodes
model_class
model_path
neighbors
number_of_bootstraps
overwrite_local_data
permute_data
plot_list
plot_path
start
stations
statistics_per_var
target_dim
target_var
test_start
test_end
test_min_length
time_dim
train_model
train_end
train_min_length
train_start
transformation :py:`{}` implement all further transformation functionality
inside your custom data handler
use_all_stations_on_all_data_sets
use_multiprocessing :py:`True` is set to False if MLAir is running in debug mode
upsampling
val_end
val_min_length
val_start
variables
window_history_size
window_lead_time
================================= =============== ============================================================
.. list-table:: Defaults Overview
:widths: 25 25 50
:header-rows: 1
* - parameter
- default
- comment
* - batch_path
-
-
* - batch_size
- :py:`512`
-
* - bootstrap_path
-
-
* - competitor_path
-
-
* - competitors
- :py:`[]`
-
* - create_new_bootstraps
- :py:`False`
-
* - create_new_model
- :py:`True`
-
* - data_handler
- :py:`DefaultDataHandler`
-
* - data_origin
-
-
* - data_path
-
-
* - debug
- ``-``
- MLAir checks if it is running in debug mode and stores this
* - dimensions
-
-
* - end
- :py:`"2017-12-31"`
-
* - epochs
- :py:`20`
- This is just a placeholder to prevent unintentionally long training runs
* - evaluate_bootstraps
- :py:`True`
- Bootstrapping may take some time.
* - experiment_name
-
-
* - experiment_path
-
-
* - extreme_values
- :py:`None`
-
* - extremes_on_right_tail_only
- :py:`False`
- Could be used for skewed distributions
* - forecast_path
-
-
* - fraction_of_training
-
-
* - hostname
-
-
* - hpc_hosts
-
-
* - interpolation_limit
- :py:`1`
-
* - interpolation_method
- :py:`"linear"`
-
* - logging_path
-
-
* - login_nodes
-
-
* - model_class
-
-
* - model_path
-
-
* - neighbors
-
-
* - number_of_bootstraps
-
-
* - overwrite_local_data
-
-
* - permute_data
- :py:`False`
-
* - plot_list
-
-
* - plot_path
-
-
* - start
- :py:`"1997-01-01"`
-
* - stations
-
-
* - statistics_per_var
-
-
* - target_dim
-
-
* - target_var
-
-
* - test_start
-
-
* - test_end
-
-
* - test_min_length
-
-
* - time_dim
-
-
* - train_model
-
-
* - train_end
-
-
* - train_min_length
-
-
* - train_start
-
-
* - transformation
- :py:`{}`
- implement all further transformation functionality inside your custom data handler
* - use_all_stations_on_all_data_sets
-
-
* - use_multiprocessing
- :py:`True`
- is used if MLAir is not running in debug mode
* - use_multiprocessing_on_debug
- :py:`False`
- is used if MLAir is running in debug mode
* - upsampling
-
-
* - val_end
-
-
* - val_min_length
-
-
* - val_start
-
-
* - variables
-
-
* - window_history_size
- :py:`13`
-
* - window_lead_time
- :py:`3`
-
......@@ -54,6 +54,7 @@ DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA
"temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", "no": "", "no2": "", "o3": "",
"pm10": "", "so2": ""}
DEFAULT_USE_MULTIPROCESSING = True
DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False
def get_defaults():
......
......@@ -19,7 +19,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \
DEFAULT_USE_MULTIPROCESSING
DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG
from mlair.data_handler import DefaultDataHandler
from mlair.run_modules.run_environment import RunEnvironment
from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
......@@ -215,7 +215,7 @@ class ExperimentSetup(RunEnvironment):
create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
use_multiprocessing: bool = None, **kwargs):
use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, **kwargs):
# create run framework
super().__init__()
......@@ -255,7 +255,10 @@ class ExperimentSetup(RunEnvironment):
# host system setup
debug_mode = sys.gettrace() is not None
self._set_param("debug_mode", debug_mode)
use_multiprocessing = False if debug_mode is True else use_multiprocessing
if debug_mode is True:
self._set_param("use_multiprocessing", use_multiprocessing_on_debug,
default=DEFAULT_USE_MULTIPROCESSING_ON_DEBUG)
else:
self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING)
# batch path (temporary)
......
......@@ -3,6 +3,7 @@ import logging
import os
import pytest
import mock
from mlair.helpers import TimeTracking, to_list
from mlair.configuration.path_config import prepare_host
......@@ -185,3 +186,20 @@ class TestExperimentSetup:
kwargs["variables"] = ["o3", "temp"]
assert ExperimentSetup(**kwargs) is not None
def test_multiprocessing_no_debug(self):
    """Outside debug mode only ``use_multiprocessing`` controls the stored value.

    The ``use_multiprocessing_on_debug`` switch must be ignored here.
    """
    # default use_multiprocessing (True) applies; the on-debug flag has no effect
    setup = ExperimentSetup(use_multiprocessing_on_debug=False)
    assert setup.data_store.get("use_multiprocessing") is True
    # an explicit serial request is honoured regardless of the on-debug flag
    setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True)
    assert setup.data_store.get("use_multiprocessing") is False
@mock.patch("sys.gettrace", return_value="dummy_not_null")
def test_multiprocessing_debug(self, mock_gettrace):
    """In debug mode (sys.gettrace returns non-None) only
    ``use_multiprocessing_on_debug`` controls the stored value."""
    # on-debug flag enables parallelism even though use_multiprocessing is False
    setup = ExperimentSetup(use_multiprocessing=False, use_multiprocessing_on_debug=True)
    assert setup.data_store.get("use_multiprocessing") is True
    # on-debug default (False) forces serial mode despite use_multiprocessing=True
    setup = ExperimentSetup(use_multiprocessing=True)
    assert setup.data_store.get("use_multiprocessing") is False
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment