diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py
index 785aab88992e84a84ab4144040597922a48e5134..bfbef52180b5c8156acae0b7005e8ad984ae1cba 100644
--- a/mlair/configuration/defaults.py
+++ b/mlair/configuration/defaults.py
@@ -55,6 +55,7 @@ DEFAULT_DATA_ORIGIN = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA
                        "pm10": "", "so2": ""}
 DEFAULT_USE_MULTIPROCESSING = True
 DEFAULT_USE_MULTIPROCESSING_ON_DEBUG = False
+DEFAULT_MAX_NUMBER_MULTIPROCESSING = 16
 
 
 def get_defaults():
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index f70f454b2cc2cd9113565c4403969d08fa47072a..a17de95407a74d1504877fdce03a82d1c943e868 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -37,11 +37,12 @@ class DefaultDataHandler(AbstractDataHandler):
 
     DEFAULT_ITER_DIM = "Stations"
     DEFAULT_TIME_DIM = "datetime"
+    MAX_NUMBER_MULTIPROCESSING = 16
 
     def __init__(self, id_class: data_handler, experiment_path: str, min_length: int = 0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None,
                  store_processed_data=True, iter_dim=DEFAULT_ITER_DIM, time_dim=DEFAULT_TIME_DIM,
-                 use_multiprocessing=True):
+                 use_multiprocessing=True, max_number_multiprocessing=MAX_NUMBER_MULTIPROCESSING):
         super().__init__()
         self.id_class = id_class
         self.time_dim = time_dim
@@ -52,6 +53,7 @@ class DefaultDataHandler(AbstractDataHandler):
         self._X_extreme = None
         self._Y_extreme = None
         self._use_multiprocessing = use_multiprocessing
+        self._max_number_multiprocessing = max_number_multiprocessing
         _name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self))
         self._save_file = os.path.join(experiment_path, "data", f"{_name_affix}.pickle")
         self._collection = self._create_collection()
@@ -301,7 +303,8 @@
                         if "feature_range" in opts.keys():
                             transformation_dict[i][var]["feature_range"] = opts.get("feature_range", None)
 
-        if multiprocessing.cpu_count() > 1 and kwargs.get("use_multiprocessing", True) is True:  # parallel solution
+        max_process = kwargs.get("max_number_multiprocessing", 16)  # fallback mirrors MAX_NUMBER_MULTIPROCESSING
+        n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process])  # cap by physical cpus, stations and max_process
+        if n_process > 1 and kwargs.get("use_multiprocessing", True) is True:  # parallel solution
             logging.info("use parallel transformation approach")
-            pool = multiprocessing.Pool(
-                min([psutil.cpu_count(logical=False), len(set_stations), 16]))  # use only physical cpus
+            pool = multiprocessing.Pool(n_process)
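
The sizing rule introduced in this hunk caps the worker count at the number of physical cores, the number of stations to process, and the configured maximum. A minimal standalone sketch of that rule (the pool_size helper is illustrative, not part of the patch; the `or 1` fallback is an addition, since psutil.cpu_count(logical=False) can return None on some platforms):

    import psutil

    def pool_size(n_stations: int, max_process: int = 16) -> int:
        # psutil.cpu_count(logical=False) counts physical cores only; it can
        # return None on some platforms, hence the fallback to 1.
        physical = psutil.cpu_count(logical=False) or 1
        return min([physical, n_stations, max_process])

    # On an 8-core machine with 100 stations and max_process=4 this yields 4
    # workers; with only 2 stations it yields 2, so small jobs never spawn
    # more processes than there is work for.
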
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 24fedaa82615f93941ee737f13981e0c334259a9..bd06914f3f7c2e8f745afbd4998eed68964b6fa1 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -19,7 +19,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST, DEFAULT_SAMPLING, DEFAULT_DATA_ORIGIN, DEFAULT_ITER_DIM, \
-    DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG
+    DEFAULT_USE_MULTIPROCESSING, DEFAULT_USE_MULTIPROCESSING_ON_DEBUG, DEFAULT_MAX_NUMBER_MULTIPROCESSING
 from mlair.data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.fully_connected_networks import FCN_64_32_16 as VanillaModel
@@ -215,7 +215,8 @@ class ExperimentSetup(RunEnvironment):
                  create_new_bootstraps=None, data_path: str = None, batch_path: str = None, login_nodes=None,
                  hpc_hosts=None, model=None, batch_size=None, epochs=None, data_handler=None,
                  data_origin: Dict = None, competitors: list = None, competitor_path: str = None,
-                 use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None, **kwargs):
+                 use_multiprocessing: bool = None, use_multiprocessing_on_debug: bool = None,
+                 max_number_multiprocessing: int = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -260,6 +261,8 @@ class ExperimentSetup(RunEnvironment):
                             default=DEFAULT_USE_MULTIPROCESSING_ON_DEBUG)
         else:
             self._set_param("use_multiprocessing", use_multiprocessing, default=DEFAULT_USE_MULTIPROCESSING)
+        self._set_param("max_number_multiprocessing", max_number_multiprocessing,
+                        default=DEFAULT_MAX_NUMBER_MULTIPROCESSING)
 
         # batch path (temporary)
         self._set_param("batch_path", batch_path, default=os.path.join(experiment_path, "batch_data"))
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index db9d1d5ef41dfa7068c630d1f563b39d95eee06f..11d732760d99a8ac1c9b94e6e87378f21fe8b825 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -243,10 +243,11 @@ class PreProcessing(RunEnvironment):
         kwargs = self.data_store.create_args_dict(data_handler.requirements(), scope=set_name)
         use_multiprocessing = self.data_store.get("use_multiprocessing")
 
-        if multiprocessing.cpu_count() > 1 and use_multiprocessing:  # parallel solution
+        max_process = self.data_store.get("max_number_multiprocessing")
+        n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process])  # cap by physical cpus, stations and max_process
+        if n_process > 1 and use_multiprocessing:  # parallel solution
             logging.info("use parallel validate station approach")
-            pool = multiprocessing.Pool(
-                min([psutil.cpu_count(logical=False), len(set_stations), 16]))  # use only physical cpus
+            pool = multiprocessing.Pool(n_process)
             logging.info(f"running {getattr(pool, '_processes')} processes in parallel")
             output = [
                 pool.apply_async(f_proc, args=(data_handler, station, set_name, store_processed_data), kwds=kwargs)
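
End to end, the new option travels from ExperimentSetup into the data store under "max_number_multiprocessing", where PreProcessing (above) and DefaultDataHandler read it to bound their pools. A minimal usage sketch, assuming an otherwise default MLAir run (the station IDs are illustrative):

    from mlair.run_modules.experiment_setup import ExperimentSetup

    # Cap every multiprocessing pool at 4 workers, independent of core count
    # and station count; omitting the argument falls back to the default of 16.
    ExperimentSetup(stations=["DEBW107", "DEBW013"], max_number_multiprocessing=4)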