diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py index 1d014c9e6f4fc0a9168c4d3d31b1141c39fff2a1..ee764174a86ef1c72e5f13d0db66502eda5d72a2 100644 --- a/src/run_modules/pre_processing.py +++ b/src/run_modules/pre_processing.py @@ -36,7 +36,7 @@ class PreProcessing(RunEnvironment): args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope="general.preprocessing") kwargs = self.data_store.create_args_dict(DEFAULT_KWARGS_LIST, scope="general.preprocessing") stations = self.data_store.get("stations", "general") - valid_stations = self.check_valid_stations(args, kwargs, stations, load_tmp=False, save_tmp=False) + valid_stations = self.check_valid_stations(args, kwargs, stations, load_tmp=False, save_tmp=False, name="all") self.data_store.set("stations", valid_stations, "general") self.split_train_val_test() self.report_pre_processing() @@ -107,7 +107,7 @@ class PreProcessing(RunEnvironment): else: set_stations = stations[index_list] logging.debug(f"{set_name.capitalize()} stations (len={len(set_stations)}): {set_stations}") - set_stations = self.check_valid_stations(args, kwargs, set_stations, load_tmp=False) + set_stations = self.check_valid_stations(args, kwargs, set_stations, load_tmp=False, name=set_name) self.data_store.set("stations", set_stations, scope) set_args = self.data_store.create_args_dict(DEFAULT_ARGS_LIST, scope) data_set = DataGenerator(**set_args, **kwargs) @@ -116,7 +116,7 @@ class PreProcessing(RunEnvironment): self.data_store.set("transformation", data_set.transformation, "general") @staticmethod - def check_valid_stations(args: Dict, kwargs: Dict, all_stations: List[str], load_tmp=True, save_tmp=True): + def check_valid_stations(args: Dict, kwargs: Dict, all_stations: List[str], load_tmp=True, save_tmp=True, name=None): """ Check if all given stations in `all_stations` are valid. Valid means, that there is data available for the given time range (is included in `kwargs`). 
The shape and the loading time are logged in debug mode. @@ -129,7 +129,7 @@ class PreProcessing(RunEnvironment): """ t_outer = TimeTracking() t_inner = TimeTracking(start=False) - logging.info("check valid stations started") + logging.info(f"check valid stations started{' (%s)' % name if name else ''}") valid_stations = [] # all required arguments of the DataGenerator can be found in args, positional arguments in args and kwargs