From 3c05a5bdb8d32915f9982716ff1f2dd6bab8e7a3 Mon Sep 17 00:00:00 2001 From: lukas leufen <l.leufen@fz-juelich.de> Date: Thu, 12 Mar 2020 14:29:28 +0100 Subject: [PATCH] added min_length parameter to data handling, currently only supported for test set --- src/data_handling/data_preparation.py | 4 +++- src/run_modules/experiment_setup.py | 4 +++- src/run_modules/pre_processing.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/data_handling/data_preparation.py b/src/data_handling/data_preparation.py index 3fae0930..ff3006e3 100644 --- a/src/data_handling/data_preparation.py +++ b/src/data_handling/data_preparation.py @@ -353,7 +353,9 @@ class DataPrep(object): non_nan_observation = self.observation.dropna(dim=dim) intersect = reduce(np.intersect1d, (non_nan_history.coords[dim].values, non_nan_label.coords[dim].values, non_nan_observation.coords[dim].values)) - if len(intersect) == 0: + min_length = self.kwargs.get("min_length", 0) + length = len(intersect) + if len(intersect) < max(min_length, 1): self.history = None self.label = None self.observation = None diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py index 56c22a81..387feaa8 100644 --- a/src/run_modules/experiment_setup.py +++ b/src/run_modules/experiment_setup.py @@ -34,7 +34,8 @@ class ExperimentSetup(RunEnvironment): limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=True, trainable=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, overwrite_local_data=None, sampling="daily", - create_new_model=None, bootstrap_path=None, permute_data_on_training=None, transformation=None): + create_new_model=None, bootstrap_path=None, permute_data_on_training=None, transformation=None, + test_min_length=None): # create run framework super().__init__() @@ -107,6 +108,7 @@ class ExperimentSetup(RunEnvironment): # test set parameters self._set_param("start", test_start, default="2010-01-01", scope="general.test") self._set_param("end", test_end, default="2017-12-31", scope="general.test") + self._set_param("min_length", test_min_length, default=30, scope="general.test") # train_val set parameters self._set_param("start", self.data_store.get("start", "general.train"), scope="general.train_val") diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py index 1d014c9e..20286bc4 100644 --- a/src/run_modules/pre_processing.py +++ b/src/run_modules/pre_processing.py @@ -11,7 +11,7 @@ from src.join import EmptyQueryResult from src.run_modules.run_environment import RunEnvironment DEFAULT_ARGS_LIST = ["data_path", "network", "stations", "variables", "interpolate_dim", "target_dim", "target_var"] -DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", +DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length", "station_type", "overwrite_local_data", "start", "end", "sampling", "transformation"] -- GitLab