From 1de5655bafe0a75c7933d93bed7163dba49a26ed Mon Sep 17 00:00:00 2001 From: lukas leufen <l.leufen@fz-juelich.de> Date: Mon, 13 Jul 2020 17:18:03 +0200 Subject: [PATCH] renamed interpolate to interpolation --- mlair/configuration/defaults.py | 4 ++-- mlair/data_handling/data_generator.py | 24 +++++++++---------- mlair/run_modules/experiment_setup.py | 18 +++++++------- mlair/run_modules/pre_processing.py | 6 ++--- .../test_data_handling/test_data_generator.py | 6 ++--- test/test_modules/test_experiment_setup.py | 10 ++++---- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index 0038bb55..3f38e14f 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -28,8 +28,8 @@ DEFAULT_TARGET_VAR = "o3" DEFAULT_TARGET_DIM = "variables" DEFAULT_WINDOW_LEAD_TIME = 3 DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]} -DEFAULT_INTERPOLATE_DIM = "datetime" -DEFAULT_INTERPOLATE_METHOD = "linear" +DEFAULT_INTERPOLATION_DIM = "datetime" +DEFAULT_INTERPOLATION_METHOD = "linear" DEFAULT_LIMIT_NAN_FILL = 1 DEFAULT_TRAIN_START = "1997-01-01" DEFAULT_TRAIN_END = "2007-12-31" diff --git a/mlair/data_handling/data_generator.py b/mlair/data_handling/data_generator.py index c0a32771..0088d00a 100644 --- a/mlair/data_handling/data_generator.py +++ b/mlair/data_handling/data_generator.py @@ -58,8 +58,8 @@ class DataGenerator(keras.utils.Sequence): """ def __init__(self, data_path: str, stations: Union[str, List[str]], variables: List[str], - interpolate_dim: str, target_dim: str, target_var: str, station_type: str = None, - interpolate_method: str = "linear", limit_nan_fill: int = 1, window_history_size: int = 7, + interpolation_dim: str, target_dim: str, target_var: str, station_type: str = None, + interpolation_method: str = "linear", limit_nan_fill: int = 1, window_history_size: int = 7, window_lead_time: int = 4, transformation: Dict = None, extreme_values: num_or_list = None, data_preparation=None, **kwargs): """ @@ -68,11 +68,11 @@ class DataGenerator(keras.utils.Sequence): :param data_path: path to data :param stations: list with all stations to include :param variables: list with all used variables - :param interpolate_dim: dimension along which interpolation is applied + :param interpolation_dim: dimension along which interpolation is applied :param target_dim: dimension of target variable :param target_var: name of target variable :param station_type: TOAR station type classification (background, traffic) - :param interpolate_method: method of interpolation + :param interpolation_method: method of interpolation :param limit_nan_fill: maximum gab in data to fill by interpolation :param window_history_size: length of the history window :param window_lead_time: lenght of the label window @@ -87,11 +87,11 @@ class DataGenerator(keras.utils.Sequence): os.makedirs(self.data_path_tmp) self.stations = helpers.to_list(stations) self.variables = variables - self.interpolate_dim = interpolate_dim + self.interpolation_dim = interpolation_dim self.target_dim = target_dim self.target_var = target_var self.station_type = station_type - self.interpolate_method = interpolate_method + self.interpolation_method = interpolation_method self.limit_nan_fill = limit_nan_fill self.window_history_size = window_history_size self.window_lead_time = window_lead_time @@ -104,7 +104,7 @@ class DataGenerator(keras.utils.Sequence): """Display all class attributes.""" return f"DataGenerator(path='{self.data_path}', stations={self.stations}, " \ f"variables={self.variables}, station_type={self.station_type}, " \ - f"interpolate_dim='{self.interpolate_dim}', target_dim='{self.target_dim}', " \ + f"interpolation_dim='{self.interpolation_dim}', target_dim='{self.target_dim}', " \ f"target_var='{self.target_var}', **{self.kwargs})" def __len__(self): @@ -292,11 +292,11 @@ class DataGenerator(keras.utils.Sequence): **self.kwargs) if self.transformation is not None: data.transform("datetime", **helpers.remove_items(self.transformation, "scope")) - data.interpolate(self.interpolate_dim, method=self.interpolate_method, limit=self.limit_nan_fill) - data.make_history_window(self.target_dim, self.window_history_size, self.interpolate_dim) - data.make_labels(self.target_dim, self.target_var, self.interpolate_dim, self.window_lead_time) - data.make_observation(self.target_dim, self.target_var, self.interpolate_dim) - data.remove_nan(self.interpolate_dim) + data.interpolate(self.interpolation_dim, method=self.interpolation_method, limit=self.limit_nan_fill) + data.make_history_window(self.target_dim, self.window_history_size, self.interpolation_dim) + data.make_labels(self.target_dim, self.target_var, self.interpolation_dim, self.window_lead_time) + data.make_observation(self.target_dim, self.target_var, self.interpolation_dim) + data.remove_nan(self.interpolation_dim) if self.extreme_values is not None: kwargs = {"extremes_on_right_tail_only": self.kwargs.get("extremes_on_right_tail_only", False)} data.multiply_extremes(self.extreme_values, **kwargs) diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index 54a51d42..d93b8c02 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -13,7 +13,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \ DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \ DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \ - DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATE_DIM, DEFAULT_INTERPOLATE_METHOD, DEFAULT_LIMIT_NAN_FILL, \ + DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATION_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \ DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ @@ -66,8 +66,8 @@ class ExperimentSetup(RunEnvironment): # interpolation self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']}) - self._set_param("interpolate_dim", interpolate_dim, default='datetime') - self._set_param("interpolate_method", interpolate_method, default='linear') + self._set_param("interpolation_dim", interpolation_dim, default='datetime') + self._set_param("interpolation_method", interpolation_method, default='linear') self._set_param("limit_nan_fill", limit_nan_fill, default=1) # train set parameters @@ -140,8 +140,8 @@ class ExperimentSetup(RunEnvironment): :param window_lead_time: number of time steps to predict by model (default 3). Time steps `t_0+1` to `t_0+w` are predicted. :param dimensions: - :param interpolate_dim: - :param interpolate_method: + :param interpolation_dim: + :param interpolation_method: :param limit_nan_fill: :param train_start: :param train_end: @@ -220,8 +220,8 @@ class ExperimentSetup(RunEnvironment): target_dim=None, window_lead_time: int = None, dimensions=None, - interpolate_dim=None, - interpolate_method=None, + interpolation_dim=None, + interpolation_method=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None, experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily", @@ -306,8 +306,8 @@ class ExperimentSetup(RunEnvironment): # interpolation self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS) - self._set_param("interpolate_dim", interpolate_dim, default=DEFAULT_INTERPOLATE_DIM) - self._set_param("interpolate_method", interpolate_method, default=DEFAULT_INTERPOLATE_METHOD) + self._set_param("interpolation_dim", interpolation_dim, default=DEFAULT_INTERPOLATION_DIM) + self._set_param("interpolation_method", interpolation_method, default=DEFAULT_INTERPOLATION_METHOD) self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL) # train set parameters diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index c0d53aed..243daf20 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -16,7 +16,7 @@ from mlair.configuration import path_config from mlair.helpers.join import EmptyQueryResult from mlair.run_modules.run_environment import RunEnvironment -DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolate_dim", "target_dim", "target_var"] +DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolation_dim", "target_dim", "target_var"] DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length", "station_type", "overwrite_local_data", "start", "end", "sampling", "transformation", "extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"] @@ -225,8 +225,8 @@ class PreProcessing(RunEnvironment): loading time are logged in debug mode. :param args: Dictionary with required parameters for DataGenerator class (`data_path`, `network`, `stations`, - `variables`, `interpolate_dim`, `target_dim`, `target_var`). - :param kwargs: positional parameters for the DataGenerator class (e.g. `start`, `interpolate_method`, + `variables`, `interpolation_dim`, `target_dim`, `target_var`). + :param kwargs: positional parameters for the DataGenerator class (e.g. `start`, `interpolation_method`, `window_lead_time`). :param all_stations: All stations to check. :param name: name to display in the logging info message diff --git a/test/test_data_handling/test_data_generator.py b/test/test_data_handling/test_data_generator.py index cb86d174..413d25dd 100644 --- a/test/test_data_handling/test_data_generator.py +++ b/test/test_data_handling/test_data_generator.py @@ -80,10 +80,10 @@ class TestDataGenerator: assert gen.stations == ['DEBW107'] assert gen.variables == ['o3', 'temp'] assert gen.station_type is None - assert gen.interpolate_dim == 'datetime' + assert gen.interpolation_dim == 'datetime' assert gen.target_dim == 'variables' assert gen.target_var == 'o3' - assert gen.interpolate_method == "linear" + assert gen.interpolation_method == "linear" assert gen.limit_nan_fill == 1 assert gen.window_history_size == 7 assert gen.window_lead_time == 4 @@ -93,7 +93,7 @@ class TestDataGenerator: def test_repr(self, gen): path = os.path.join(os.path.dirname(__file__), 'data') assert gen.__repr__().rstrip() == f"DataGenerator(path='{path}', stations=['DEBW107'], " \ - f"variables=['o3', 'temp'], station_type=None, interpolate_dim='datetime', " \ + f"variables=['o3', 'temp'], station_type=None, interpolation_dim='datetime', " \ f"target_dim='variables', target_var='o3', **{{'start': 2010, 'end': 2014}})" \ .rstrip() diff --git a/test/test_modules/test_experiment_setup.py b/test/test_modules/test_experiment_setup.py index 60f0d2f9..0f1f7a0c 100644 --- a/test/test_modules/test_experiment_setup.py +++ b/test/test_modules/test_experiment_setup.py @@ -64,8 +64,8 @@ class TestExperimentSetup: assert data_store.get("window_lead_time", "general") == 3 # interpolation assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']} - assert data_store.get("interpolate_dim", "general") == "datetime" - assert data_store.get("interpolate_method", "general") == "linear" + assert data_store.get("interpolation_dim", "general") == "datetime" + assert data_store.get("interpolation_method", "general") == "linear" assert data_store.get("limit_nan_fill", "general") == 1 # train parameters assert data_store.get("start", "general.train") == "1997-01-01" @@ -93,7 +93,7 @@ class TestExperimentSetup: stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background", variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4, target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1", - interpolate_dim="int_dim", interpolate_method="cubic", limit_nan_fill=5, train_start="2000-01-01", + interpolation_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01", train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05", test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False, fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20) @@ -125,8 +125,8 @@ class TestExperimentSetup: assert data_store.get("window_lead_time", "general") == 10 # interpolation assert data_store.get("dimensions", "general") == "dim1" - assert data_store.get("interpolate_dim", "general") == "int_dim" - assert data_store.get("interpolate_method", "general") == "cubic" + assert data_store.get("interpolation_dim", "general") == "int_dim" + assert data_store.get("interpolation_method", "general") == "cubic" assert data_store.get("limit_nan_fill", "general") == 5 # train parameters assert data_store.get("start", "general.train") == "2000-01-01" -- GitLab