diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index cd3d0065d845c8dd498b38f347d59f5a39d7162b..31746ec889cc82ebbae8de82a05c5cff02a22ac0 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -31,7 +31,7 @@ DEFAULT_WINDOW_LEAD_TIME = 3 DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]} DEFAULT_TIME_DIM = "datetime" DEFAULT_INTERPOLATION_METHOD = "linear" -DEFAULT_LIMIT_NAN_FILL = 1 +DEFAULT_INTERPOLATION_LIMIT = 1 DEFAULT_TRAIN_START = "1997-01-01" DEFAULT_TRAIN_END = "2007-12-31" DEFAULT_TRAIN_MIN_LENGTH = 90 diff --git a/mlair/data_handler/advanced_data_handler.py b/mlair/data_handler/advanced_data_handler.py index 695553f08c80d027a87bec9ba4e14ebb850c61c8..57a9667f2a42575faa02d50e439252738a8dc8bb 100644 --- a/mlair/data_handler/advanced_data_handler.py +++ b/mlair/data_handler/advanced_data_handler.py @@ -95,7 +95,7 @@ class DefaultDataPreparation(AbstractDataPreparation): extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None): super().__init__() self.id_class = id_class - self.interpolate_dim = "datetime" + self.interpolation_dim = "datetime" self.min_length = min_length self._X = None self._Y = None @@ -105,7 +105,7 @@ class DefaultDataPreparation(AbstractDataPreparation): self._save_file = os.path.join(data_path, f"data_preparation_{_name_affix}.pickle") self._collection = self._create_collection() self.harmonise_X() - self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolate_dim) + self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolation_dim) self._store(fresh_store=True) @classmethod @@ -190,7 +190,7 @@ class DefaultDataPreparation(AbstractDataPreparation): def harmonise_X(self): X_original, Y_original = self.get_X_original(), self.get_Y_original() - dim = self.interpolate_dim + dim = self.interpolation_dim intersect = reduce(np.intersect1d, map(lambda x: x.coords[dim].values, X_original)) if len(intersect) < max(self.min_length, 1): X, Y = None, None @@ -326,15 +326,15 @@ def create_data_prep(): sampling = 'daily' target_dim = 'variables' target_var = 'o3' - interpolate_dim = 'datetime' + interpolation_dim = 'datetime' window_history_size = 7 window_lead_time = 3 central_station = StationPrep("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {},station_type, network, sampling, target_dim, - target_var, interpolate_dim, window_history_size, window_lead_time) + target_var, interpolation_dim, window_history_size, window_lead_time) neighbor1 = StationPrep("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {},station_type, network, sampling, target_dim, - target_var, interpolate_dim, window_history_size, window_lead_time) + target_var, interpolation_dim, window_history_size, window_lead_time) neighbor2 = StationPrep("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim, - target_var, interpolate_dim, window_history_size, window_lead_time) + target_var, interpolation_dim, window_history_size, window_lead_time) data_prep = [] data_prep.append(DataPreparationNeighbors(central_station, path, neighbors=[neighbor1, neighbor2])) diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index 259cb7fe86466fa90a09335f4c8ecfef91565b44..407465ad4cd99b85c3c5b37eb2aef6e9e71c6424 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -13,7 +13,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \ DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \ DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \ - DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \ + DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_INTERPOLATION_LIMIT, \ DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ @@ -214,7 +214,7 @@ class ExperimentSetup(RunEnvironment): dimensions=None, time_dim=None, interpolation_method=None, - limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, + interpolation_limit=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None, experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily", create_new_model = None, bootstrap_path=None, permute_data_on_training = None, transformation=None, @@ -280,8 +280,6 @@ class ExperimentSetup(RunEnvironment): # setup for data self._set_param("stations", stations, default=DEFAULT_STATIONS) - # self._set_param("network", network, default=DEFAULT_NETWORK) - # self._set_param("station_type", station_type, default=DEFAULT_STATION_TYPE) self._set_param("statistics_per_var", statistics_per_var, default=DEFAULT_VAR_ALL_DICT) self._set_param("variables", variables, default=list(self.data_store.get("statistics_per_var").keys())) self._set_param("start", start, default=DEFAULT_START) @@ -303,7 +301,7 @@ class ExperimentSetup(RunEnvironment): self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS) self._set_param("time_dim", time_dim, default=DEFAULT_TIME_DIM) self._set_param("interpolation_method", interpolation_method, default=DEFAULT_INTERPOLATION_METHOD) - self._set_param("interpolation_limit", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL) + self._set_param("interpolation_limit", interpolation_limit, default=DEFAULT_INTERPOLATION_LIMIT) # train set parameters self._set_param("start", train_start, default=DEFAULT_TRAIN_START, scope="train") @@ -352,7 +350,7 @@ class ExperimentSetup(RunEnvironment): if len(kwargs) > 0: for k, v in kwargs.items(): if len(self.data_store.search_name(k)) == 0: - self._set_param("k", v) + self._set_param(k, v) else: raise KeyError(f"Given argument {k} with value {v} cannot be set for this experiment due to a " f"conflict with an existing entry with same naming: {k}={self.data_store.get(k)}") diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index 05c62aa35d9f8542ced94ae9cfc29719f3903bc8..529efec56c89d61dea1530c2c92e07cd2c1d3a18 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -5,7 +5,7 @@ __date__ = '2019-11-25' import logging import os -from typing import Tuple, Dict, List +from typing import Tuple import numpy as np import pandas as pd @@ -16,11 +16,6 @@ from mlair.configuration import path_config from mlair.helpers.join import EmptyQueryResult from mlair.run_modules.run_environment import RunEnvironment -DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "time_dim", "target_dim", "target_var"] -DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length", - "station_type", "overwrite_local_data", "start", "end", "sampling", "transformation", - "extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"] - class PreProcessing(RunEnvironment): """ diff --git a/mlair/run_script.py b/mlair/run_script.py index 10ce46f8c0a9e89df5b46a51ab7288b7b2908772..00a28f686bf392f76787b56a48790999e9fa5c05 100644 --- a/mlair/run_script.py +++ b/mlair/run_script.py @@ -14,7 +14,7 @@ def run(stations=None, target_var=None, target_dim=None, window_lead_time=None, dimensions=None, - interpolate_method=None, interpolate_dim=None, limit_nan_fill=None, + interpolation_method=None, interpolation_dim=None, interpolation_limit=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None, diff --git a/mlair/workflows/default_workflow.py b/mlair/workflows/default_workflow.py index 4bb7e4b7bc0aff89394edad90eff9ccb6149db40..3dba7e6c5c5773fa4d74860b2cba67a5804123b7 100644 --- a/mlair/workflows/default_workflow.py +++ b/mlair/workflows/default_workflow.py @@ -22,7 +22,7 @@ class DefaultWorkflow(Workflow): target_var=None, target_dim=None, window_lead_time=None, dimensions=None, - interpolate_method=None, time_dim=None, limit_nan_fill=None, + interpolation_method=None, time_dim=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None, @@ -66,7 +66,7 @@ class DefaultWorkflowHPC(Workflow): target_var=None, target_dim=None, window_lead_time=None, dimensions=None, - interpolate_method=None, time_dim=None, limit_nan_fill=None, + interpolation_method=None, time_dim=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None, diff --git a/test/test_run_modules/test_experiment_setup.py b/test/test_run_modules/test_experiment_setup.py index 102bf32749bd2b0dcc5b1fb5b3c838543109100d..abd265f5815d974d6edb474e5a03ed08dc5843cc 100644 --- a/test/test_run_modules/test_experiment_setup.py +++ b/test/test_run_modules/test_experiment_setup.py @@ -51,8 +51,6 @@ class TestExperimentSetup: # setup for data default_stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'] assert data_store.get("stations", "general") == default_stations - assert data_store.get("network", "general") == "AIRBASE" - assert data_store.get("station_type", "general") == "background" assert data_store.get("variables", "general") == list(default_statistics_per_var.keys()) assert data_store.get("statistics_per_var", "general") == default_statistics_per_var assert data_store.get("start", "general") == "1997-01-01" @@ -66,7 +64,7 @@ class TestExperimentSetup: assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']} assert data_store.get("time_dim", "general") == "datetime" assert data_store.get("interpolation_method", "general") == "linear" - assert data_store.get("limit_nan_fill", "general") == 1 + assert data_store.get("interpolation_limit", "general") == 1 # train parameters assert data_store.get("start", "general.train") == "1997-01-01" assert data_store.get("end", "general.train") == "2007-12-31" @@ -93,7 +91,7 @@ class TestExperimentSetup: stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background", variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4, target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1", - time_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01", + time_dim="int_dim", interpolation_method="cubic", interpolation_limit=5, train_start="2000-01-01", train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05", test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False, fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20) @@ -127,7 +125,7 @@ class TestExperimentSetup: assert data_store.get("dimensions", "general") == "dim1" assert data_store.get("time_dim", "general") == "int_dim" assert data_store.get("interpolation_method", "general") == "cubic" - assert data_store.get("limit_nan_fill", "general") == 5 + assert data_store.get("interpolation_limit", "general") == 5 # train parameters assert data_store.get("start", "general.train") == "2000-01-01" assert data_store.get("end", "general.train") == "2000-01-02"