diff --git a/mlair/configuration/defaults.py b/mlair/configuration/defaults.py index a83b3f597e202dde44447a301e6ca10688ff1b79..cd3d0065d845c8dd498b38f347d59f5a39d7162b 100644 --- a/mlair/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -29,7 +29,7 @@ DEFAULT_TARGET_VAR = "o3" DEFAULT_TARGET_DIM = "variables" DEFAULT_WINDOW_LEAD_TIME = 3 DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]} -DEFAULT_INTERPOLATION_DIM = "datetime" +DEFAULT_TIME_DIM = "datetime" DEFAULT_INTERPOLATION_METHOD = "linear" DEFAULT_LIMIT_NAN_FILL = 1 DEFAULT_TRAIN_START = "1997-01-01" diff --git a/mlair/data_handler/data_preparation_neighbors.py b/mlair/data_handler/data_preparation_neighbors.py index 93d21f3ae2cb8a8b287bfc23f38b427bb56ec677..0c95b242e1046618403ebb6592407ef8b680e890 100644 --- a/mlair/data_handler/data_preparation_neighbors.py +++ b/mlair/data_handler/data_preparation_neighbors.py @@ -53,7 +53,7 @@ if __name__ == "__main__": "sampling": 'daily', "target_dim": 'variables', "target_var": 'o3', - "interpolation_dim": 'datetime', + "time_dim": 'datetime', "window_history_size": 7, "window_lead_time": 3, "neighbors": ["DEBW034"], diff --git a/mlair/data_handler/station_preparation.py b/mlair/data_handler/station_preparation.py index 57dd60653908d76815742324e4d78c3344a1465f..90138838ccdfd54e5c7a39bf5b28b2ba47575d30 100644 --- a/mlair/data_handler/station_preparation.py +++ b/mlair/data_handler/station_preparation.py @@ -39,7 +39,7 @@ class AbstractStationPrep(object): class StationPrep(AbstractStationPrep): def __init__(self, station, data_path, statistics_per_var, station_type, network, sampling, - target_dim, target_var, interpolation_dim, window_history_size, window_lead_time, + target_dim, target_var, time_dim, window_history_size, window_lead_time, overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True, min_length: int = 0, start=None, end=None, **kwargs): super().__init__() # path, station, statistics_per_var, transformation, **kwargs) @@ -53,7 +53,7 @@ class StationPrep(AbstractStationPrep): self.sampling = sampling self.target_dim = target_dim self.target_var = target_var - self.interpolation_dim = interpolation_dim + self.time_dim = time_dim self.window_history_size = window_history_size self.window_lead_time = window_lead_time self.overwrite_local_data = overwrite_local_data @@ -99,7 +99,7 @@ class StationPrep(AbstractStationPrep): f"statistics_per_var={self.statistics_per_var}, " \ f"station_type='{self.station_type}', network='{self.network}', " \ f"sampling='{self.sampling}', target_dim='{self.target_dim}', target_var='{self.target_var}', " \ - f"interpolate_dim='{self.interpolation_dim}', window_history_size={self.window_history_size}, " \ + f"time_dim='{self.time_dim}', window_history_size={self.window_history_size}, " \ f"window_lead_time={self.window_lead_time}, overwrite_local_data={self.overwrite_local_data}, " \ f"transformation={self._print_transformation_as_string}, **{self.kwargs})" @@ -144,7 +144,7 @@ class StationPrep(AbstractStationPrep): return coords.rename(index={"station_lon": "lon", "station_lat": "lat"}).to_dict()[str(self)] def call_transform(self, inverse=False): - self.transform(dim=self.interpolation_dim, method=self.transformation["method"], + self.transform(dim=self.time_dim, method=self.transformation["method"], mean=self.transformation['mean'], std=self.transformation["std"], min_val=self.transformation["min"], max_val=self.transformation["max"], inverse=inverse @@ -164,10 +164,10 @@ class StationPrep(AbstractStationPrep): self.make_samples() def make_samples(self): - self.make_history_window(self.target_dim, self.window_history_size, self.interpolation_dim) - self.make_labels(self.target_dim, self.target_var, self.interpolation_dim, self.window_lead_time) - self.make_observation(self.target_dim, self.target_var, self.interpolation_dim) - self.remove_nan(self.interpolation_dim) + self.make_history_window(self.target_dim, self.window_history_size, self.time_dim) + self.make_labels(self.target_dim, self.target_var, self.time_dim, self.window_lead_time) + self.make_observation(self.target_dim, self.target_var, self.time_dim) + self.remove_nan(self.time_dim) def read_data_from_disk(self, source_name=""): """ @@ -658,13 +658,13 @@ if __name__ == "__main__": sp = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122', statistics_per_var=statistics_per_var, station_type='background', network='UBA', sampling='daily', target_dim='variables', target_var='o3', - interpolation_dim='datetime', window_history_size=7, window_lead_time=3, + time_dim='datetime', window_history_size=7, window_lead_time=3, ) # transformation={'method': 'standardise'}) # sp.set_transformation({'method': 'standardise', 'mean': sp.mean+2, 'std': sp.std+1}) sp2 = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122', statistics_per_var=statistics_per_var, station_type='background', network='UBA', sampling='daily', target_dim='variables', target_var='o3', - interpolation_dim='datetime', window_history_size=7, window_lead_time=3, + time_dim='datetime', window_history_size=7, window_lead_time=3, transformation={'method': 'standardise'}) sp2.transform(inverse=True) sp.get_X() diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py index 9b74c473c3c1bf33efdbff3a8f38ee482250cbed..fe58cd3797eed9b5979e0b57ec00d76ce53a68d6 100644 --- a/mlair/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -13,7 +13,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \ DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \ DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \ - DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATION_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \ + DEFAULT_DIMENSIONS, DEFAULT_TIME_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \ DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ @@ -66,7 +66,7 @@ class ExperimentSetup(RunEnvironment): # interpolation self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']}) - self._set_param("interpolation_dim", interpolation_dim, default='datetime') + self._set_param("time_dim", time_dim, default='datetime') self._set_param("interpolation_method", interpolation_method, default='linear') self._set_param("limit_nan_fill", limit_nan_fill, default=1) @@ -140,7 +140,7 @@ class ExperimentSetup(RunEnvironment): :param window_lead_time: number of time steps to predict by model (default 3). Time steps `t_0+1` to `t_0+w` are predicted. :param dimensions: - :param interpolation_dim: + :param time_dim: :param interpolation_method: :param limit_nan_fill: :param train_start: @@ -220,7 +220,7 @@ class ExperimentSetup(RunEnvironment): target_dim=None, window_lead_time: int = None, dimensions=None, - interpolation_dim=None, + time_dim=None, interpolation_method=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None, @@ -309,7 +309,7 @@ class ExperimentSetup(RunEnvironment): # interpolation self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS) - self._set_param("interpolation_dim", interpolation_dim, default=DEFAULT_INTERPOLATION_DIM) + self._set_param("time_dim", time_dim, default=DEFAULT_TIME_DIM) self._set_param("interpolation_method", interpolation_method, default=DEFAULT_INTERPOLATION_METHOD) self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL) diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index c781d593d9bf8d8747ebc823fc15038c083ac81a..8bbef1ec2159f532818d4dc4ff597cdbc57dfb07 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -264,7 +264,7 @@ class PostProcessing(RunEnvironment): path = self.data_store.get("forecast_path") plot_list = self.data_store.get("plot_list", "postprocessing") - time_dimension = self.data_store.get("interpolation_dim") + time_dimension = self.data_store.get("time_dim") if self.bootstrap_skill_scores is not None and "PlotBootstrapSkillScore" in plot_list: PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path, model_setup="CNN") @@ -317,7 +317,7 @@ class PostProcessing(RunEnvironment): be found inside `forecast_path`. """ logging.debug("start make_prediction") - time_dimension = self.data_store.get("interpolation_dim") + time_dimension = self.data_store.get("time_dim") for i, data in enumerate(self.test_data): input_data = data.get_X() target_data = data.get_Y(as_numpy=False) diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index 6b8c0f3c7003b194265308d580ba0f2b4df76df1..05c62aa35d9f8542ced94ae9cfc29719f3903bc8 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -16,7 +16,7 @@ from mlair.configuration import path_config from mlair.helpers.join import EmptyQueryResult from mlair.run_modules.run_environment import RunEnvironment -DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolation_dim", "target_dim", "target_var"] +DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "time_dim", "target_dim", "target_var"] DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length", "station_type", "overwrite_local_data", "start", "end", "sampling", "transformation", "extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"] @@ -203,7 +203,7 @@ class PreProcessing(RunEnvironment): loading time are logged in debug mode. :param args: Dictionary with required parameters for DataGenerator class (`data_path`, `network`, `stations`, - `variables`, `interpolation_dim`, `target_dim`, `target_var`). + `variables`, `time_dim`, `target_dim`, `target_var`). :param kwargs: positional parameters for the DataGenerator class (e.g. `start`, `interpolation_method`, `window_lead_time`). :param all_stations: All stations to check. diff --git a/mlair/workflows/default_workflow.py b/mlair/workflows/default_workflow.py index f42c0389d81f655fb0c8582a15e42acc853f757d..006a2c98421d3e205bcf63df159d802cb88ebd38 100644 --- a/mlair/workflows/default_workflow.py +++ b/mlair/workflows/default_workflow.py @@ -24,7 +24,7 @@ class DefaultWorkflow(Workflow): target_var=None, target_dim=None, window_lead_time=None, dimensions=None, - interpolate_method=None, interpolate_dim=None, limit_nan_fill=None, + interpolate_method=None, time_dim=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None, @@ -69,7 +69,7 @@ class DefaultWorkflowHPC(Workflow): target_var=None, target_dim=None, window_lead_time=None, dimensions=None, - interpolate_method=None, interpolate_dim=None, limit_nan_fill=None, + interpolate_method=None, time_dim=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, fraction_of_train=None, experiment_path=None, plot_path=None, forecast_path=None, bootstrap_path=None, overwrite_local_data=None, diff --git a/test/test_data_handler/old_t_data_generator.py b/test/test_data_handler/old_t_data_generator.py index cd2a849ec2d24af940fcf5731597cc8e9a16f517..9198923e2f75601f2ce7e6dc18a663da647eaadb 100644 --- a/test/test_data_handler/old_t_data_generator.py +++ b/test/test_data_handler/old_t_data_generator.py @@ -79,7 +79,7 @@ class TestDataGenerator: assert gen.stations == ['DEBW107'] assert gen.variables == ['o3', 'temp'] assert gen.station_type is None - assert gen.interpolation_dim == 'datetime' + assert gen.time_dim == 'datetime' assert gen.target_dim == 'variables' assert gen.target_var == 'o3' assert gen.interpolation_method == "linear" diff --git a/test/test_run_modules/test_experiment_setup.py b/test/test_run_modules/test_experiment_setup.py index 0f1f7a0cb918b4a1ab4e776fe9f9a563eb244149..102bf32749bd2b0dcc5b1fb5b3c838543109100d 100644 --- a/test/test_run_modules/test_experiment_setup.py +++ b/test/test_run_modules/test_experiment_setup.py @@ -64,7 +64,7 @@ class TestExperimentSetup: assert data_store.get("window_lead_time", "general") == 3 # interpolation assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']} - assert data_store.get("interpolation_dim", "general") == "datetime" + assert data_store.get("time_dim", "general") == "datetime" assert data_store.get("interpolation_method", "general") == "linear" assert data_store.get("limit_nan_fill", "general") == 1 # train parameters @@ -93,7 +93,7 @@ class TestExperimentSetup: stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background", variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4, target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1", - interpolation_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01", + time_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01", train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05", test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False, fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20) @@ -125,7 +125,7 @@ class TestExperimentSetup: assert data_store.get("window_lead_time", "general") == 10 # interpolation assert data_store.get("dimensions", "general") == "dim1" - assert data_store.get("interpolation_dim", "general") == "int_dim" + assert data_store.get("time_dim", "general") == "int_dim" assert data_store.get("interpolation_method", "general") == "cubic" assert data_store.get("limit_nan_fill", "general") == 5 # train parameters diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py index fddcdfdd9d7ecc73052e0038c8e7692104b249e2..1fec8f4e56e2925bff0bc4af859dac1fe5fbb2b6 100644 --- a/test/test_run_modules/test_training.py +++ b/test/test_run_modules/test_training.py @@ -128,7 +128,7 @@ class TestTraining: data_prep = DefaultDataPreparation.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'), statistics_per_var=statistics_per_var, station_type="background", network="AIRBASE", sampling="daily", target_dim="variables", - target_var="o3", interpolation_dim="datetime", + target_var="o3", time_dim="datetime", window_history_size=window_history_size, window_lead_time=window_lead_time, name_affix="train") return DataCollection([data_prep])