Skip to content
Snippets Groups Projects
Commit 041b1239 authored by lukas leufen
Browse files

Merge branch 'lukas_issue146_refac_rename-interpolate' into 'develop'

Resolve "REFAC: rename parameters interpolate_... with interpolation_...."

See merge request toar/machinelearningtools!120
parents 771e2f47 1de5655b
Branches
Tags
4 merge requests: !125 Release v0.10.0, !124 Update Master to new version v0.10.0, !120 Resolve "REFAC: rename parameters interpolate_... with interpolation_....", !119 Resolve "Include advanced data handling in workflow"
Pipeline #40963 passed
...@@ -28,8 +28,8 @@ DEFAULT_TARGET_VAR = "o3" ...@@ -28,8 +28,8 @@ DEFAULT_TARGET_VAR = "o3"
DEFAULT_TARGET_DIM = "variables" DEFAULT_TARGET_DIM = "variables"
DEFAULT_WINDOW_LEAD_TIME = 3 DEFAULT_WINDOW_LEAD_TIME = 3
DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]} DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]}
DEFAULT_INTERPOLATE_DIM = "datetime" DEFAULT_INTERPOLATION_DIM = "datetime"
DEFAULT_INTERPOLATE_METHOD = "linear" DEFAULT_INTERPOLATION_METHOD = "linear"
DEFAULT_LIMIT_NAN_FILL = 1 DEFAULT_LIMIT_NAN_FILL = 1
DEFAULT_TRAIN_START = "1997-01-01" DEFAULT_TRAIN_START = "1997-01-01"
DEFAULT_TRAIN_END = "2007-12-31" DEFAULT_TRAIN_END = "2007-12-31"
......
...@@ -58,8 +58,8 @@ class DataGenerator(keras.utils.Sequence): ...@@ -58,8 +58,8 @@ class DataGenerator(keras.utils.Sequence):
""" """
def __init__(self, data_path: str, stations: Union[str, List[str]], variables: List[str], def __init__(self, data_path: str, stations: Union[str, List[str]], variables: List[str],
interpolate_dim: str, target_dim: str, target_var: str, station_type: str = None, interpolation_dim: str, target_dim: str, target_var: str, station_type: str = None,
interpolate_method: str = "linear", limit_nan_fill: int = 1, window_history_size: int = 7, interpolation_method: str = "linear", limit_nan_fill: int = 1, window_history_size: int = 7,
window_lead_time: int = 4, transformation: Dict = None, extreme_values: num_or_list = None, window_lead_time: int = 4, transformation: Dict = None, extreme_values: num_or_list = None,
data_preparation=None, **kwargs): data_preparation=None, **kwargs):
""" """
...@@ -68,11 +68,11 @@ class DataGenerator(keras.utils.Sequence): ...@@ -68,11 +68,11 @@ class DataGenerator(keras.utils.Sequence):
:param data_path: path to data :param data_path: path to data
:param stations: list with all stations to include :param stations: list with all stations to include
:param variables: list with all used variables :param variables: list with all used variables
:param interpolate_dim: dimension along which interpolation is applied :param interpolation_dim: dimension along which interpolation is applied
:param target_dim: dimension of target variable :param target_dim: dimension of target variable
:param target_var: name of target variable :param target_var: name of target variable
:param station_type: TOAR station type classification (background, traffic) :param station_type: TOAR station type classification (background, traffic)
:param interpolate_method: method of interpolation :param interpolation_method: method of interpolation
:param limit_nan_fill: maximum gap in data to fill by interpolation :param limit_nan_fill: maximum gap in data to fill by interpolation
:param window_history_size: length of the history window :param window_history_size: length of the history window
:param window_lead_time: length of the label window :param window_lead_time: length of the label window
...@@ -87,11 +87,11 @@ class DataGenerator(keras.utils.Sequence): ...@@ -87,11 +87,11 @@ class DataGenerator(keras.utils.Sequence):
os.makedirs(self.data_path_tmp) os.makedirs(self.data_path_tmp)
self.stations = helpers.to_list(stations) self.stations = helpers.to_list(stations)
self.variables = variables self.variables = variables
self.interpolate_dim = interpolate_dim self.interpolation_dim = interpolation_dim
self.target_dim = target_dim self.target_dim = target_dim
self.target_var = target_var self.target_var = target_var
self.station_type = station_type self.station_type = station_type
self.interpolate_method = interpolate_method self.interpolation_method = interpolation_method
self.limit_nan_fill = limit_nan_fill self.limit_nan_fill = limit_nan_fill
self.window_history_size = window_history_size self.window_history_size = window_history_size
self.window_lead_time = window_lead_time self.window_lead_time = window_lead_time
...@@ -104,7 +104,7 @@ class DataGenerator(keras.utils.Sequence): ...@@ -104,7 +104,7 @@ class DataGenerator(keras.utils.Sequence):
"""Display all class attributes.""" """Display all class attributes."""
return f"DataGenerator(path='{self.data_path}', stations={self.stations}, " \ return f"DataGenerator(path='{self.data_path}', stations={self.stations}, " \
f"variables={self.variables}, station_type={self.station_type}, " \ f"variables={self.variables}, station_type={self.station_type}, " \
f"interpolate_dim='{self.interpolate_dim}', target_dim='{self.target_dim}', " \ f"interpolation_dim='{self.interpolation_dim}', target_dim='{self.target_dim}', " \
f"target_var='{self.target_var}', **{self.kwargs})" f"target_var='{self.target_var}', **{self.kwargs})"
def __len__(self): def __len__(self):
...@@ -292,11 +292,11 @@ class DataGenerator(keras.utils.Sequence): ...@@ -292,11 +292,11 @@ class DataGenerator(keras.utils.Sequence):
**self.kwargs) **self.kwargs)
if self.transformation is not None: if self.transformation is not None:
data.transform("datetime", **helpers.remove_items(self.transformation, "scope")) data.transform("datetime", **helpers.remove_items(self.transformation, "scope"))
data.interpolate(self.interpolate_dim, method=self.interpolate_method, limit=self.limit_nan_fill) data.interpolate(self.interpolation_dim, method=self.interpolation_method, limit=self.limit_nan_fill)
data.make_history_window(self.target_dim, self.window_history_size, self.interpolate_dim) data.make_history_window(self.target_dim, self.window_history_size, self.interpolation_dim)
data.make_labels(self.target_dim, self.target_var, self.interpolate_dim, self.window_lead_time) data.make_labels(self.target_dim, self.target_var, self.interpolation_dim, self.window_lead_time)
data.make_observation(self.target_dim, self.target_var, self.interpolate_dim) data.make_observation(self.target_dim, self.target_var, self.interpolation_dim)
data.remove_nan(self.interpolate_dim) data.remove_nan(self.interpolation_dim)
if self.extreme_values is not None: if self.extreme_values is not None:
kwargs = {"extremes_on_right_tail_only": self.kwargs.get("extremes_on_right_tail_only", False)} kwargs = {"extremes_on_right_tail_only": self.kwargs.get("extremes_on_right_tail_only", False)}
data.multiply_extremes(self.extreme_values, **kwargs) data.multiply_extremes(self.extreme_values, **kwargs)
......
...@@ -13,7 +13,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, ...@@ -13,7 +13,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \ DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \
DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \ DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \
DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \ DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \
DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATE_DIM, DEFAULT_INTERPOLATE_METHOD, DEFAULT_LIMIT_NAN_FILL, \ DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATION_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \
DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \
DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
...@@ -66,8 +66,8 @@ class ExperimentSetup(RunEnvironment): ...@@ -66,8 +66,8 @@ class ExperimentSetup(RunEnvironment):
# interpolation # interpolation
self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']}) self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']})
self._set_param("interpolate_dim", interpolate_dim, default='datetime') self._set_param("interpolation_dim", interpolation_dim, default='datetime')
self._set_param("interpolate_method", interpolate_method, default='linear') self._set_param("interpolation_method", interpolation_method, default='linear')
self._set_param("limit_nan_fill", limit_nan_fill, default=1) self._set_param("limit_nan_fill", limit_nan_fill, default=1)
# train set parameters # train set parameters
...@@ -140,8 +140,8 @@ class ExperimentSetup(RunEnvironment): ...@@ -140,8 +140,8 @@ class ExperimentSetup(RunEnvironment):
:param window_lead_time: number of time steps to predict by model (default 3). Time steps `t_0+1` to `t_0+w` are :param window_lead_time: number of time steps to predict by model (default 3). Time steps `t_0+1` to `t_0+w` are
predicted. predicted.
:param dimensions: :param dimensions:
:param interpolate_dim: :param interpolation_dim:
:param interpolate_method: :param interpolation_method:
:param limit_nan_fill: :param limit_nan_fill:
:param train_start: :param train_start:
:param train_end: :param train_end:
...@@ -220,8 +220,8 @@ class ExperimentSetup(RunEnvironment): ...@@ -220,8 +220,8 @@ class ExperimentSetup(RunEnvironment):
target_dim=None, target_dim=None,
window_lead_time: int = None, window_lead_time: int = None,
dimensions=None, dimensions=None,
interpolate_dim=None, interpolation_dim=None,
interpolate_method=None, interpolation_method=None,
limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None,
test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None, test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None,
experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily", experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily",
...@@ -306,8 +306,8 @@ class ExperimentSetup(RunEnvironment): ...@@ -306,8 +306,8 @@ class ExperimentSetup(RunEnvironment):
# interpolation # interpolation
self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS) self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS)
self._set_param("interpolate_dim", interpolate_dim, default=DEFAULT_INTERPOLATE_DIM) self._set_param("interpolation_dim", interpolation_dim, default=DEFAULT_INTERPOLATION_DIM)
self._set_param("interpolate_method", interpolate_method, default=DEFAULT_INTERPOLATE_METHOD) self._set_param("interpolation_method", interpolation_method, default=DEFAULT_INTERPOLATION_METHOD)
self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL) self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL)
# train set parameters # train set parameters
......
...@@ -16,7 +16,7 @@ from mlair.configuration import path_config ...@@ -16,7 +16,7 @@ from mlair.configuration import path_config
from mlair.helpers.join import EmptyQueryResult from mlair.helpers.join import EmptyQueryResult
from mlair.run_modules.run_environment import RunEnvironment from mlair.run_modules.run_environment import RunEnvironment
DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolate_dim", "target_dim", "target_var"] DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolation_dim", "target_dim", "target_var"]
DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length", DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length",
"station_type", "overwrite_local_data", "start", "end", "sampling", "transformation", "station_type", "overwrite_local_data", "start", "end", "sampling", "transformation",
"extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"] "extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"]
...@@ -225,8 +225,8 @@ class PreProcessing(RunEnvironment): ...@@ -225,8 +225,8 @@ class PreProcessing(RunEnvironment):
loading time are logged in debug mode. loading time are logged in debug mode.
:param args: Dictionary with required parameters for DataGenerator class (`data_path`, `network`, `stations`, :param args: Dictionary with required parameters for DataGenerator class (`data_path`, `network`, `stations`,
`variables`, `interpolate_dim`, `target_dim`, `target_var`). `variables`, `interpolation_dim`, `target_dim`, `target_var`).
:param kwargs: keyword parameters for the DataGenerator class (e.g. `start`, `interpolate_method`, :param kwargs: keyword parameters for the DataGenerator class (e.g. `start`, `interpolation_method`,
`window_lead_time`). `window_lead_time`).
:param all_stations: All stations to check. :param all_stations: All stations to check.
:param name: name to display in the logging info message :param name: name to display in the logging info message
......
...@@ -80,10 +80,10 @@ class TestDataGenerator: ...@@ -80,10 +80,10 @@ class TestDataGenerator:
assert gen.stations == ['DEBW107'] assert gen.stations == ['DEBW107']
assert gen.variables == ['o3', 'temp'] assert gen.variables == ['o3', 'temp']
assert gen.station_type is None assert gen.station_type is None
assert gen.interpolate_dim == 'datetime' assert gen.interpolation_dim == 'datetime'
assert gen.target_dim == 'variables' assert gen.target_dim == 'variables'
assert gen.target_var == 'o3' assert gen.target_var == 'o3'
assert gen.interpolate_method == "linear" assert gen.interpolation_method == "linear"
assert gen.limit_nan_fill == 1 assert gen.limit_nan_fill == 1
assert gen.window_history_size == 7 assert gen.window_history_size == 7
assert gen.window_lead_time == 4 assert gen.window_lead_time == 4
...@@ -93,7 +93,7 @@ class TestDataGenerator: ...@@ -93,7 +93,7 @@ class TestDataGenerator:
def test_repr(self, gen): def test_repr(self, gen):
path = os.path.join(os.path.dirname(__file__), 'data') path = os.path.join(os.path.dirname(__file__), 'data')
assert gen.__repr__().rstrip() == f"DataGenerator(path='{path}', stations=['DEBW107'], " \ assert gen.__repr__().rstrip() == f"DataGenerator(path='{path}', stations=['DEBW107'], " \
f"variables=['o3', 'temp'], station_type=None, interpolate_dim='datetime', " \ f"variables=['o3', 'temp'], station_type=None, interpolation_dim='datetime', " \
f"target_dim='variables', target_var='o3', **{{'start': 2010, 'end': 2014}})" \ f"target_dim='variables', target_var='o3', **{{'start': 2010, 'end': 2014}})" \
.rstrip() .rstrip()
......
...@@ -64,8 +64,8 @@ class TestExperimentSetup: ...@@ -64,8 +64,8 @@ class TestExperimentSetup:
assert data_store.get("window_lead_time", "general") == 3 assert data_store.get("window_lead_time", "general") == 3
# interpolation # interpolation
assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']} assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']}
assert data_store.get("interpolate_dim", "general") == "datetime" assert data_store.get("interpolation_dim", "general") == "datetime"
assert data_store.get("interpolate_method", "general") == "linear" assert data_store.get("interpolation_method", "general") == "linear"
assert data_store.get("limit_nan_fill", "general") == 1 assert data_store.get("limit_nan_fill", "general") == 1
# train parameters # train parameters
assert data_store.get("start", "general.train") == "1997-01-01" assert data_store.get("start", "general.train") == "1997-01-01"
...@@ -93,7 +93,7 @@ class TestExperimentSetup: ...@@ -93,7 +93,7 @@ class TestExperimentSetup:
stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background", stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background",
variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4, variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4,
target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1", target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1",
interpolate_dim="int_dim", interpolate_method="cubic", limit_nan_fill=5, train_start="2000-01-01", interpolation_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01",
train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05", train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05",
test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False, test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False,
fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20) fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20)
...@@ -125,8 +125,8 @@ class TestExperimentSetup: ...@@ -125,8 +125,8 @@ class TestExperimentSetup:
assert data_store.get("window_lead_time", "general") == 10 assert data_store.get("window_lead_time", "general") == 10
# interpolation # interpolation
assert data_store.get("dimensions", "general") == "dim1" assert data_store.get("dimensions", "general") == "dim1"
assert data_store.get("interpolate_dim", "general") == "int_dim" assert data_store.get("interpolation_dim", "general") == "int_dim"
assert data_store.get("interpolate_method", "general") == "cubic" assert data_store.get("interpolation_method", "general") == "cubic"
assert data_store.get("limit_nan_fill", "general") == 5 assert data_store.get("limit_nan_fill", "general") == 5
# train parameters # train parameters
assert data_store.get("start", "general.train") == "2000-01-01" assert data_store.get("start", "general.train") == "2000-01-01"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment