From 2d254bb427168d1c2e5fff899fc97e518974c17f Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Wed, 26 Aug 2020 11:43:36 +0200
Subject: [PATCH] during doc creation we decided to rename classes like
 DataPreparation to DataHandler

---
 mlair/data_handler/__init__.py                |  4 +-
 mlair/data_handler/advanced_data_handler.py   | 44 +++++++++----------
 mlair/data_handler/bootstraps.py              |  6 +--
 .../data_preparation_neighbors.py             | 14 +++---
 mlair/data_handler/station_preparation.py     | 26 +++++------
 mlair/run_modules/experiment_setup.py         |  4 +-
 test/test_run_modules/test_pre_processing.py  |  6 +--
 test/test_run_modules/test_training.py        | 14 +++---
 8 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/mlair/data_handler/__init__.py b/mlair/data_handler/__init__.py
index 451868b8..6510b336 100644
--- a/mlair/data_handler/__init__.py
+++ b/mlair/data_handler/__init__.py
@@ -11,5 +11,5 @@ __date__ = '2020-04-17'
 
 from .bootstraps import BootStraps
 from .iterator import KerasIterator, DataCollection
-from .advanced_data_handler import DefaultDataPreparation, AbstractDataPreparation
-from .data_preparation_neighbors import DataPreparationNeighbors
+from .advanced_data_handler import DefaultDataHandler, AbstractDataHandler
+from .data_preparation_neighbors import DataHandlerNeighbors
diff --git a/mlair/data_handler/advanced_data_handler.py b/mlair/data_handler/advanced_data_handler.py
index 57a9667f..1c6ff142 100644
--- a/mlair/data_handler/advanced_data_handler.py
+++ b/mlair/data_handler/advanced_data_handler.py
@@ -17,7 +17,7 @@ import copy
 from typing import Union, List, Tuple, Dict
 import logging
 from functools import reduce
-from mlair.data_handler.station_preparation import StationPrep
+from mlair.data_handler.station_preparation import DataHandlerSingleStation
 from mlair.helpers.join import EmptyQueryResult
 
 
@@ -49,7 +49,7 @@ class DummyDataSingleStation:  # pragma: no cover
         return self.name
 
 
-class AbstractDataPreparation:
+class AbstractDataHandler:
 
     _requirements = []
 
@@ -87,9 +87,9 @@ class AbstractDataPreparation:
         return None
 
 
-class DefaultDataPreparation(AbstractDataPreparation):
+class DefaultDataHandler(AbstractDataHandler):
 
-    _requirements = remove_items(inspect.getfullargspec(StationPrep).args, ["self", "station"])
+    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
 
     def __init__(self, id_class, data_path, min_length=0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None):
@@ -111,7 +111,7 @@ class DefaultDataPreparation(AbstractDataPreparation):
     @classmethod
     def build(cls, station, **kwargs):
         sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
-        sp = StationPrep(station, **sp_keys)
+        sp = DataHandlerSingleStation(station, **sp_keys)
         dp_args = {k: copy.deepcopy(kwargs[k]) for k in cls.own_args("id_class") if k in kwargs}
         return cls(sp, **dp_args)
 
@@ -286,7 +286,7 @@ class DefaultDataPreparation(AbstractDataPreparation):
         mean, std = None, None
         for station in set_stations:
             try:
-                sp = StationPrep(station, transformation={"method": method}, **sp_keys)
+                sp = DataHandlerSingleStation(station, transformation={"method": method}, **sp_keys)
                 mean = sp.mean.copy(deep=True) if mean is None else mean.combine_first(sp.mean)
                 std = sp.std.copy(deep=True) if std is None else std.combine_first(sp.std)
             except (AttributeError, EmptyQueryResult):
@@ -303,23 +303,23 @@ class DefaultDataPreparation(AbstractDataPreparation):
 
 
 def run_data_prep():
-    from .data_preparation_neighbors import DataPreparationNeighbors
+    from .data_preparation_neighbors import DataHandlerNeighbors
     data = DummyDataSingleStation("main_class")
     data.get_X()
     data.get_Y()
 
     path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
-    data_prep = DataPreparationNeighbors(DummyDataSingleStation("main_class"),
-                                         path,
-                                         neighbors=[DummyDataSingleStation("neighbor1"),
-                                                    DummyDataSingleStation("neighbor2")],
-                                         extreme_values=[1., 1.2])
+    data_prep = DataHandlerNeighbors(DummyDataSingleStation("main_class"),
+                                     path,
+                                     neighbors=[DummyDataSingleStation("neighbor1"),
+                                                DummyDataSingleStation("neighbor2")],
+                                     extreme_values=[1., 1.2])
     data_prep.get_data(upsampling=False)
 
 
 def create_data_prep():
-    from .data_preparation_neighbors import DataPreparationNeighbors
+    from .data_preparation_neighbors import DataHandlerNeighbors
     path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
     station_type = None
     network = 'UBA'
@@ -329,22 +329,22 @@ def create_data_prep():
     interpolation_dim = 'datetime'
     window_history_size = 7
    window_lead_time = 3
-    central_station = StationPrep("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {},station_type, network, sampling, target_dim,
-                                  target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor1 = StationPrep("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {},station_type, network, sampling, target_dim,
-                            target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor2 = StationPrep("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
-                            target_var, interpolation_dim, window_history_size, window_lead_time)
+    central_station = DataHandlerSingleStation("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                               target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor1 = DataHandlerSingleStation("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor2 = DataHandlerSingleStation("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
 
     data_prep = []
-    data_prep.append(DataPreparationNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
-    data_prep.append(DataPreparationNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
-    data_prep.append(DataPreparationNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
+    data_prep.append(DataHandlerNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
     return data_prep
 
 
 if __name__ == "__main__":
-    from mlair.data_handler.station_preparation import StationPrep
+    from mlair.data_handler.station_preparation import DataHandlerSingleStation
     from mlair.data_handler.iterator import KerasIterator, DataCollection
     data_prep = create_data_prep()
     data_collection = DataCollection(data_prep)
diff --git a/mlair/data_handler/bootstraps.py b/mlair/data_handler/bootstraps.py
index 91603b41..f7f5c3c7 100644
--- a/mlair/data_handler/bootstraps.py
+++ b/mlair/data_handler/bootstraps.py
@@ -19,7 +19,7 @@ from itertools import chain
 import numpy as np
 import xarray as xr
 
-from mlair.data_handler.advanced_data_handler import AbstractDataPreparation
+from mlair.data_handler.advanced_data_handler import AbstractDataHandler
 
 
 class BootstrapIterator(Iterator):
@@ -82,7 +82,7 @@ class BootStraps(Iterable):
     """
     Main class to perform bootstrap operations.
 
-    This class requires a data handler following the definition of the AbstractDataPreparation, the number of bootstraps
+    This class requires a data handler following the definition of the AbstractDataHandler, the number of bootstraps
     to create and the dimension along this bootstrapping is performed (default dimension is `variables`).
     When iterating on this class, it returns the bootstrapped X, Y and a tuple with (position of variable in X, name of
@@ -91,7 +91,7 @@ class BootStraps(Iterable):
     retrieved by calling the .bootstraps() method.
     Further more, by calling the .get_orig_prediction() this class imitates according to the set number of bootstraps
     the original prediction
     """
-    def __init__(self, data: AbstractDataPreparation, number_of_bootstraps: int = 10,
+    def __init__(self, data: AbstractDataHandler, number_of_bootstraps: int = 10,
                  bootstrap_dimension: str = "variables"):
         """
         Create iterable class to be ready to iter.
diff --git a/mlair/data_handler/data_preparation_neighbors.py b/mlair/data_handler/data_preparation_neighbors.py
index 0c95b242..37e19225 100644
--- a/mlair/data_handler/data_preparation_neighbors.py
+++ b/mlair/data_handler/data_preparation_neighbors.py
@@ -4,8 +4,8 @@ __date__ = '2020-07-17'
 
 
 from mlair.helpers import to_list
-from mlair.data_handler.station_preparation import StationPrep
-from mlair.data_handler.advanced_data_handler import DefaultDataPreparation
+from mlair.data_handler.station_preparation import DataHandlerSingleStation
+from mlair.data_handler.advanced_data_handler import DefaultDataHandler
 import os
 from typing import Union, List
 
@@ -14,7 +14,7 @@ number = Union[float, int]
 num_or_list = Union[number, List[number]]
 
 
-class DataPreparationNeighbors(DefaultDataPreparation):
+class DataHandlerNeighbors(DefaultDataHandler):
 
     def __init__(self, id_class, data_path, neighbors=None, min_length=0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False):
@@ -25,10 +25,10 @@ class DataPreparationNeighbors(DefaultDataPreparation):
     @classmethod
     def build(cls, station, **kwargs):
         sp_keys = {k: kwargs[k] for k in cls._requirements if k in kwargs}
-        sp = StationPrep(station, **sp_keys)
+        sp = DataHandlerSingleStation(station, **sp_keys)
         n_list = []
         for neighbor in kwargs.get("neighbors", []):
-            n_list.append(StationPrep(neighbor, **sp_keys))
+            n_list.append(DataHandlerSingleStation(neighbor, **sp_keys))
         else:
             kwargs["neighbors"] = n_list if len(n_list) > 0 else None
         dp_args = {k: kwargs[k] for k in cls.own_args("id_class") if k in kwargs}
@@ -39,12 +39,12 @@ class DataPreparationNeighbors(DefaultDataPreparation):
 
     def get_coordinates(self, include_neighbors=False):
         neighbors = list(map(lambda n: n.get_coordinates(), self.neighbors)) if include_neighbors is True else []
-        return [super(DataPreparationNeighbors, self).get_coordinates()].append(neighbors)
+        return [super(DataHandlerNeighbors, self).get_coordinates()].append(neighbors)
 
 
 if __name__ == "__main__":
 
-    a = DataPreparationNeighbors
+    a = DataHandlerNeighbors
     requirements = a.requirements()
 
     kwargs = {"path": os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata"),
diff --git a/mlair/data_handler/station_preparation.py b/mlair/data_handler/station_preparation.py
index ff8496ab..0f7dbd26 100644
--- a/mlair/data_handler/station_preparation.py
+++ b/mlair/data_handler/station_preparation.py
@@ -39,7 +39,7 @@ DEFAULT_SAMPLING = "daily"
 DEFAULT_INTERPOLATION_METHOD = "linear"
 
 
-class AbstractStationPrep(object):
+class AbstractDataHandlerSingleStation(object):
     def __init__(self):  #, path, station, statistics_per_var, transformation, **kwargs):
         pass
 
@@ -50,7 +50,7 @@ class AbstractStationPrep(object):
         raise NotImplementedError
 
 
-class StationPrep(AbstractStationPrep):
+class DataHandlerSingleStation(AbstractDataHandlerSingleStation):
 
     def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE,
                  network=DEFAULT_NETWORK, sampling=DEFAULT_SAMPLING, target_dim=DEFAULT_TARGET_DIM,
@@ -681,18 +681,18 @@ if __name__ == "__main__":
     # dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
     # print(dp)
     statistics_per_var = {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}
-    sp = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
-                     statistics_per_var=statistics_per_var, station_type='background',
-                     network='UBA', sampling='daily', target_dim='variables', target_var='o3',
-                     time_dim='datetime', window_history_size=7, window_lead_time=3,
-                     interpolation_limit=0
-                     )  # transformation={'method': 'standardise'})
+    sp = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
+                                  statistics_per_var=statistics_per_var, station_type='background',
+                                  network='UBA', sampling='daily', target_dim='variables', target_var='o3',
+                                  time_dim='datetime', window_history_size=7, window_lead_time=3,
+                                  interpolation_limit=0
+                                  )  # transformation={'method': 'standardise'})
     # sp.set_transformation({'method': 'standardise', 'mean': sp.mean+2, 'std': sp.std+1})
-    sp2 = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
-                      statistics_per_var=statistics_per_var, station_type='background',
-                      network='UBA', sampling='daily', target_dim='variables', target_var='o3',
-                      time_dim='datetime', window_history_size=7, window_lead_time=3,
-                      transformation={'method': 'standardise'})
+    sp2 = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
+                                   statistics_per_var=statistics_per_var, station_type='background',
+                                   network='UBA', sampling='daily', target_dim='variables', target_var='o3',
+                                   time_dim='datetime', window_history_size=7, window_lead_time=3,
+                                   transformation={'method': 'standardise'})
     sp2.transform(inverse=True)
     sp.get_X()
     sp.get_Y()
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 407465ad..d7ecbac5 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -18,7 +18,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST
-from mlair.data_handler.advanced_data_handler import DefaultDataPreparation
+from mlair.data_handler.advanced_data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.model_class import MyLittleModel as VanillaModel
 
@@ -290,7 +290,7 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("sampling", sampling)
         self._set_param("transformation", transformation, default=DEFAULT_TRANSFORMATION)
         self._set_param("transformation", None, scope="preprocessing")
-        self._set_param("data_preparation", data_preparation, default=DefaultDataPreparation)
+        self._set_param("data_preparation", data_preparation, default=DefaultDataHandler)
 
         # target
         self._set_param("target_var", target_var, default=DEFAULT_TARGET_VAR)
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index 97e73204..e62c8758 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -2,7 +2,7 @@ import logging
 
 import pytest
 
-from mlair.data_handler import DefaultDataPreparation, DataCollection, AbstractDataPreparation
+from mlair.data_handler import DefaultDataHandler, DataCollection, AbstractDataHandler
 from mlair.helpers.datastore import NameNotFoundInScope
 from mlair.helpers import PyTestRegex
 from mlair.run_modules.experiment_setup import ExperimentSetup
@@ -28,7 +28,7 @@ class TestPreProcessing:
     def obj_with_exp_setup(self):
         ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
                         statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background",
-                        data_preparation=DefaultDataPreparation)
+                        data_preparation=DefaultDataHandler)
         pre = object.__new__(PreProcessing)
         super(PreProcessing, pre).__init__()
         yield pre
@@ -110,7 +110,7 @@ class TestPreProcessing:
     def test_transformation(self):
         pre = object.__new__(PreProcessing)
-        data_preparation = AbstractDataPreparation
+        data_preparation = AbstractDataHandler
         stations = ['DEBW107', 'DEBY081']
         assert pre.transformation(data_preparation, stations) is None
         class data_preparation_no_trans: pass
diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py
index 1fec8f4e..c5f1ba9d 100644
--- a/test/test_run_modules/test_training.py
+++ b/test/test_run_modules/test_training.py
@@ -9,7 +9,7 @@ import mock
 import pytest
 from keras.callbacks import History
 
-from mlair.data_handler import DataCollection, KerasIterator, DefaultDataPreparation
+from mlair.data_handler import DataCollection, KerasIterator, DefaultDataHandler
 from mlair.helpers import PyTestRegex
 from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.inception_model import InceptionModelBase
@@ -125,12 +125,12 @@ class TestTraining:
 
     @pytest.fixture
     def data_collection(self, path, window_history_size, window_lead_time, statistics_per_var):
-        data_prep = DefaultDataPreparation.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'),
-                                                 statistics_per_var=statistics_per_var, station_type="background",
-                                                 network="AIRBASE", sampling="daily", target_dim="variables",
-                                                 target_var="o3", time_dim="datetime",
-                                                 window_history_size=window_history_size,
-                                                 window_lead_time=window_lead_time, name_affix="train")
+        data_prep = DefaultDataHandler.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'),
+                                             statistics_per_var=statistics_per_var, station_type="background",
+                                             network="AIRBASE", sampling="daily", target_dim="variables",
+                                             target_var="o3", time_dim="datetime",
+                                             window_history_size=window_history_size,
+                                             window_lead_time=window_lead_time, name_affix="train")
         return DataCollection([data_prep])
 
     @pytest.fixture
-- 
GitLab
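
Usage note (illustrative sketch, not part of the patch): the change is a pure renaming of classes, so downstream code only has to update its imports; module paths stay the same. The mapping below is taken directly from the hunks above.

    # Renaming introduced by this patch (module paths unchanged):
    #   AbstractDataPreparation  -> AbstractDataHandler
    #   DefaultDataPreparation   -> DefaultDataHandler
    #   DataPreparationNeighbors -> DataHandlerNeighbors
    #   StationPrep              -> DataHandlerSingleStation

    # before
    # from mlair.data_handler import DefaultDataPreparation, AbstractDataPreparation
    # from mlair.data_handler.station_preparation import StationPrep

    # after
    from mlair.data_handler import DefaultDataHandler, AbstractDataHandler
    from mlair.data_handler.station_preparation import DataHandlerSingleStation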
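A minimal sketch of constructing a renamed handler, modelled on the updated data_collection fixture in test/test_run_modules/test_training.py. The data_path, station id and statistics mapping are illustrative only and assume the corresponding station data is available locally.

    import os

    from mlair.data_handler import DefaultDataHandler, DataCollection

    # Mirrors the updated test fixture: build() creates the wrapped
    # DataHandlerSingleStation internally and returns the DefaultDataHandler.
    data_handler = DefaultDataHandler.build(['DEBW107'],
                                            data_path=os.path.join(os.getcwd(), 'data'),  # illustrative path
                                            statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'},
                                            station_type="background", network="AIRBASE",
                                            sampling="daily", target_dim="variables",
                                            target_var="o3", time_dim="datetime",
                                            window_history_size=7, window_lead_time=3,
                                            name_affix="train")
    data_collection = DataCollection([data_handler])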
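BootStraps now accepts any handler that follows the AbstractDataHandler definition. The loop below is a sketch based on the class docstring quoted in the bootstraps.py hunk (iteration yields the bootstrapped X, Y and a tuple of (position of the variable in X, variable name)); it reuses the hypothetical data_handler from the previous sketch, and the exact unpacking is an assumption taken from that docstring.

    from mlair.data_handler import BootStraps

    # number_of_bootstraps=10 and bootstrap_dimension="variables" are the
    # defaults shown in the patched __init__ signature.
    boots = BootStraps(data_handler, number_of_bootstraps=10, bootstrap_dimension="variables")
    bootstrapped_variables = []
    for boot_x, boot_y, (position, variable) in boots:
        # boot_x / boot_y hold the bootstrapped inputs and targets for `variable`
        bootstrapped_variables.append(variable)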
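On the experiment side only the default value changed: the setup parameter is still called data_preparation, it now just defaults to DefaultDataHandler. The call below mirrors the updated fixture in test/test_run_modules/test_pre_processing.py; the station list and statistics are copied from that test and are illustrative.

    from mlair.data_handler import DefaultDataHandler
    from mlair.run_modules.experiment_setup import ExperimentSetup

    # Passing the handler class explicitly is equivalent to relying on the new default.
    ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
                    statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background",
                    data_preparation=DefaultDataHandler)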