From 2d254bb427168d1c2e5fff899fc97e518974c17f Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Wed, 26 Aug 2020 11:43:36 +0200
Subject: [PATCH] rename classes such as DataPreparation to DataHandler, as
 decided during doc creation

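For downstream code, only the class names change; the module paths stay the
same. A minimal sketch of the updated usage follows (the name mapping and
import paths are taken from this patch; the argument values are illustrative
placeholders copied from the updated test fixture in test_training.py and
assume local test data is available):

    # old name                 -> new name (this patch)
    # AbstractDataPreparation  -> AbstractDataHandler
    # DefaultDataPreparation   -> DefaultDataHandler
    # DataPreparationNeighbors -> DataHandlerNeighbors
    # AbstractStationPrep      -> AbstractDataHandlerSingleStation
    # StationPrep              -> DataHandlerSingleStation
    from mlair.data_handler import DefaultDataHandler

    # build a handler for a single station; all values below are placeholders
    data_prep = DefaultDataHandler.build(
        ['DEBW107'],
        data_path="data",
        statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'},
        station_type="background", network="AIRBASE", sampling="daily",
        target_dim="variables", target_var="o3", time_dim="datetime",
        window_history_size=7, window_lead_time=3,
    )
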
---
 mlair/data_handler/__init__.py                |  4 +-
 mlair/data_handler/advanced_data_handler.py   | 44 +++++++++----------
 mlair/data_handler/bootstraps.py              |  6 +--
 .../data_preparation_neighbors.py             | 14 +++---
 mlair/data_handler/station_preparation.py     | 26 +++++------
 mlair/run_modules/experiment_setup.py         |  4 +-
 test/test_run_modules/test_pre_processing.py  |  6 +--
 test/test_run_modules/test_training.py        | 14 +++---
 8 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/mlair/data_handler/__init__.py b/mlair/data_handler/__init__.py
index 451868b8..6510b336 100644
--- a/mlair/data_handler/__init__.py
+++ b/mlair/data_handler/__init__.py
@@ -11,5 +11,5 @@ __date__ = '2020-04-17'
 
 from .bootstraps import BootStraps
 from .iterator import KerasIterator, DataCollection
-from .advanced_data_handler import DefaultDataPreparation, AbstractDataPreparation
-from .data_preparation_neighbors import DataPreparationNeighbors
+from .advanced_data_handler import DefaultDataHandler, AbstractDataHandler
+from .data_preparation_neighbors import DataHandlerNeighbors
diff --git a/mlair/data_handler/advanced_data_handler.py b/mlair/data_handler/advanced_data_handler.py
index 57a9667f..1c6ff142 100644
--- a/mlair/data_handler/advanced_data_handler.py
+++ b/mlair/data_handler/advanced_data_handler.py
@@ -17,7 +17,7 @@ import copy
 from typing import Union, List, Tuple, Dict
 import logging
 from functools import reduce
-from mlair.data_handler.station_preparation import StationPrep
+from mlair.data_handler.station_preparation import DataHandlerSingleStation
 from mlair.helpers.join import EmptyQueryResult
 
 
@@ -49,7 +49,7 @@ class DummyDataSingleStation:  # pragma: no cover
         return self.name
 
 
-class AbstractDataPreparation:
+class AbstractDataHandler:
 
     _requirements = []
 
@@ -87,9 +87,9 @@ class AbstractDataPreparation:
         return None
 
 
-class DefaultDataPreparation(AbstractDataPreparation):
+class DefaultDataHandler(AbstractDataHandler):
 
-    _requirements = remove_items(inspect.getfullargspec(StationPrep).args, ["self", "station"])
+    _requirements = remove_items(inspect.getfullargspec(DataHandlerSingleStation).args, ["self", "station"])
 
     def __init__(self, id_class, data_path, min_length=0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None):
@@ -111,7 +111,7 @@ class DefaultDataPreparation(AbstractDataPreparation):
     @classmethod
     def build(cls, station, **kwargs):
         sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
-        sp = StationPrep(station, **sp_keys)
+        sp = DataHandlerSingleStation(station, **sp_keys)
         dp_args = {k: copy.deepcopy(kwargs[k]) for k in cls.own_args("id_class") if k in kwargs}
         return cls(sp, **dp_args)
 
@@ -286,7 +286,7 @@ class DefaultDataPreparation(AbstractDataPreparation):
         mean, std = None, None
         for station in set_stations:
             try:
-                sp = StationPrep(station, transformation={"method": method}, **sp_keys)
+                sp = DataHandlerSingleStation(station, transformation={"method": method}, **sp_keys)
                 mean = sp.mean.copy(deep=True) if mean is None else mean.combine_first(sp.mean)
                 std = sp.std.copy(deep=True) if std is None else std.combine_first(sp.std)
             except (AttributeError, EmptyQueryResult):
@@ -303,23 +303,23 @@ class DefaultDataPreparation(AbstractDataPreparation):
 
 def run_data_prep():
 
-    from .data_preparation_neighbors import DataPreparationNeighbors
+    from .data_preparation_neighbors import DataHandlerNeighbors
     data = DummyDataSingleStation("main_class")
     data.get_X()
     data.get_Y()
 
     path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
-    data_prep = DataPreparationNeighbors(DummyDataSingleStation("main_class"),
-                                         path,
-                                         neighbors=[DummyDataSingleStation("neighbor1"),
+    data_prep = DataHandlerNeighbors(DummyDataSingleStation("main_class"),
+                                     path,
+                                     neighbors=[DummyDataSingleStation("neighbor1"),
                                                     DummyDataSingleStation("neighbor2")],
-                                         extreme_values=[1., 1.2])
+                                     extreme_values=[1., 1.2])
     data_prep.get_data(upsampling=False)
 
 
 def create_data_prep():
 
-    from .data_preparation_neighbors import DataPreparationNeighbors
+    from .data_preparation_neighbors import DataHandlerNeighbors
     path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
     station_type = None
     network = 'UBA'
@@ -329,22 +329,22 @@ def create_data_prep():
     interpolation_dim = 'datetime'
     window_history_size = 7
     window_lead_time = 3
-    central_station = StationPrep("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {},station_type, network, sampling, target_dim,
-                                  target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor1 = StationPrep("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {},station_type, network, sampling, target_dim,
-                                  target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor2 = StationPrep("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
-                                  target_var, interpolation_dim, window_history_size, window_lead_time)
+    central_station = DataHandlerSingleStation("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                               target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor1 = DataHandlerSingleStation("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor2 = DataHandlerSingleStation("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
 
     data_prep = []
-    data_prep.append(DataPreparationNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
-    data_prep.append(DataPreparationNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
-    data_prep.append(DataPreparationNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
+    data_prep.append(DataHandlerNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
     return data_prep
 
 
 if __name__ == "__main__":
-    from mlair.data_handler.station_preparation import StationPrep
+    from mlair.data_handler.station_preparation import DataHandlerSingleStation
     from mlair.data_handler.iterator import KerasIterator, DataCollection
     data_prep = create_data_prep()
     data_collection = DataCollection(data_prep)
diff --git a/mlair/data_handler/bootstraps.py b/mlair/data_handler/bootstraps.py
index 91603b41..f7f5c3c7 100644
--- a/mlair/data_handler/bootstraps.py
+++ b/mlair/data_handler/bootstraps.py
@@ -19,7 +19,7 @@ from itertools import chain
 import numpy as np
 import xarray as xr
 
-from mlair.data_handler.advanced_data_handler import AbstractDataPreparation
+from mlair.data_handler.advanced_data_handler import AbstractDataHandler
 
 
 class BootstrapIterator(Iterator):
@@ -82,7 +82,7 @@ class BootStraps(Iterable):
     """
     Main class to perform bootstrap operations.
 
-    This class requires a data handler following the definition of the AbstractDataPreparation, the number of bootstraps
+    This class requires a data handler following the definition of the AbstractDataHandler, the number of bootstraps
     to create and the dimension along this bootstrapping is performed (default dimension is `variables`).
 
     When iterating on this class, it returns the bootstrapped X, Y and a tuple with (position of variable in X, name of
@@ -91,7 +91,7 @@ class BootStraps(Iterable):
     retrieved by calling the .bootstraps() method. Further more, by calling the .get_orig_prediction() this class
     imitates according to the set number of bootstraps the original prediction
     """
-    def __init__(self, data: AbstractDataPreparation, number_of_bootstraps: int = 10,
+    def __init__(self, data: AbstractDataHandler, number_of_bootstraps: int = 10,
                  bootstrap_dimension: str = "variables"):
         """
         Create iterable class to be ready to iter.
diff --git a/mlair/data_handler/data_preparation_neighbors.py b/mlair/data_handler/data_preparation_neighbors.py
index 0c95b242..37e19225 100644
--- a/mlair/data_handler/data_preparation_neighbors.py
+++ b/mlair/data_handler/data_preparation_neighbors.py
@@ -4,8 +4,8 @@ __date__ = '2020-07-17'
 
 
 from mlair.helpers import to_list
-from mlair.data_handler.station_preparation import StationPrep
-from mlair.data_handler.advanced_data_handler import DefaultDataPreparation
+from mlair.data_handler.station_preparation import DataHandlerSingleStation
+from mlair.data_handler.advanced_data_handler import DefaultDataHandler
 import os
 
 from typing import Union, List
@@ -14,7 +14,7 @@ number = Union[float, int]
 num_or_list = Union[number, List[number]]
 
 
-class DataPreparationNeighbors(DefaultDataPreparation):
+class DataHandlerNeighbors(DefaultDataHandler):
 
     def __init__(self, id_class, data_path, neighbors=None, min_length=0,
                  extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False):
@@ -25,10 +25,10 @@ class DataPreparationNeighbors(DefaultDataPreparation):
     @classmethod
     def build(cls, station, **kwargs):
         sp_keys = {k: kwargs[k] for k in cls._requirements if k in kwargs}
-        sp = StationPrep(station, **sp_keys)
+        sp = DataHandlerSingleStation(station, **sp_keys)
         n_list = []
         for neighbor in kwargs.get("neighbors", []):
-            n_list.append(StationPrep(neighbor, **sp_keys))
+            n_list.append(DataHandlerSingleStation(neighbor, **sp_keys))
         else:
             kwargs["neighbors"] = n_list if len(n_list) > 0 else None
         dp_args = {k: kwargs[k] for k in cls.own_args("id_class") if k in kwargs}
@@ -39,12 +39,12 @@ class DataPreparationNeighbors(DefaultDataPreparation):
 
     def get_coordinates(self, include_neighbors=False):
         neighbors = list(map(lambda n: n.get_coordinates(), self.neighbors)) if include_neighbors is True else []
-        return [super(DataPreparationNeighbors, self).get_coordinates()].append(neighbors)
+        return [super(DataHandlerNeighbors, self).get_coordinates()].append(neighbors)
 
 
 if __name__ == "__main__":
 
-    a = DataPreparationNeighbors
+    a = DataHandlerNeighbors
     requirements = a.requirements()
 
     kwargs = {"path": os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata"),
diff --git a/mlair/data_handler/station_preparation.py b/mlair/data_handler/station_preparation.py
index ff8496ab..0f7dbd26 100644
--- a/mlair/data_handler/station_preparation.py
+++ b/mlair/data_handler/station_preparation.py
@@ -39,7 +39,7 @@ DEFAULT_SAMPLING = "daily"
 DEFAULT_INTERPOLATION_METHOD = "linear"
 
 
-class AbstractStationPrep(object):
+class AbstractDataHandlerSingleStation(object):
     def __init__(self): #, path, station, statistics_per_var, transformation, **kwargs):
         pass
 
@@ -50,7 +50,7 @@ class AbstractStationPrep(object):
         raise NotImplementedError
 
 
-class StationPrep(AbstractStationPrep):
+class DataHandlerSingleStation(AbstractDataHandlerSingleStation):
 
     def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE,
                  network=DEFAULT_NETWORK, sampling=DEFAULT_SAMPLING, target_dim=DEFAULT_TARGET_DIM,
@@ -681,18 +681,18 @@ if __name__ == "__main__":
     # dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
     # print(dp)
     statistics_per_var = {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}
-    sp = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
-                     statistics_per_var=statistics_per_var, station_type='background',
-                     network='UBA', sampling='daily', target_dim='variables', target_var='o3',
-                     time_dim='datetime', window_history_size=7, window_lead_time=3,
-                     interpolation_limit=0
-                     )  # transformation={'method': 'standardise'})
+    sp = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
+                                  statistics_per_var=statistics_per_var, station_type='background',
+                                  network='UBA', sampling='daily', target_dim='variables', target_var='o3',
+                                  time_dim='datetime', window_history_size=7, window_lead_time=3,
+                                  interpolation_limit=0
+                                  )  # transformation={'method': 'standardise'})
     # sp.set_transformation({'method': 'standardise', 'mean': sp.mean+2, 'std': sp.std+1})
-    sp2 = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
-                      statistics_per_var=statistics_per_var, station_type='background',
-                      network='UBA', sampling='daily', target_dim='variables', target_var='o3',
-                      time_dim='datetime', window_history_size=7, window_lead_time=3,
-                      transformation={'method': 'standardise'})
+    sp2 = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
+                                   statistics_per_var=statistics_per_var, station_type='background',
+                                   network='UBA', sampling='daily', target_dim='variables', target_var='o3',
+                                   time_dim='datetime', window_history_size=7, window_lead_time=3,
+                                   transformation={'method': 'standardise'})
     sp2.transform(inverse=True)
     sp.get_X()
     sp.get_Y()
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 407465ad..d7ecbac5 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -18,7 +18,7 @@ from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT,
     DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \
     DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \
     DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST
-from mlair.data_handler.advanced_data_handler import DefaultDataPreparation
+from mlair.data_handler.advanced_data_handler import DefaultDataHandler
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.model_modules.model_class import MyLittleModel as VanillaModel
 
@@ -290,7 +290,7 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("sampling", sampling)
         self._set_param("transformation", transformation, default=DEFAULT_TRANSFORMATION)
         self._set_param("transformation", None, scope="preprocessing")
-        self._set_param("data_preparation", data_preparation, default=DefaultDataPreparation)
+        self._set_param("data_preparation", data_preparation, default=DefaultDataHandler)
 
         # target
         self._set_param("target_var", target_var, default=DEFAULT_TARGET_VAR)
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index 97e73204..e62c8758 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -2,7 +2,7 @@ import logging
 
 import pytest
 
-from mlair.data_handler import DefaultDataPreparation, DataCollection, AbstractDataPreparation
+from mlair.data_handler import DefaultDataHandler, DataCollection, AbstractDataHandler
 from mlair.helpers.datastore import NameNotFoundInScope
 from mlair.helpers import PyTestRegex
 from mlair.run_modules.experiment_setup import ExperimentSetup
@@ -28,7 +28,7 @@ class TestPreProcessing:
     def obj_with_exp_setup(self):
         ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
                         statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background",
-                        data_preparation=DefaultDataPreparation)
+                        data_preparation=DefaultDataHandler)
         pre = object.__new__(PreProcessing)
         super(PreProcessing, pre).__init__()
         yield pre
@@ -110,7 +110,7 @@ class TestPreProcessing:
 
     def test_transformation(self):
         pre = object.__new__(PreProcessing)
-        data_preparation = AbstractDataPreparation
+        data_preparation = AbstractDataHandler
         stations = ['DEBW107', 'DEBY081']
         assert pre.transformation(data_preparation, stations) is None
         class data_preparation_no_trans: pass
diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py
index 1fec8f4e..c5f1ba9d 100644
--- a/test/test_run_modules/test_training.py
+++ b/test/test_run_modules/test_training.py
@@ -9,7 +9,7 @@ import mock
 import pytest
 from keras.callbacks import History
 
-from mlair.data_handler import DataCollection, KerasIterator, DefaultDataPreparation
+from mlair.data_handler import DataCollection, KerasIterator, DefaultDataHandler
 from mlair.helpers import PyTestRegex
 from mlair.model_modules.flatten import flatten_tail
 from mlair.model_modules.inception_model import InceptionModelBase
@@ -125,12 +125,12 @@ class TestTraining:
 
     @pytest.fixture
     def data_collection(self, path, window_history_size, window_lead_time, statistics_per_var):
-        data_prep = DefaultDataPreparation.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'),
-                                                 statistics_per_var=statistics_per_var, station_type="background",
-                                                 network="AIRBASE", sampling="daily", target_dim="variables",
-                                                 target_var="o3", time_dim="datetime",
-                                                 window_history_size=window_history_size,
-                                                 window_lead_time=window_lead_time, name_affix="train")
+        data_prep = DefaultDataHandler.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'),
+                                             statistics_per_var=statistics_per_var, station_type="background",
+                                             network="AIRBASE", sampling="daily", target_dim="variables",
+                                             target_var="o3", time_dim="datetime",
+                                             window_history_size=window_history_size,
+                                             window_lead_time=window_lead_time, name_affix="train")
         return DataCollection([data_prep])
 
     @pytest.fixture
-- 
GitLab