From 82c83089a45e2bded47be2aaaeeba6779f93da0b Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 17 Dec 2020 15:50:49 +0100
Subject: [PATCH] file advanced data handler was outdated and just a collection
 of "testing" scripts, moved some content to the data handler neighbors for a
 better start on implementing tests, /close #238

---
 mlair/data_handler/advanced_data_handler.py  | 112 -------------------
 mlair/data_handler/data_handler_neighbors.py | 107 +++++++++++++++++-
 2 files changed, 102 insertions(+), 117 deletions(-)
 delete mode 100644 mlair/data_handler/advanced_data_handler.py

diff --git a/mlair/data_handler/advanced_data_handler.py b/mlair/data_handler/advanced_data_handler.py
deleted file mode 100644
index f04748e8..00000000
--- a/mlair/data_handler/advanced_data_handler.py
+++ /dev/null
@@ -1,112 +0,0 @@
-
-__author__ = 'Lukas Leufen'
-__date__ = '2020-07-08'
-
-import numpy as np
-import xarray as xr
-import os
-import pandas as pd
-import datetime as dt
-
-from mlair.data_handler import AbstractDataHandler
-
-from typing import Union, List, Tuple, Dict
-import logging
-from functools import reduce
-from mlair.helpers.join import EmptyQueryResult
-from mlair.helpers import TimeTracking
-
-number = Union[float, int]
-num_or_list = Union[number, List[number]]
-
-
-def run_data_prep():
-    from .data_handler_neighbors import DataHandlerNeighbors
-    data = DummyDataHandler("main_class")
-    data.get_X()
-    data.get_Y()
-
-    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
-    data_prep = DataHandlerNeighbors(DummyDataHandler("main_class"),
-                                     path,
-                                     neighbors=[DummyDataHandler("neighbor1"),
-                                                DummyDataHandler("neighbor2")],
-                                     extreme_values=[1., 1.2])
-    data_prep.get_data(upsampling=False)
-
-
-def create_data_prep():
-    from .data_handler_neighbors import DataHandlerNeighbors
-    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
-    station_type = None
-    network = 'UBA'
-    sampling = 'daily'
-    target_dim = 'variables'
-    target_var = 'o3'
-    interpolation_dim = 'datetime'
-    window_history_size = 7
-    window_lead_time = 3
-    central_station = DataHandlerSingleStation("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
-                                               target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor1 = DataHandlerSingleStation("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {}, station_type, network, sampling, target_dim,
-                                         target_var, interpolation_dim, window_history_size, window_lead_time)
-    neighbor2 = DataHandlerSingleStation("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type, network, sampling, target_dim,
-                                         target_var, interpolation_dim, window_history_size, window_lead_time)
-
-    data_prep = []
-    data_prep.append(DataHandlerNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
-    data_prep.append(DataHandlerNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
-    data_prep.append(DataHandlerNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
-    return data_prep
-
-
-class DummyDataHandler(AbstractDataHandler):
-
-    def __init__(self, name, number_of_samples=None):
-        """This data handler takes a name argument and the number of samples to generate. If not provided, a random
-        number between 100 and 150 is set."""
-        super().__init__()
-        self.name = name
-        self.number_of_samples = number_of_samples if number_of_samples is not None else np.random.randint(100, 150)
-        self._X = self.create_X()
-        self._Y = self.create_Y()
-
-    def create_X(self):
-        """Inputs are random numbers between 0 and 10 with shape (no_samples, window=14, variables=5)."""
-        X = np.random.randint(0, 10, size=(self.number_of_samples, 14, 5))  # samples, window, variables
-        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
-        return xr.DataArray(X, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
-                                                                                 "window": range(14),
-                                                                                 "variables": range(5)})
-
-    def create_Y(self):
-        """Targets are normal distributed random numbers with shape (no_samples, window=5, variables=1)."""
-        Y = np.round(0.5 * np.random.randn(self.number_of_samples, 5, 1), 1)  # samples, window, variables
-        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
-        return xr.DataArray(Y, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
-                                                                                 "window": range(5),
-                                                                                 "variables": range(1)})
-
-    def get_X(self, upsampling=False, as_numpy=False):
-        """Upsampling parameter is not used for X."""
-        return np.copy(self._X) if as_numpy is True else self._X
-
-    def get_Y(self, upsampling=False, as_numpy=False):
-        """Upsampling parameter is not used for Y."""
-        return np.copy(self._Y) if as_numpy is True else self._Y
-
-    def __str__(self):
-        return self.name
-
-
-if __name__ == "__main__":
-    from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
-    from mlair.data_handler.iterator import KerasIterator, DataCollection
-    data_prep = create_data_prep()
-    data_collection = DataCollection(data_prep)
-    for data in data_collection:
-        print(data)
-    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata", "keras")
-    keras_it = KerasIterator(data_collection, 100, path, upsampling=True)
-    keras_it[2]
-
diff --git a/mlair/data_handler/data_handler_neighbors.py b/mlair/data_handler/data_handler_neighbors.py
index a004e659..6c87946e 100644
--- a/mlair/data_handler/data_handler_neighbors.py
+++ b/mlair/data_handler/data_handler_neighbors.py
@@ -1,10 +1,15 @@
-
 __author__ = 'Lukas Leufen'
 __date__ = '2020-07-17'
 
+import datetime as dt
+
+import numpy as np
+import pandas as pd
+import xarray as xr
 
+from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.helpers import to_list
-from mlair.data_handler import DefaultDataHandler
+from mlair.data_handler import DefaultDataHandler, AbstractDataHandler
 import os
 import copy
 
@@ -43,8 +48,90 @@ class DataHandlerNeighbors(DefaultDataHandler):
         return [super(DataHandlerNeighbors, self).get_coordinates()].append(neighbors)
 
 
-if __name__ == "__main__":
+def run_data_prep():
+    """Comment: methods just to start write meaningful test routines."""
+    data = DummyDataHandler("main_class")
+    data.get_X()
+    data.get_Y()
+
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
+    data_prep = DataHandlerNeighbors(DummyDataHandler("main_class"),
+                                     path,
+                                     neighbors=[DummyDataHandler("neighbor1"),
+                                                DummyDataHandler("neighbor2")],
+                                     extreme_values=[1., 1.2])
+    data_prep.get_data(upsampling=False)
+
+
+def create_data_prep():
+    """Comment: methods just to start write meaningful test routines."""
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")
+    station_type = None
+    network = 'UBA'
+    sampling = 'daily'
+    target_dim = 'variables'
+    target_var = 'o3'
+    interpolation_dim = 'datetime'
+    window_history_size = 7
+    window_lead_time = 3
+    central_station = DataHandlerSingleStation("DEBW011", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type,
+                                               network, sampling, target_dim,
+                                               target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor1 = DataHandlerSingleStation("DEBW013", path, {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}, {},
+                                         station_type, network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
+    neighbor2 = DataHandlerSingleStation("DEBW034", path, {'o3': 'dma8eu', 'temp': 'maximum'}, {}, station_type,
+                                         network, sampling, target_dim,
+                                         target_var, interpolation_dim, window_history_size, window_lead_time)
+
+    data_prep = []
+    data_prep.append(DataHandlerNeighbors(central_station, path, neighbors=[neighbor1, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor1, path, neighbors=[central_station, neighbor2]))
+    data_prep.append(DataHandlerNeighbors(neighbor2, path, neighbors=[neighbor1, central_station]))
+    return data_prep
+
+
+class DummyDataHandler(AbstractDataHandler):
+
+    def __init__(self, name, number_of_samples=None):
+        """This data handler takes a name argument and the number of samples to generate. If not provided, a random
+        number between 100 and 150 is set."""
+        super().__init__()
+        self.name = name
+        self.number_of_samples = number_of_samples if number_of_samples is not None else np.random.randint(100, 150)
+        self._X = self.create_X()
+        self._Y = self.create_Y()
 
+    def create_X(self):
+        """Inputs are random numbers between 0 and 10 with shape (no_samples, window=14, variables=5)."""
+        X = np.random.randint(0, 10, size=(self.number_of_samples, 14, 5))  # samples, window, variables
+        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+        return xr.DataArray(X, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                 "window": range(14),
+                                                                                 "variables": range(5)})
+
+    def create_Y(self):
+        """Targets are normal distributed random numbers with shape (no_samples, window=5, variables=1)."""
+        Y = np.round(0.5 * np.random.randn(self.number_of_samples, 5, 1), 1)  # samples, window, variables
+        datelist = pd.date_range(dt.datetime.today().date(), periods=self.number_of_samples, freq="H").tolist()
+        return xr.DataArray(Y, dims=['datetime', 'window', 'variables'], coords={"datetime": datelist,
+                                                                                 "window": range(5),
+                                                                                 "variables": range(1)})
+
+    def get_X(self, upsampling=False, as_numpy=False):
+        """Upsampling parameter is not used for X."""
+        return np.copy(self._X) if as_numpy is True else self._X
+
+    def get_Y(self, upsampling=False, as_numpy=False):
+        """Upsampling parameter is not used for Y."""
+        return np.copy(self._Y) if as_numpy is True else self._Y
+
+    def __str__(self):
+        return self.name
+
+
+if __name__ == "__main__":
+    """Comment: This is more for testing. Maybe reuse parts of this code for the testing routines."""
     a = DataHandlerNeighbors
     requirements = a.requirements()
 
@@ -59,7 +146,17 @@ if __name__ == "__main__":
               "window_lead_time": 3,
               "neighbors": ["DEBW034"],
               "data_path": os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata"),
-              "statistics_per_var":  {'o3': 'dma8eu', 'temp': 'maximum'},
-              "transformation": None,}
+              "statistics_per_var": {'o3': 'dma8eu', 'temp': 'maximum'},
+              "transformation": None, }
     a_inst = a.build("DEBW011", **kwargs)
     print(a_inst)
+
+    from mlair.data_handler.iterator import KerasIterator, DataCollection
+
+    data_prep = create_data_prep()
+    data_collection = DataCollection(data_prep)
+    for data in data_collection:
+        print(data)
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata", "keras")
+    keras_it = KerasIterator(data_collection, 100, path, upsampling=True)
+    keras_it[2]
-- 
GitLab