From dd024db95ccce380f461f8060389b8f26aaacc58 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Thu, 10 Dec 2020 16:26:03 +0100
Subject: [PATCH] introduce window_history_offset parameter

---
 .../data_handler/data_handler_single_station.py | 17 ++++++++++++-----
 run_mixed_sampling.py                           |  5 +++--
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 8131566a..30995c16 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -34,6 +34,7 @@ DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'max
                         'pblheight': 'maximum'}
 DEFAULT_WINDOW_LEAD_TIME = 3
 DEFAULT_WINDOW_HISTORY_SIZE = 13
+DEFAULT_WINDOW_HISTORY_OFFSET = 0
 DEFAULT_TIME_DIM = "datetime"
 DEFAULT_TARGET_VAR = "o3"
 DEFAULT_TARGET_DIM = "variables"
@@ -46,7 +47,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
     def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE,
                  network=DEFAULT_NETWORK, sampling=DEFAULT_SAMPLING, target_dim=DEFAULT_TARGET_DIM,
                  target_var=DEFAULT_TARGET_VAR, time_dim=DEFAULT_TIME_DIM,
-                 window_history_size=DEFAULT_WINDOW_HISTORY_SIZE, window_lead_time=DEFAULT_WINDOW_LEAD_TIME,
+                 window_history_size=DEFAULT_WINDOW_HISTORY_SIZE, window_history_offset=DEFAULT_WINDOW_HISTORY_OFFSET,
+                 window_lead_time=DEFAULT_WINDOW_LEAD_TIME,
                  interpolation_limit: int = 0, interpolation_method: str = DEFAULT_INTERPOLATION_METHOD,
                  overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True,
                  min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, **kwargs):
@@ -65,6 +67,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
         self.target_var = target_var
         self.time_dim = time_dim
         self.window_history_size = window_history_size
+        self.window_history_offset = window_history_offset
         self.window_lead_time = window_lead_time
 
         self.interpolation_limit = interpolation_limit
@@ -278,13 +281,16 @@ class DataHandlerSingleStation(AbstractDataHandler):
     def setup_data_path(self, data_path: str, sampling: str):
         return os.path.join(os.path.abspath(data_path), sampling)
 
-    def shift(self, data: xr.DataArray, dim: str, window: int) -> xr.DataArray:
+    def shift(self, data: xr.DataArray, dim: str, window: int, offset: int = 0) -> xr.DataArray:
         """
         Shift data multiple times to represent history (if window <= 0) or lead time (if window > 0).
 
         :param data: data set to shift
         :param dim: dimension along shift is applied
         :param window: number of steps to shift (corresponds to the window length)
+        :param offset: number of time steps by which the entire window is moved. Use this if the index time of a
+            history element is not its last timestamp, e.g. offset=23 when dealing with hourly data in combination
+            with daily data (values from 00 to 23 are aggregated on 00 the same day).
 
         :return: shifted data
         """
@@ -295,9 +301,10 @@ class DataHandlerSingleStation(AbstractDataHandler):
         else:
             end = window + 1
         res = []
-        for w in range(start, end):
+        _range = list(map(lambda x: x + offset, range(start, end)))
+        for w in _range:
             res.append(data.shift({dim: -w}))
-        window_array = self.create_index_array('window', range(start, end), squeeze_dim=self.target_dim)
+        window_array = self.create_index_array('window', _range, squeeze_dim=self.target_dim)
         res = xr.concat(res, dim=window_array)
         return res
 
@@ -387,7 +394,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
         """
         window = -abs(window)
         data = self.input_data.data
-        self.history = self.shift(data, dim_name_of_shift, window)
+        self.history = self.shift(data, dim_name_of_shift, window, offset=self.window_history_offset)
 
     def make_labels(self, dim_name_of_target: str, target_var: str_or_list, dim_name_of_shift: str,
                     window: int) -> None:
diff --git a/run_mixed_sampling.py b/run_mixed_sampling.py
index a87e9f38..dbc94ef9 100644
--- a/run_mixed_sampling.py
+++ b/run_mixed_sampling.py
@@ -12,8 +12,9 @@ def main(parser_args):
     args = dict(sampling="daily",
                 sampling_inputs="hourly",
                 window_history_size=24,
+                window_history_offset=17,
                 **parser_args.__dict__,
-                data_handler=DataHandlerSeparationOfScales,
+                data_handler=DataHandlerMixedSampling,
                 kz_filter_length=[100 * 24, 15 * 24],
                 kz_filter_iter=[4, 5],
                 start="2006-01-01",
@@ -21,7 +22,7 @@ def main(parser_args):
                 end="2011-12-31",
                 test_end="2011-12-31",
                 stations=["DEBW107", "DEBW013"],
-                epochs=100,
+                epochs=1,
                 network="UBA",
                 )
     workflow = DefaultWorkflow(**args)
-- 
GitLab