From dd024db95ccce380f461f8060389b8f26aaacc58 Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz-juelich.de> Date: Thu, 10 Dec 2020 16:26:03 +0100 Subject: [PATCH] introduce window_history_offset parameter --- .../data_handler/data_handler_single_station.py | 17 ++++++++++++----- run_mixed_sampling.py | 5 +++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index 8131566a..30995c16 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -34,6 +34,7 @@ DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'max 'pblheight': 'maximum'} DEFAULT_WINDOW_LEAD_TIME = 3 DEFAULT_WINDOW_HISTORY_SIZE = 13 +DEFAULT_WINDOW_HISTORY_OFFSET = 0 DEFAULT_TIME_DIM = "datetime" DEFAULT_TARGET_VAR = "o3" DEFAULT_TARGET_DIM = "variables" @@ -46,7 +47,8 @@ class DataHandlerSingleStation(AbstractDataHandler): def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE, network=DEFAULT_NETWORK, sampling=DEFAULT_SAMPLING, target_dim=DEFAULT_TARGET_DIM, target_var=DEFAULT_TARGET_VAR, time_dim=DEFAULT_TIME_DIM, - window_history_size=DEFAULT_WINDOW_HISTORY_SIZE, window_lead_time=DEFAULT_WINDOW_LEAD_TIME, + window_history_size=DEFAULT_WINDOW_HISTORY_SIZE, window_history_offset=DEFAULT_WINDOW_HISTORY_OFFSET, + window_lead_time=DEFAULT_WINDOW_LEAD_TIME, interpolation_limit: int = 0, interpolation_method: str = DEFAULT_INTERPOLATION_METHOD, overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True, min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, **kwargs): @@ -65,6 +67,7 @@ class DataHandlerSingleStation(AbstractDataHandler): self.target_var = target_var self.time_dim = time_dim self.window_history_size = window_history_size + self.window_history_offset = window_history_offset self.window_lead_time = window_lead_time self.interpolation_limit = interpolation_limit @@ -278,13 +281,16 @@ class DataHandlerSingleStation(AbstractDataHandler): def setup_data_path(self, data_path: str, sampling: str): return os.path.join(os.path.abspath(data_path), sampling) - def shift(self, data: xr.DataArray, dim: str, window: int) -> xr.DataArray: + def shift(self, data: xr.DataArray, dim: str, window: int, offset: int = 0) -> xr.DataArray: """ Shift data multiple times to represent history (if window <= 0) or lead time (if window > 0). :param data: data set to shift :param dim: dimension along shift is applied :param window: number of steps to shift (corresponds to the window length) + :param offset: use offset to move the window by as many time steps as given in offset. This can be used, if the + index time of a history element is not the last timestamp. E.g. you could use offset=23 when dealing with + hourly data in combination with daily data (values from 00 to 23 are aggregated on 00 the same day). :return: shifted data """ @@ -295,9 +301,10 @@ class DataHandlerSingleStation(AbstractDataHandler): else: end = window + 1 res = [] - for w in range(start, end): + _range = list(map(lambda x: x + offset, range(start, end))) + for w in _range: res.append(data.shift({dim: -w})) - window_array = self.create_index_array('window', range(start, end), squeeze_dim=self.target_dim) + window_array = self.create_index_array('window', _range, squeeze_dim=self.target_dim) res = xr.concat(res, dim=window_array) return res @@ -387,7 +394,7 @@ class DataHandlerSingleStation(AbstractDataHandler): """ window = -abs(window) data = self.input_data.data - self.history = self.shift(data, dim_name_of_shift, window) + self.history = self.shift(data, dim_name_of_shift, window, offset=self.window_history_offset) def make_labels(self, dim_name_of_target: str, target_var: str_or_list, dim_name_of_shift: str, window: int) -> None: diff --git a/run_mixed_sampling.py b/run_mixed_sampling.py index a87e9f38..dbc94ef9 100644 --- a/run_mixed_sampling.py +++ b/run_mixed_sampling.py @@ -12,8 +12,9 @@ def main(parser_args): args = dict(sampling="daily", sampling_inputs="hourly", window_history_size=24, + window_history_offset=17, **parser_args.__dict__, - data_handler=DataHandlerSeparationOfScales, + data_handler=DataHandlerMixedSampling, kz_filter_length=[100 * 24, 15 * 24], kz_filter_iter=[4, 5], start="2006-01-01", @@ -21,7 +22,7 @@ def main(parser_args): end="2011-12-31", test_end="2011-12-31", stations=["DEBW107", "DEBW013"], - epochs=100, + epochs=1, network="UBA", ) workflow = DefaultWorkflow(**args) -- GitLab