Skip to content
Snippets Groups Projects
Commit dd024db9 authored by leufen1
Browse files

introduce window_history_offset parameter

parent 5a13a1b7
No related branches found
No related tags found
3 merge requests: !226 Develop, !225 Resolve "release v1.2.0", !207 Resolve "REFAC: history for mixed sampling data handler"
Pipeline #54874 passed
...@@ -34,6 +34,7 @@ DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'max ...@@ -34,6 +34,7 @@ DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'max
'pblheight': 'maximum'} 'pblheight': 'maximum'}
DEFAULT_WINDOW_LEAD_TIME = 3 DEFAULT_WINDOW_LEAD_TIME = 3
DEFAULT_WINDOW_HISTORY_SIZE = 13 DEFAULT_WINDOW_HISTORY_SIZE = 13
DEFAULT_WINDOW_HISTORY_OFFSET = 0
DEFAULT_TIME_DIM = "datetime" DEFAULT_TIME_DIM = "datetime"
DEFAULT_TARGET_VAR = "o3" DEFAULT_TARGET_VAR = "o3"
DEFAULT_TARGET_DIM = "variables" DEFAULT_TARGET_DIM = "variables"
...@@ -46,7 +47,8 @@ class DataHandlerSingleStation(AbstractDataHandler): ...@@ -46,7 +47,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE, def __init__(self, station, data_path, statistics_per_var, station_type=DEFAULT_STATION_TYPE,
network=DEFAULT_NETWORK, sampling=DEFAULT_SAMPLING, target_dim=DEFAULT_TARGET_DIM, network=DEFAULT_NETWORK, sampling=DEFAULT_SAMPLING, target_dim=DEFAULT_TARGET_DIM,
target_var=DEFAULT_TARGET_VAR, time_dim=DEFAULT_TIME_DIM, target_var=DEFAULT_TARGET_VAR, time_dim=DEFAULT_TIME_DIM,
window_history_size=DEFAULT_WINDOW_HISTORY_SIZE, window_lead_time=DEFAULT_WINDOW_LEAD_TIME, window_history_size=DEFAULT_WINDOW_HISTORY_SIZE, window_history_offset=DEFAULT_WINDOW_HISTORY_OFFSET,
window_lead_time=DEFAULT_WINDOW_LEAD_TIME,
interpolation_limit: int = 0, interpolation_method: str = DEFAULT_INTERPOLATION_METHOD, interpolation_limit: int = 0, interpolation_method: str = DEFAULT_INTERPOLATION_METHOD,
overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True, overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True,
min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, **kwargs): min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, **kwargs):
...@@ -65,6 +67,7 @@ class DataHandlerSingleStation(AbstractDataHandler): ...@@ -65,6 +67,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
self.target_var = target_var self.target_var = target_var
self.time_dim = time_dim self.time_dim = time_dim
self.window_history_size = window_history_size self.window_history_size = window_history_size
self.window_history_offset = window_history_offset
self.window_lead_time = window_lead_time self.window_lead_time = window_lead_time
self.interpolation_limit = interpolation_limit self.interpolation_limit = interpolation_limit
...@@ -278,13 +281,16 @@ class DataHandlerSingleStation(AbstractDataHandler): ...@@ -278,13 +281,16 @@ class DataHandlerSingleStation(AbstractDataHandler):
def setup_data_path(self, data_path: str, sampling: str): def setup_data_path(self, data_path: str, sampling: str):
return os.path.join(os.path.abspath(data_path), sampling) return os.path.join(os.path.abspath(data_path), sampling)
def shift(self, data: xr.DataArray, dim: str, window: int) -> xr.DataArray: def shift(self, data: xr.DataArray, dim: str, window: int, offset: int = 0) -> xr.DataArray:
""" """
Shift data multiple times to represent history (if window <= 0) or lead time (if window > 0). Shift data multiple times to represent history (if window <= 0) or lead time (if window > 0).
:param data: data set to shift :param data: data set to shift
:param dim: dimension along shift is applied :param dim: dimension along shift is applied
:param window: number of steps to shift (corresponds to the window length) :param window: number of steps to shift (corresponds to the window length)
:param offset: move the window by as many time steps as given in offset. This is useful if the index time
of a history element is not the last timestamp, e.g. you could use offset=23 when combining hourly data
with daily data (values from 00 to 23 are aggregated on 00 of the same day).
:return: shifted data :return: shifted data
""" """
...@@ -295,9 +301,10 @@ class DataHandlerSingleStation(AbstractDataHandler): ...@@ -295,9 +301,10 @@ class DataHandlerSingleStation(AbstractDataHandler):
else: else:
end = window + 1 end = window + 1
res = [] res = []
for w in range(start, end): _range = list(map(lambda x: x + offset, range(start, end)))
for w in _range:
res.append(data.shift({dim: -w})) res.append(data.shift({dim: -w}))
window_array = self.create_index_array('window', range(start, end), squeeze_dim=self.target_dim) window_array = self.create_index_array('window', _range, squeeze_dim=self.target_dim)
res = xr.concat(res, dim=window_array) res = xr.concat(res, dim=window_array)
return res return res
...@@ -387,7 +394,7 @@ class DataHandlerSingleStation(AbstractDataHandler): ...@@ -387,7 +394,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
""" """
window = -abs(window) window = -abs(window)
data = self.input_data.data data = self.input_data.data
self.history = self.shift(data, dim_name_of_shift, window) self.history = self.shift(data, dim_name_of_shift, window, offset=self.window_history_offset)
def make_labels(self, dim_name_of_target: str, target_var: str_or_list, dim_name_of_shift: str, def make_labels(self, dim_name_of_target: str, target_var: str_or_list, dim_name_of_shift: str,
window: int) -> None: window: int) -> None:
......
...@@ -12,8 +12,9 @@ def main(parser_args): ...@@ -12,8 +12,9 @@ def main(parser_args):
args = dict(sampling="daily", args = dict(sampling="daily",
sampling_inputs="hourly", sampling_inputs="hourly",
window_history_size=24, window_history_size=24,
window_history_offset=17,
**parser_args.__dict__, **parser_args.__dict__,
data_handler=DataHandlerSeparationOfScales, data_handler=DataHandlerMixedSampling,
kz_filter_length=[100 * 24, 15 * 24], kz_filter_length=[100 * 24, 15 * 24],
kz_filter_iter=[4, 5], kz_filter_iter=[4, 5],
start="2006-01-01", start="2006-01-01",
...@@ -21,7 +22,7 @@ def main(parser_args): ...@@ -21,7 +22,7 @@ def main(parser_args):
end="2011-12-31", end="2011-12-31",
test_end="2011-12-31", test_end="2011-12-31",
stations=["DEBW107", "DEBW013"], stations=["DEBW107", "DEBW013"],
epochs=100, epochs=1,
network="UBA", network="UBA",
) )
workflow = DefaultWorkflow(**args) workflow = DefaultWorkflow(**args)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment