Skip to content
Snippets Groups Projects
Commit b10bca2b authored by leufen1's avatar leufen1
Browse files

data handler single station now has a submethod make_input_target

parent 7aefc11f
Branches
Tags
8 merge requests: !319 add all changes of dev into release v1.4.0 branch, !318 Resolve "release v1.4.0", !283 Merge latest develop into falcos issue, !279 include Develop, !278 Felix issue295 transformation parameters in data handler, !275 include lazy preprocessing, !274 Resolve "implement lazy data preprocessing", !259 Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline #63200 failed
This commit is part of merge request !318. Comments created here will be created in the context of that merge request.
......@@ -38,10 +38,7 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
def _check_sampling(self, **kwargs):
    """
    Ensure that the keyword argument ``sampling`` is set to ``"hourly"``.

    :param kwargs: keyword arguments, the entry ``sampling`` is inspected
    :raises AssertionError: if ``sampling`` is missing or not equal to ``"hourly"``
    """
    sampling = kwargs.get("sampling")
    # This data handler requires hourly data resolution. The error is raised explicitly (instead of using an
    # ``assert`` statement) so that the check is still performed when Python runs with ``-O``.
    if not (sampling == "hourly"):
        raise AssertionError(f"sampling must be 'hourly' for this data handler, but is {sampling!r}")
def setup_samples(self):
"""
Setup samples. This method prepares and creates samples X, and labels Y.
"""
def make_input_target(self):
data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
self.station_type, self.network, self.store_data_locally, self.data_origin)
self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
......@@ -54,9 +51,6 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
# import matplotlib.pyplot as plt
# self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot()
# self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter")
if self.do_transformation is True:
self.call_transform()
self.make_samples()
@TimeTrackingWrapper
def apply_kz_filter(self):
......@@ -88,6 +82,7 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation):
return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim,
self.filter_dim).copy()
class DataHandlerKzFilter(DefaultDataHandler):
"""Data handler using kz filtered data."""
......
......@@ -54,15 +54,9 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation):
assert len(parameter) == 2 # (inputs, targets)
kwargs.update({parameter_name: parameter})
def setup_samples(self):
"""
Setup samples. This method prepares and creates samples X, and labels Y.
"""
def make_input_target(self):
self._data = list(map(self.load_and_interpolate, [0, 1])) # load input (0) and target (1) data
self.set_inputs_and_targets()
if self.do_transformation is True:
self.call_transform()
self.make_samples()
def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]:
vars = [self.variables, self.target_var]
......@@ -104,19 +98,14 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
def _check_sampling(self, **kwargs):
    """
    Ensure that the keyword argument ``sampling`` is set to the tuple ``("hourly", "daily")``.

    :param kwargs: keyword arguments, the entry ``sampling`` is inspected
    :raises AssertionError: if ``sampling`` is missing or not equal to ``("hourly", "daily")``
    """
    sampling = kwargs.get("sampling")
    # The error is raised explicitly (instead of using an ``assert`` statement) so that the check is still
    # performed when Python runs with ``-O``.
    if not (sampling == ("hourly", "daily")):
        raise AssertionError(f"sampling must be ('hourly', 'daily') for this data handler, but is {sampling!r}")
def setup_samples(self):
def make_input_target(self):
"""
Setup samples. This method prepares and creates samples X, and labels Y.
A KZ filter is applied on the input data that has hourly resolution. Labels Y are provided as aggregated values
with daily resolution.
"""
self._data = list(map(self.load_and_interpolate, [0, 1])) # load input (0) and target (1) data
self.set_inputs_and_targets()
self.apply_kz_filter()
if self.do_transformation is True:
self.call_transform()
self.make_samples()
def estimate_filter_width(self):
"""
......
......@@ -5,6 +5,7 @@ __date__ = '2020-07-20'
import copy
import datetime as dt
import hashlib
import logging
import os
from functools import reduce
......@@ -54,10 +55,16 @@ class DataHandlerSingleStation(AbstractDataHandler):
interpolation_limit: Union[int, Tuple[int]] = DEFAULT_INTERPOLATION_LIMIT,
interpolation_method: Union[str, Tuple[str]] = DEFAULT_INTERPOLATION_METHOD,
overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True,
min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, **kwargs):
min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None,
lazy_loading: bool = False, **kwargs):
super().__init__()
self.station = helpers.to_list(station)
self.path = self.setup_data_path(data_path, sampling)
self.lazy = lazy_loading
self.lazy_path = None
if self.lazy is True:
self.lazy_path = os.path.join(data_path, "lazy_data", self.__class__.__name__)
check_path_and_create(self.lazy_path)
self.statistics_per_var = statistics_per_var
self.data_origin = data_origin
self.do_transformation = transformation is not None
......@@ -94,6 +101,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
self.observation = None
# create samples
# self.hash()
self.setup_samples()
def __str__(self):
......@@ -215,15 +223,18 @@ class DataHandlerSingleStation(AbstractDataHandler):
"""
Setup samples. This method prepares and creates samples X, and labels Y.
"""
self.make_input_target()
if self.do_transformation is True:
self.call_transform()
self.make_samples()
def make_input_target(self):
data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
self.station_type, self.network, self.store_data_locally, self.data_origin,
self.start, self.end)
self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
limit=self.interpolation_limit)
self.set_inputs_and_targets()
if self.do_transformation is True:
self.call_transform()
self.make_samples()
def set_inputs_and_targets(self):
inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables)})
......@@ -658,6 +669,17 @@ class DataHandlerSingleStation(AbstractDataHandler):
return self.transform(data, dim=dim, opts=self._transformation[pos], inverse=inverse,
transformation_dim=self.target_dim)
def _get_hash(self):
    """
    Return a SHA-256 hex digest computed from the settings of this data handler.

    The digest is built from the ``str()`` representation of the attributes collected in ``hash_list``
    (station, data loading settings, dimension attributes, window settings, interpolation settings, minimum
    length and start / end), concatenated in exactly this order and encoded as UTF-8.

    :return: hexadecimal SHA-256 digest as string
    """
    hash_list = [self.station, self.statistics_per_var, self.data_origin, self.station_type, self.network,
                 self.sampling, self.target_dim, self.target_var, self.time_dim, self.iter_dim, self.window_dim,
                 self.window_history_size, self.window_history_offset, self.window_lead_time,
                 self.interpolation_limit, self.interpolation_method, self.min_length, self.start, self.end]
    # NOTE(review): the values are joined without a separator, so different settings can produce the same
    # string (e.g. "1" + "23" and "12" + "3") - confirm whether this is acceptable for the users of the hash.
    encoded_settings = "".join(str(e) for e in hash_list).encode("utf-8")
    return hashlib.sha256(encoded_settings).hexdigest()
if __name__ == "__main__":
# dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment