diff --git a/mlair/data_handler/data_handler_kz_filter.py b/mlair/data_handler/data_handler_kz_filter.py index 78638a13b4ea50cd073ca4599a291342fad849d4..face8f3c400b702209c03fefd7818481a0fb2038 100644 --- a/mlair/data_handler/data_handler_kz_filter.py +++ b/mlair/data_handler/data_handler_kz_filter.py @@ -38,10 +38,7 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation): def _check_sampling(self, **kwargs): assert kwargs.get("sampling") == "hourly" # This data handler requires hourly data resolution - def setup_samples(self): - """ - Setup samples. This method prepares and creates samples X, and labels Y. - """ + def make_input_target(self): data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling, self.station_type, self.network, self.store_data_locally, self.data_origin) self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method, @@ -54,9 +51,6 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation): # import matplotlib.pyplot as plt # self.input_data.sel(filter="74d", variables="temp", Stations="DEBW107").plot() # self.input_data.sel(variables="temp", Stations="DEBW107").plot.line(hue="filter") - if self.do_transformation is True: - self.call_transform() - self.make_samples() @TimeTrackingWrapper def apply_kz_filter(self): @@ -88,6 +82,7 @@ class DataHandlerKzFilterSingleStation(DataHandlerSingleStation): return self.history.transpose(self.time_dim, self.window_dim, self.iter_dim, self.target_dim, self.filter_dim).copy() + class DataHandlerKzFilter(DefaultDataHandler): """Data handler using kz filtered data.""" diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index caaa7a62d1b772808dcaf58abdfa5483e80861e7..ebcfbb4286f40ab2f8be2e1f8e46c7fa5ee45b14 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -54,15 +54,9 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation): assert len(parameter) == 2 # (inputs, targets) kwargs.update({parameter_name: parameter}) - def setup_samples(self): - """ - Setup samples. This method prepares and creates samples X, and labels Y. - """ + def make_input_target(self): self._data = list(map(self.load_and_interpolate, [0, 1])) # load input (0) and target (1) data self.set_inputs_and_targets() - if self.do_transformation is True: - self.call_transform() - self.make_samples() def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]: vars = [self.variables, self.target_var] @@ -104,19 +98,14 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi def _check_sampling(self, **kwargs): assert kwargs.get("sampling") == ("hourly", "daily") - def setup_samples(self): + def make_input_target(self): """ - Setup samples. This method prepares and creates samples X, and labels Y. - A KZ filter is applied on the input data that has hourly resolution. Lables Y are provided as aggregated values with daily resolution. """ self._data = list(map(self.load_and_interpolate, [0, 1])) # load input (0) and target (1) data self.set_inputs_and_targets() self.apply_kz_filter() - if self.do_transformation is True: - self.call_transform() - self.make_samples() def estimate_filter_width(self): """ diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index a894c635282b5879d79426168eb96d64ff5fa2a2..820e601f25e1caa9b1860ed8f2f12efb1f0aa299 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -5,6 +5,7 @@ __date__ = '2020-07-20' import copy import datetime as dt +import hashlib import logging import os from functools import reduce @@ -54,10 +55,16 @@ class DataHandlerSingleStation(AbstractDataHandler): interpolation_limit: Union[int, Tuple[int]] = DEFAULT_INTERPOLATION_LIMIT, interpolation_method: Union[str, Tuple[str]] = DEFAULT_INTERPOLATION_METHOD, overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True, - min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, **kwargs): + min_length: int = 0, start=None, end=None, variables=None, data_origin: Dict = None, + lazy_loading: bool = False, **kwargs): super().__init__() self.station = helpers.to_list(station) self.path = self.setup_data_path(data_path, sampling) + self.lazy = lazy_loading + self.lazy_path = None + if self.lazy is True: + self.lazy_path = os.path.join(data_path, "lazy_data", self.__class__.__name__) + check_path_and_create(self.lazy_path) self.statistics_per_var = statistics_per_var self.data_origin = data_origin self.do_transformation = transformation is not None @@ -94,6 +101,7 @@ class DataHandlerSingleStation(AbstractDataHandler): self.observation = None # create samples + # self.hash() self.setup_samples() def __str__(self): @@ -215,15 +223,18 @@ class DataHandlerSingleStation(AbstractDataHandler): """ Setup samples. This method prepares and creates samples X, and labels Y. """ + self.make_input_target() + if self.do_transformation is True: + self.call_transform() + self.make_samples() + + def make_input_target(self): data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling, self.station_type, self.network, self.store_data_locally, self.data_origin, self.start, self.end) self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method, limit=self.interpolation_limit) self.set_inputs_and_targets() - if self.do_transformation is True: - self.call_transform() - self.make_samples() def set_inputs_and_targets(self): inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables)}) @@ -658,6 +669,17 @@ class DataHandlerSingleStation(AbstractDataHandler): return self.transform(data, dim=dim, opts=self._transformation[pos], inverse=inverse, transformation_dim=self.target_dim) + def _get_hash(self): + hash_list = [self.station, self.statistics_per_var, self.data_origin, self.station_type, self.network, + self.sampling, self.target_dim, self.target_var, self.time_dim, self.iter_dim, self.window_dim, + self.window_history_size, self.window_history_offset, self.window_lead_time, + self.interpolation_limit, self.interpolation_method, self.min_length, self.start, self.end] + + hash = "".join([str(e) for e in hash_list]).encode("utf-8") + m = hashlib.sha256() + m.update(hash) + return m.hexdigest() + if __name__ == "__main__": # dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})