From 24bc55c37b0b7362f7ce8510d37f528e3a5fc6f1 Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz.juelich.de> Date: Fri, 18 Oct 2019 13:11:08 +0200 Subject: [PATCH] use new join class to load data in data preparation, data directory is excluded from git tracking --- src/data_preparation.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/data_preparation.py b/src/data_preparation.py index ba8180f9..978315fd 100644 --- a/src/data_preparation.py +++ b/src/data_preparation.py @@ -6,11 +6,12 @@ import xarray as xr import pandas as pd import logging import os +from src import join class DataPrep: - def __init__(self, path, network, stations, variables, **kwargs): + def __init__(self, path: str, network: str, stations, variables, **kwargs): self.path = path self.network = network self.stations = stations @@ -19,26 +20,29 @@ class DataPrep: if self.statistics_per_var is not None: self.load_data() else: - self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.stations, - self.variables, **kwargs) + raise NotImplementedError + # self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.stations, + # self.variables, **kwargs) self.mean = None self.std = None self.df = None self.history = None self.label = None self.kwargs = kwargs + self.data = None + self.meta = None def load_data(self): self.check_path_and_create(self.path) - file_name = "{}{}_{}.nc".format(self.path, ''.join(self.stations), '_'.join(sorted(self.variables))) - meta_file = "{}{}_{}_meta.csv".format(self.path, ''.join(self.stations), '_'.join(sorted(self.variables))) + file_name = self._set_file_name() + meta_file = self._set_meta_file_name() try: self.data = xr.open_dataarray(file_name) self.meta = pd.read_csv(meta_file, index_col=0) - except FileExistsError as e: + except FileNotFoundError as e: logging.warning(e) df_all = {} - df, self.meta = Fkf.download_join(station_name=self.stations, statvar=self.statistics_per_var) + df, self.meta = join.download_join(station_name=self.stations, statvar=self.statistics_per_var) df_all[self.stations[0]] = df # convert df_all to xarray xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()} @@ -48,18 +52,18 @@ class DataPrep: xarr.to_netcdf(path=file_name) self.meta.to_csv(meta_file) + def _set_file_name(self): + return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}.nc" + + def _set_meta_file_name(self): + return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}_meta.csv" + def __repr__(self): - return "DataPrep(path='{}', network='{}', stations={}, variables={}, **{}".format(self.path, self.network, - self.stations, self.variables, - self.kwargs) + return f"Dataprep(path='{self.path}', network='{self.network}', stations={self.stations}, " \ + f"variables={self.variables}, **{self.kwargs}" @staticmethod def check_path_and_create(path): - """ - - :param path: - :return: - """ try: os.makedirs(path) logging.info("Created path: {}".format(path)) @@ -91,3 +95,8 @@ class DataPrep: def create_indexarray(index_name, index_values): raise NotImplementedError + +if __name__ == "__main__": + + dp = DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) + print(dp) -- GitLab