From b4c6d786372648f985efdcaa60b14728969223ef Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz.juelich.de> Date: Mon, 21 Oct 2019 10:42:46 +0200 Subject: [PATCH] rename stations to station, create helpers module for general functions --- .gitignore | 5 +++-- src/data_preparation.py | 28 ++++++++++++++-------------- src/helpers.py | 8 ++++++++ 3 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 src/helpers.py diff --git a/.gitignore b/.gitignore index 56b52c1e..22113e4e 100644 --- a/.gitignore +++ b/.gitignore @@ -40,8 +40,9 @@ Thumbs.db .idea/ /venv/ -# check plot folder # -##################### +# don't check data and plot folder # +#################################### +/data/ /plots/ # tmp folder # diff --git a/src/data_preparation.py b/src/data_preparation.py index 978315fd..cc2abdf1 100644 --- a/src/data_preparation.py +++ b/src/data_preparation.py @@ -6,22 +6,23 @@ import xarray as xr import pandas as pd import logging import os -from src import join +from src import join, helpers +from typing import Union, List class DataPrep: - def __init__(self, path: str, network: str, stations, variables, **kwargs): + def __init__(self, path: str, network: str, station: Union[str, List[str]], variables, **kwargs): self.path = path self.network = network - self.stations = stations + self.station = helpers.to_list(station) self.variables = variables self.statistics_per_var = kwargs.get("statistics_per_var", None) if self.statistics_per_var is not None: self.load_data() else: raise NotImplementedError - # self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.stations, + # self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.station, # self.variables, **kwargs) self.mean = None self.std = None @@ -33,7 +34,7 @@ class DataPrep: self.meta = None def load_data(self): - self.check_path_and_create(self.path) + self.check_path_and_create() file_name = self._set_file_name() meta_file = self._set_meta_file_name() try: @@ -42,8 +43,8 @@ class DataPrep: except FileNotFoundError as e: logging.warning(e) df_all = {} - df, self.meta = join.download_join(station_name=self.stations, statvar=self.statistics_per_var) - df_all[self.stations[0]] = df + df, self.meta = join.download_join(station_name=self.station, statvar=self.statistics_per_var) + df_all[self.station[0]] = df # convert df_all to xarray xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()} xarr = xr.Dataset(xarr).to_array(dim='Stations') @@ -53,20 +54,19 @@ class DataPrep: self.meta.to_csv(meta_file) def _set_file_name(self): - return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}.nc" + return f"{self.path}{''.join(self.station)}_{'_'.join(sorted(self.variables))}.nc" def _set_meta_file_name(self): - return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}_meta.csv" + return f"{self.path}{''.join(self.station)}_{'_'.join(sorted(self.variables))}_meta.csv" def __repr__(self): - return f"Dataprep(path='{self.path}', network='{self.network}', stations={self.stations}, " \ + return f"Dataprep(path='{self.path}', network='{self.network}', station={self.station}, " \ f"variables={self.variables}, **{self.kwargs}" - @staticmethod - def check_path_and_create(path): + def check_path_and_create(self): try: - os.makedirs(path) - logging.info("Created path: {}".format(path)) + os.makedirs(self.path) + logging.info("Created path: {}".format(self.path)) except FileExistsError: pass diff --git a/src/helpers.py b/src/helpers.py new file mode 100644 index 00000000..424b2fb5 --- /dev/null +++ b/src/helpers.py @@ -0,0 +1,8 @@ +__author__ = 'Lukas Leufen' +__date__ = '2019-10-21' + + +def to_list(arg): + if not isinstance(arg, list): + arg = [arg] + return arg -- GitLab