diff --git a/.gitignore b/.gitignore index 56b52c1e8a076839cfd155613b04a95ffaa19541..22113e4edc78c3bd956b15cd31b80ecc26754c46 100644 --- a/.gitignore +++ b/.gitignore @@ -40,8 +40,9 @@ Thumbs.db .idea/ /venv/ -# check plot folder # -##################### +# don't check data and plot folder # +#################################### +/data/ /plots/ # tmp folder # diff --git a/src/data_preparation.py b/src/data_preparation.py index 978315fd369d5b91141ce26ad4291fd90a9db71b..cc2abdf1f0d3bd1fe55fc69d2ace9eb42ead2038 100644 --- a/src/data_preparation.py +++ b/src/data_preparation.py @@ -6,22 +6,23 @@ import xarray as xr import pandas as pd import logging import os -from src import join +from src import join, helpers +from typing import Union, List class DataPrep: - def __init__(self, path: str, network: str, stations, variables, **kwargs): + def __init__(self, path: str, network: str, station: Union[str, List[str]], variables, **kwargs): self.path = path self.network = network - self.stations = stations + self.station = helpers.to_list(station) self.variables = variables self.statistics_per_var = kwargs.get("statistics_per_var", None) if self.statistics_per_var is not None: self.load_data() else: raise NotImplementedError - # self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.stations, + # self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.station, # self.variables, **kwargs) self.mean = None self.std = None @@ -33,7 +34,7 @@ class DataPrep: self.meta = None def load_data(self): - self.check_path_and_create(self.path) + self.check_path_and_create() file_name = self._set_file_name() meta_file = self._set_meta_file_name() try: @@ -42,8 +43,8 @@ class DataPrep: except FileNotFoundError as e: logging.warning(e) df_all = {} - df, self.meta = join.download_join(station_name=self.stations, statvar=self.statistics_per_var) - df_all[self.stations[0]] = df + df, self.meta = join.download_join(station_name=self.station, statvar=self.statistics_per_var) + df_all[self.station[0]] = df # convert df_all to xarray xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()} xarr = xr.Dataset(xarr).to_array(dim='Stations') @@ -53,20 +54,19 @@ class DataPrep: self.meta.to_csv(meta_file) def _set_file_name(self): - return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}.nc" + return f"{self.path}{''.join(self.station)}_{'_'.join(sorted(self.variables))}.nc" def _set_meta_file_name(self): - return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}_meta.csv" + return f"{self.path}{''.join(self.station)}_{'_'.join(sorted(self.variables))}_meta.csv" def __repr__(self): - return f"Dataprep(path='{self.path}', network='{self.network}', stations={self.stations}, " \ + return f"Dataprep(path='{self.path}', network='{self.network}', station={self.station}, " \ f"variables={self.variables}, **{self.kwargs}" - @staticmethod - def check_path_and_create(path): + def check_path_and_create(self): try: - os.makedirs(path) - logging.info("Created path: {}".format(path)) + os.makedirs(self.path) + logging.info("Created path: {}".format(self.path)) except FileExistsError: pass diff --git a/src/helpers.py b/src/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..424b2fb519726adde9d8d30fb610379f9b4dfed3 --- /dev/null +++ b/src/helpers.py @@ -0,0 +1,8 @@ +__author__ = 'Lukas Leufen' +__date__ = '2019-10-21' + + +def to_list(arg): + if not isinstance(arg, list): + arg = [arg] + return arg