Skip to content
Snippets Groups Projects
Commit 24bc55c3 authored by leufen1's avatar leufen1
Browse files

use new join class to load data in data preparation, data directory is excluded from git tracking

parent 9f562b91
No related branches found
No related tags found
2 merge requests: !6 "updated inception model and data prep class", !4 "data prep class"
...@@ -6,11 +6,12 @@ import xarray as xr ...@@ -6,11 +6,12 @@ import xarray as xr
import pandas as pd import pandas as pd
import logging import logging
import os import os
from src import join
class DataPrep: class DataPrep:
def __init__(self, path, network, stations, variables, **kwargs): def __init__(self, path: str, network: str, stations, variables, **kwargs):
self.path = path self.path = path
self.network = network self.network = network
self.stations = stations self.stations = stations
...@@ -19,26 +20,29 @@ class DataPrep: ...@@ -19,26 +20,29 @@ class DataPrep:
if self.statistics_per_var is not None: if self.statistics_per_var is not None:
self.load_data() self.load_data()
else: else:
self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.stations, raise NotImplementedError
self.variables, **kwargs) # self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.stations,
# self.variables, **kwargs)
self.mean = None self.mean = None
self.std = None self.std = None
self.df = None self.df = None
self.history = None self.history = None
self.label = None self.label = None
self.kwargs = kwargs self.kwargs = kwargs
self.data = None
self.meta = None
def load_data(self): def load_data(self):
self.check_path_and_create(self.path) self.check_path_and_create(self.path)
file_name = "{}{}_{}.nc".format(self.path, ''.join(self.stations), '_'.join(sorted(self.variables))) file_name = self._set_file_name()
meta_file = "{}{}_{}_meta.csv".format(self.path, ''.join(self.stations), '_'.join(sorted(self.variables))) meta_file = self._set_meta_file_name()
try: try:
self.data = xr.open_dataarray(file_name) self.data = xr.open_dataarray(file_name)
self.meta = pd.read_csv(meta_file, index_col=0) self.meta = pd.read_csv(meta_file, index_col=0)
except FileExistsError as e: except FileNotFoundError as e:
logging.warning(e) logging.warning(e)
df_all = {} df_all = {}
df, self.meta = Fkf.download_join(station_name=self.stations, statvar=self.statistics_per_var) df, self.meta = join.download_join(station_name=self.stations, statvar=self.statistics_per_var)
df_all[self.stations[0]] = df df_all[self.stations[0]] = df
# convert df_all to xarray # convert df_all to xarray
xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()} xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()}
...@@ -48,18 +52,18 @@ class DataPrep: ...@@ -48,18 +52,18 @@ class DataPrep:
xarr.to_netcdf(path=file_name) xarr.to_netcdf(path=file_name)
self.meta.to_csv(meta_file) self.meta.to_csv(meta_file)
def _set_file_name(self):
return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}.nc"
def _set_meta_file_name(self):
return f"{self.path}{''.join(self.stations)}_{'_'.join(sorted(self.variables))}_meta.csv"
def __repr__(self): def __repr__(self):
return "DataPrep(path='{}', network='{}', stations={}, variables={}, **{}".format(self.path, self.network, return f"Dataprep(path='{self.path}', network='{self.network}', stations={self.stations}, " \
self.stations, self.variables, f"variables={self.variables}, **{self.kwargs}"
self.kwargs)
@staticmethod @staticmethod
def check_path_and_create(path): def check_path_and_create(path):
"""
:param path:
:return:
"""
try: try:
os.makedirs(path) os.makedirs(path)
logging.info("Created path: {}".format(path)) logging.info("Created path: {}".format(path))
...@@ -91,3 +95,8 @@ class DataPrep: ...@@ -91,3 +95,8 @@ class DataPrep:
def create_indexarray(index_name, index_values): def create_indexarray(index_name, index_values):
raise NotImplementedError raise NotImplementedError
if __name__ == "__main__":
dp = DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
print(dp)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment