Skip to content
Snippets Groups Projects

Resolve "load era5 data from toar db"

Merged Ghost User requested to merge lukas_issue449_refac_load-era5-data-from-toar-db into develop
Files
13
@@ -336,7 +336,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
os.remove(file_name)
if os.path.exists(meta_file):
os.remove(meta_file)
data, meta = self.download_data(file_name, meta_file, station, statistics_per_var, sampling,
data, meta = data_sources.download_data(file_name, meta_file, station, statistics_per_var, sampling,
store_data_locally=store_data_locally, data_origin=data_origin,
time_dim=self.time_dim, target_dim=self.target_dim, iter_dim=self.iter_dim)
logging.debug(f"loaded new data")
@@ -350,75 +350,16 @@ class DataHandlerSingleStation(AbstractDataHandler):
except FileNotFoundError as e:
logging.debug(e)
logging.debug(f"load new data")
data, meta = self.download_data(file_name, meta_file, station, statistics_per_var, sampling,
store_data_locally=store_data_locally, data_origin=data_origin,
time_dim=self.time_dim, target_dim=self.target_dim,
iter_dim=self.iter_dim)
data, meta = data_sources.download_data(file_name, meta_file, station, statistics_per_var, sampling,
store_data_locally=store_data_locally, data_origin=data_origin,
time_dim=self.time_dim, target_dim=self.target_dim,
iter_dim=self.iter_dim)
logging.debug("loading finished")
# create slices and check for negative concentration.
data = self._slice_prep(data, start=start, end=end)
data = self.check_for_negative_concentrations(data)
return data, meta
def download_data(self, file_name: str, meta_file: str, station, statistics_per_var, sampling,
                  store_data_locally=True, data_origin: Dict = None, time_dim=DEFAULT_TIME_DIM,
                  target_dim=DEFAULT_TARGET_DIM, iter_dim=DEFAULT_ITER_DIM) -> [xr.DataArray, pd.DataFrame]:
    """
    Download data from TOAR database using the JOIN interface or load local era5 data.

    Requested variables are split by their ``data_origin`` entry: variables tagged "era5" are
    read from local era5 data, all remaining variables are downloaded from toar-data (v2 & v1
    combined). The merged result is transformed to a xarray dataset. If class attribute
    store_data_locally is true, data is additionally stored locally using given names for file
    and meta file.

    :param file_name: name of file to save data to (containing full path)
    :param meta_file: name of the meta data file (also containing full path)

    :return: downloaded data and its meta data
    """
    era5_frame, toar_frame = None, None
    era5_meta, toar_meta = None, None

    # split requested variables into an era5-sourced and a toar-sourced subset
    if data_origin is None:
        # no origin given: request every variable from toar-data
        era5_origin, toar_origin = None, None
        era5_stats, toar_stats = statistics_per_var, statistics_per_var
    else:
        era5_origin = filter_dict_by_value(data_origin, "era5", True)
        era5_stats = select_from_dict(statistics_per_var, era5_origin.keys())
        toar_origin = filter_dict_by_value(data_origin, "era5", False)
        # toar receives the complement of the era5 selection, so variables without an
        # explicit origin still end up on the toar side
        toar_stats = select_from_dict(statistics_per_var, era5_origin.keys(), filter_cond=False)
        assert len(era5_origin) + len(toar_origin) == len(data_origin)
        assert len(era5_stats) + len(toar_stats) == len(statistics_per_var)

    # load era5 data
    if era5_origin is not None and len(era5_stats) > 0:
        era5_frame, era5_meta = data_sources.era5.load_era5(station_name=station, stat_var=era5_stats,
                                                            sampling=sampling, data_origin=era5_origin)
    # load combined data from toar-data (v2 & v1)
    if toar_origin is None or len(toar_stats) > 0:
        toar_frame, toar_meta = data_sources.toar_data.download_toar(station=station, toar_stats=toar_stats,
                                                                     sampling=sampling, data_origin=toar_origin)

    if era5_frame is None and toar_frame is None:
        raise data_sources.toar_data.EmptyQueryResult(f"No data available for era5 and toar-data")

    df = pd.concat([era5_frame, toar_frame], axis=1, sort=True)
    # merge meta data; prefer era5 entries where both sources provided a value
    if era5_meta is None or toar_meta is None:
        meta = toar_meta if era5_meta is None else era5_meta
    else:
        meta = era5_meta.combine_first(toar_meta)
    meta.loc["data_origin"] = str(data_origin)
    meta.loc["statistics_per_var"] = str(statistics_per_var)

    # convert to xarray with the station id as iteration dimension
    frames = {station[0]: df}
    arrays = {key: xr.DataArray(frame, dims=[time_dim, target_dim]) for key, frame in frames.items()}
    xarr = xr.Dataset(arrays).to_array(dim=iter_dim)

    if store_data_locally is True:
        # save locally as nc/csv file
        xarr.to_netcdf(path=file_name)
        meta.to_csv(meta_file)
    return xarr, meta
@staticmethod
def check_station_meta(meta, station, data_origin, statistics_per_var):
"""
Loading