From 26f0665220b6de966a4070413577a2e7f07bede5 Mon Sep 17 00:00:00 2001 From: Felix Kleinert <f.kleinert@fz-juelich.de> Date: Wed, 24 Aug 2022 18:20:09 +0200 Subject: [PATCH] update toar_data_v2 to process multiple stats per variable --- mlair/helpers/data_sources/join.py | 2 +- mlair/helpers/data_sources/toar_data_v2.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mlair/helpers/data_sources/join.py b/mlair/helpers/data_sources/join.py index 279585bf..30ba3196 100644 --- a/mlair/helpers/data_sources/join.py +++ b/mlair/helpers/data_sources/join.py @@ -336,7 +336,7 @@ def _select_distinct_data_origin(vars: List[Dict], data_origin: Dict) -> (Dict[s return selected, data_origin -def _save_to_pandas(df: Union[pd.DataFrame, None], data: dict, stat: str, var: str) -> pd.DataFrame: +def _save_to_pandas(df: Union[pd.DataFrame, None], data: dict, stat: Union[str, list], var: str) -> pd.DataFrame: """ Save given data in data frame. diff --git a/mlair/helpers/data_sources/toar_data_v2.py b/mlair/helpers/data_sources/toar_data_v2.py index 0fa53a7e..0d46229b 100644 --- a/mlair/helpers/data_sources/toar_data_v2.py +++ b/mlair/helpers/data_sources/toar_data_v2.py @@ -72,7 +72,8 @@ def download_toar(station_name: Union[str, List[str]], stat_var: dict, for var_meta, opts in meta_and_opts: data_var.extend(load_timeseries_data(var_meta, data_url_base, opts, headers, sampling)) data_dict[var] = merge_data(*data_var, sampling=sampling) - data = pd.DataFrame.from_dict(data_dict) + # data = pd.DataFrame.from_dict(data_dict) + data = pd.concat(data_dict.values(), axis=1) data = correct_timezone(data, station_meta, sampling) meta = combine_meta_data(station_meta, {k: v[0] for k, v in timeseries_meta.items()}) @@ -123,7 +124,7 @@ def prepare_meta(meta, sampling, stat_var, var): if sampling == "daily": opts["timeseries_id"] = m.pop("id") m["id"] = None - opts["names"] = stat_var[var] + opts["names"] = ','.join(to_list(stat_var[var])) opts["sampling"] = sampling out.append(([m], opts)) return out @@ -174,7 +175,8 @@ def load_timeseries_data(timeseries_meta, url_base, opts, headers, sampling): data = pd.read_csv(StringIO(res), comment="#", index_col="datetime", parse_dates=True, infer_datetime_format=True) if len(data.index) > 0: - data = data[correct_stat_name(opts.get("names", "value"))].rename(meta["variable"]["name"]) + stat_name = [correct_stat_name(s) for s in to_list(opts.get("names", "value").split(","))] + data.columns = [f'{meta["variable"]["name"]}_{stat}' for stat in stat_name] coll.append(data) return coll -- GitLab