diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index eaa6a21175bd5f88c32c9c3cb74947c0cc0956a3..50f3928dc8d924dc9588d99c702283d72b3571b7 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -7,7 +7,7 @@ from mlair.data_handler.data_handler_with_filter import DataHandlerFirFilterSing from mlair.data_handler.data_handler_with_filter import DataHandlerClimateFirFilter, DataHandlerFirFilter from mlair.data_handler import DefaultDataHandler from mlair import helpers -from mlair.helpers import to_list, sort_like +from mlair.helpers import to_list, sort_like, data_sources from mlair.configuration.defaults import DEFAULT_SAMPLING, DEFAULT_INTERPOLATION_LIMIT, DEFAULT_INTERPOLATION_METHOD from mlair.helpers.filter import filter_width_kzf @@ -35,6 +35,26 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation): self.update_kwargs("interpolation_method", DEFAULT_INTERPOLATION_METHOD, kwargs) super().__init__(*args, **kwargs) + def _set_variables(self, sampling, statistics_per_var, variables): + vars_i = self._get_updated_vars_for_init(sampling[0], statistics_per_var, variables) + # vars_o = self._get_updated_vars_for_init(sampling[1], statistics_per_var, variables) + # self.variables = [vars_i, vars_o] + self.variables = vars_i + + @property + def target_var(self): + """ + Combine target var and corresponding first statistics to create combined targetvar name + :return: + :rtype: + """ + assert len(self.sampling) == 2 + if self.sampling[-1] == "hourly": + t_var = self._target_var + else: + t_var = data_sources.get_single_var_with_stat_name(self.statistics_per_var, self._target_var) + return t_var + @staticmethod def update_kwargs(parameter_name: str, default: Any, kwargs: dict): """ @@ -60,7 +80,7 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation): self.set_inputs_and_targets() def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]: - vars = [self.variables, self.target_var] + vars = [self.variables, self._target_var] stats_per_var = helpers.select_from_dict(self.statistics_per_var, vars[ind]) data, self.meta = self.load_data(self.path[ind], self.station, stats_per_var, self.sampling[ind], self.store_data_locally, self.data_origin, self.start, self.end) diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index b4f7073337f36058c209403fa563ec976b6eec79..afd0631d4980e049ab95743f51207632d10fbf67 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -116,7 +116,7 @@ class DataHandlerSingleStation(AbstractDataHandler): # ) # # else: - self.variables = sorted(data_sources.get_vars_with_stat_name(statistics_per_var, variables=variables)) + self._set_variables(sampling, statistics_per_var, variables) self.history = None self.label = None @@ -128,6 +128,18 @@ class DataHandlerSingleStation(AbstractDataHandler): self.setup_samples() self.clean_up() + def _set_variables(self, sampling, statistics_per_var, variables): + self.variables = self._get_updated_vars_for_init(sampling, statistics_per_var, variables) + + @staticmethod + def _get_updated_vars_for_init(sampling, statistics_per_var, variables): + if sampling == "hourly": + variables = sorted(list(statistics_per_var.keys())) if variables is None else variables + else: + variables = sorted(data_sources.get_vars_with_stat_name(statistics_per_var, variables=variables)) + return variables + + def clean_up(self): self._data = None self.input_data = None @@ -330,7 +342,10 @@ class DataHandlerSingleStation(AbstractDataHandler): :return: :rtype: """ - t_var = helpers.data_sources.get_single_var_with_stat_name(self.statistics_per_var, self._target_var) + if self.sampling == "hourly": + t_var = self._target_var + else: + t_var = helpers.data_sources.get_single_var_with_stat_name(self.statistics_per_var, self._target_var) return t_var def make_samples(self): @@ -470,11 +485,13 @@ class DataHandlerSingleStation(AbstractDataHandler): :return: corrected data """ - # used_chem_vars = list(set(self.chem_vars) & set(data.coords[self.target_dim].values)) - # add "_" at end of all chemical variables to ensure differentiation between e.g. "no" and "no2" - chemical_starter = [f"{v}_" for v in self.chem_vars] - #check if variables from target_dim start with chemical variable type - used_chem_vars = [v for v in data.coords[self.target_dim].values if v.startswith(tuple(chemical_starter))] + if self.sampling == "hourly": + used_chem_vars = list(set(self.chem_vars) & set(data.coords[self.target_dim].values)) + else: + # add "_" at end of all chemical variables to ensure differentiation between e.g. "no" and "no2" + chemical_starter = [f"{v}_" for v in self.chem_vars] + #check if variables from target_dim start with chemical variable type + used_chem_vars = [v for v in data.coords[self.target_dim].values if v.startswith(tuple(chemical_starter))] if len(used_chem_vars) > 0: data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=minimum) return data @@ -525,15 +542,28 @@ class DataHandlerSingleStation(AbstractDataHandler): res.name = index_name return res - @staticmethod - def _set_file_name(path, station, statistics_per_var): - all_vars = sorted(data_sources.get_vars_with_stat_name(statistics_per_var)) - return os.path.join(path, f"{''.join(station)}__{'__'.join(all_vars)}.nc") + + def _set_file_name(self, path, station, statistics_per_var): + file_start = self._prepare_set_file_name(path, station, statistics_per_var) + return f"{file_start}.nc" + @staticmethod - def _set_meta_file_name(path, station, statistics_per_var): - all_vars = sorted(data_sources.get_vars_with_stat_name(statistics_per_var)) - return os.path.join(path, f"{''.join(station)}__{'__'.join(all_vars)}_meta.csv") + def _prepare_set_file_name(path, station, statistics_per_var): + if path.endswith("hourly"): + all_vars = sorted(statistics_per_var.keys()) + combine_by = "_" + else: + all_vars = sorted(data_sources.get_vars_with_stat_name(statistics_per_var)) + combine_by = "__" + file_start = os.path.join(path, f"{''.join(station)}{combine_by}{f'{combine_by}'.join(all_vars)}") + return file_start + + def _set_meta_file_name(self, path, station, statistics_per_var): + # all_vars = sorted(data_sources.get_vars_with_stat_name(statistics_per_var)) + # return os.path.join(path, f"{''.join(station)}__{'__'.join(all_vars)}_meta.csv") + file_start = self._prepare_set_file_name(path, station, statistics_per_var) + return f"{file_start}_meta.csv" def interpolate(self, data, dim: str, method: str = 'linear', limit: int = None, use_coordinate: Union[bool, str] = True, sampling="daily", **kwargs): diff --git a/mlair/helpers/data_sources/toar_data_v2.py b/mlair/helpers/data_sources/toar_data_v2.py index 745d18ccab1395e006ca3b049cece5d81e949e71..0eebe1ad6f738cda765ff0f15fa2657660c3d0ff 100644 --- a/mlair/helpers/data_sources/toar_data_v2.py +++ b/mlair/helpers/data_sources/toar_data_v2.py @@ -177,8 +177,11 @@ def load_timeseries_data(timeseries_meta, url_base, opts, headers, sampling): data = pd.read_csv(StringIO(res), comment="#", index_col="datetime", parse_dates=True, infer_datetime_format=True) if len(data.index) > 0: - stat_name = [correct_stat_name(s) for s in to_list(opts.get("names", "value").split(","))] - data.columns = [f'{meta["variable"]["name"]}_{stat}' for stat in stat_name] + if sampling != "hourly": + stat_name = [correct_stat_name(s) for s in to_list(opts.get("names", "value").split(","))] + data.columns = [f'{meta["variable"]["name"]}_{stat}' for stat in stat_name] + else: + data = data[correct_stat_name(opts.get("names", "value"))].rename(meta["variable"]["name"]).to_frame() coll.append(data) return coll