diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index eb3f78dc465247095d0114f3f41d4b8b70ba5480..3de749d02375243269f9eb51c08400840fd0656a 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -423,12 +423,18 @@ class DataHandlerMixedSamplingWithClimateAndFirFilter(DataHandlerMixedSamplingWi # combine all transformations transformation_res = {} - if len(transformation_chem) > 0: - transformation_res["filtered_chem"] = transformation_chem.pop("filtered") - transformation_res["unfiltered_chem"] = transformation_chem.pop("unfiltered") - if len(transformation_meteo) > 0: - transformation_res["filtered_meteo"] = transformation_meteo.pop("filtered") - transformation_res["unfiltered_meteo"] = transformation_meteo.pop("unfiltered") + if isinstance(transformation_chem, dict): + if len(transformation_chem) > 0: + transformation_res["filtered_chem"] = transformation_chem.pop("filtered") + transformation_res["unfiltered_chem"] = transformation_chem.pop("unfiltered") + else: # if no unfiltered chem branch + transformation_res["filtered_chem"] = transformation_chem + if isinstance(transformation_meteo, dict): + if len(transformation_meteo) > 0: + transformation_res["filtered_meteo"] = transformation_meteo.pop("filtered") + transformation_res["unfiltered_meteo"] = transformation_meteo.pop("unfiltered") + else: # if no unfiltered meteo branch + transformation_res["filtered_meteo"] = transformation_meteo return transformation_res if len(transformation_res) > 0 else None def get_X_original(self): diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index c21d5b4126b8bc7564ef8855d7c1229c7f411df3..360ab6408345de83c21ee5d29031b3b5f131525b 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -276,6 +276,7 @@ class DataHandlerSingleStation(AbstractDataHandler): filename = os.path.join(self.lazy_path, hash + ".pickle") try: if self.overwrite_lazy_data is True: + os.remove(filename) raise FileNotFoundError with open(filename, "rb") as pickle_file: lazy_data = dill.load(pickle_file) diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py index 9a61de715bc02e57a41ab6c2b9a62de7157acf07..b25d6ee10f89bfa49c2147d1758a2d24b8e7687e 100644 --- a/mlair/helpers/filter.py +++ b/mlair/helpers/filter.py @@ -19,7 +19,7 @@ from mlair.helpers import to_list, TimeTrackingWrapper, TimeTracking class FIRFilter: from mlair.plotting.data_insight_plotting import PlotFirFilter - def __init__(self, data, fs, order, cutoff, window, var_dim, time_dim, station_name=None, minimum_length=0, offset=0, plot_path=None): + def __init__(self, data, fs, order, cutoff, window, var_dim, time_dim, station_name=None, minimum_length=None, offset=0, plot_path=None): self._filtered = [] self._h = [] self.data = data @@ -55,7 +55,7 @@ class FIRFilter: h.append(hi) # visualization - plot_data.extend(self.create_visualization(fi, input_data, plot_dates, self.time_dim, self.fs, hi, + plot_data.append(self.create_visualization(fi, input_data, plot_dates, self.time_dim, self.fs, hi, self.minimum_length, self.order, i, self.offset, self.var_dim)) # calculate residuum input_data = input_data - fi @@ -76,6 +76,7 @@ class FIRFilter: def create_visualization(self, filtered, filter_input_data, plot_dates, time_dim, sampling, h, minimum_length, order, i, offset, var_dim): # pragma: no cover plot_data = [] + minimum_length = minimum_length or 0 for viz_date in set(plot_dates).intersection(filtered.coords[time_dim].values): try: if i < len(order) - 1: @@ -187,21 +188,16 @@ class ClimateFIRFilter(FIRFilter): values (default None). Must either be a dictionary with keys available in var_dim or a single value that is applied to all data. """ - #todo add extend_length_opts - # adjust all parts of code marked as todos - # think about different behaviour when using different extend_length_opts (is this part of dh?) - self._apriori = apriori self.apriori_type = apriori_type self.apriori_diurnal = apriori_diurnal self._apriori_list = [] self.sel_opts = sel_opts - self.minimum_length = minimum_length self.new_dim = new_dim - self.plot_path = plot_path self.plot_data = [] self.extend_length_opts = extend_length_opts - super().__init__(data, fs, order, cutoff, window, var_dim, time_dim, station_name=station_name) + super().__init__(data, fs, order, cutoff, window, var_dim, time_dim, station_name=station_name, + minimum_length=minimum_length, plot_path=plot_path) def run(self): filtered = [] @@ -260,7 +256,7 @@ class ClimateFIRFilter(FIRFilter): input_data = self._shift_data(input_data, coord_range, self.time_dim, new_dim) - fi # create new apriori information for next iteration if no further apriori is provided - if len(apriori_list) <= i + 1: + if len(apriori_list) < len(self.order): logging.info(f"{self.station_name}: create diurnal_anomalies") if self.apriori_diurnal is True and sampling == "1H": diurnal_anomalies = self.create_seasonal_hourly_mean(input_data.sel({new_dim: 0}, drop=True), @@ -631,16 +627,17 @@ class ClimateFIRFilter(FIRFilter): def _trim_data_to_minimum_length(data: xr.DataArray, extend_length_history: int, dim: str, minimum_length: int = None, extend_length_opts: int = 0) -> xr.DataArray: """ - Trim data along given axis between either -minimum_length (if given) or -extend_length_history and 0. + Trim data along given axis between either -minimum_length (if given) or -extend_length_history and + extend_length_opts (which is default set to 0). :param data: data to trim :param extend_length_history: start number for trim range (transformed to negative), only used if parameter minimum_length is not provided :param dim: dim to apply trim on :param minimum_length: start number for trim range (transformed to negative), preferably used (default None) + :param extend_length_opts: number to use in "future" :returns: trimmed data """ - #todo update doc strings if minimum_length is None: return data.sel({dim: slice(-extend_length_history, extend_length_opts)}, drop=True) else: @@ -742,7 +739,7 @@ class ClimateFIRFilter(FIRFilter): # visualization plot_data.extend(self.create_visualization(filt, d, filter_input_data, plot_dates, time_dim, new_dim, sampling, extend_length_history, extend_length_future, - minimum_length, h, var, extend_length_opts)) + minimum_length, h, var, extend_opts_var)) # collect all filter results coll.append(xr.concat(filt_coll, time_dim)) @@ -750,7 +747,7 @@ class ClimateFIRFilter(FIRFilter): # concat all variables logging.debug(f"{station_name}: concat all variables") - res = xr.concat(coll, var_dim) #todo does this works with different extend_length_opts (is data trimmed or filled with nans, 2nd is target) + res = xr.concat(coll, var_dim) # create result array with same shape like input data, gaps are filled by nans res_full = self._create_full_filter_result_array(data, res, new_dim, station_name) diff --git a/mlair/helpers/join.py b/mlair/helpers/join.py index 93cb0e7b1b34d1ebc13b914ac9626fb4466a7201..67591b29a4e4bcc8b3083869825aed09ebebaf58 100644 --- a/mlair/helpers/join.py +++ b/mlair/helpers/join.py @@ -43,6 +43,9 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t # make sure station_name parameter is a list station_name = helpers.to_list(station_name) + # also ensure that given data_origin dict is no reference + data_origin = None if data_origin is None else {k: v for (k, v) in data_origin.items()} + # get data connection settings join_url_base, headers = join_settings(sampling) diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py index 096163451355cb5011dbb2cf39c48c963d51c03c..1eee96623d4fed6fcfb23fd1438a954a4aca230f 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -971,7 +971,7 @@ class PlotClimateFirFilter(AbstractPlotClass): # pragma: no cover """Restructure plot data.""" plot_dict = {} new_dim = None - for i, o in enumerate(range(len(data))): + for i in range(len(data)): plot_data = data[i] for p_d in plot_data: var = p_d.get("var") @@ -1187,22 +1187,23 @@ class PlotFirFilter(AbstractPlotClass): # pragma: no cover def _prepare_data(self, data): """Restructure plot data.""" plot_dict = {} - for i, o in enumerate(range(len(data))): - plot_data = data[i] - t0 = plot_data.get("t0") - filter_input = plot_data.get("filter_input") - filtered = plot_data.get("filtered") - var_dim = plot_data.get("var_dim") - time_dim = plot_data.get("time_dim") - for var in filtered.coords[var_dim].values: - plot_dict_var = plot_dict.get(var, {}) - plot_dict_t0 = plot_dict_var.get(t0, {}) - plot_dict_order = {"filter_input": filter_input.sel({var_dim: var}, drop=True), - "filtered": filtered.sel({var_dim: var}, drop=True), - "time_dim": time_dim} - plot_dict_t0[i] = plot_dict_order - plot_dict_var[t0] = plot_dict_t0 - plot_dict[var] = plot_dict_var + for i in range(len(data)): # filter component + for j in range(len(data[i])): # t0 counter + plot_data = data[i][j] + t0 = plot_data.get("t0") + filter_input = plot_data.get("filter_input") + filtered = plot_data.get("filtered") + var_dim = plot_data.get("var_dim") + time_dim = plot_data.get("time_dim") + for var in filtered.coords[var_dim].values: + plot_dict_var = plot_dict.get(var, {}) + plot_dict_t0 = plot_dict_var.get(t0, {}) + plot_dict_order = {"filter_input": filter_input.sel({var_dim: var}, drop=True), + "filtered": filtered.sel({var_dim: var}, drop=True), + "time_dim": time_dim} + plot_dict_t0[i] = plot_dict_order + plot_dict_var[t0] = plot_dict_t0 + plot_dict[var] = plot_dict_var return plot_dict def _plot(self, plot_dict):