diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py index d7c7b8e684be5d6b8bee14a984d4c842f8828ded..3ac086791c7fd3f2580a7135ff5c29b9350f847f 100644 --- a/mlair/helpers/filter.py +++ b/mlair/helpers/filter.py @@ -144,6 +144,9 @@ class ClimateFIRFilter(FIRFilter): :param apriori_diurnal: Use diurnal cycle as additional apriori information (only applicable for hourly resoluted data). The mean anomaly of each hour is added to the apriori_type information. """ + #todo add extend_length_opts + # adjust all parts of code marked as todos + # think about different behaviour when using different extend_length_opts (is this part of dh?) self._apriori = apriori self.apriori_type = apriori_type @@ -172,7 +175,7 @@ class ClimateFIRFilter(FIRFilter): logging.debug(f"{self.plot_name}: create monthly apriori") if self._apriori is None: self._apriori = self.create_monthly_mean(self.data, self.time_dim, sel_opts=self.sel_opts, - sampling=sampling) + diurnal_anomalies + sampling=sampling) + diurnal_anomalies logging.debug(f"{self.plot_name}: apriori shape = {self._apriori.shape}") apriori_list = to_list(self._apriori) input_data = self.data.__deepcopy__() @@ -193,13 +196,13 @@ class ClimateFIRFilter(FIRFilter): sampling=sampling, time_dim=self.time_dim, window=self.window, var_dim=self.var_dim, minimum_length=_minimum_length, new_dim=new_dim, - plot_dates=plot_dates, station_name=self.station_name) + plot_dates=plot_dates, station_name=self.station_name) #todo add extend_length_opts here logging.info(f"{self.plot_name}: finished clim_filter calculation") if self.minimum_length is None: filtered.append(fi) else: - filtered.append(fi.sel({new_dim: slice(-self.minimum_length, 0)})) + filtered.append(fi.sel({new_dim: slice(-self.minimum_length, 0)})) # todo adjust to extend_length_opts (how does it work with different lengths) h.append(hi) gc.collect() self.plot_data.append(plot_data) @@ -208,7 +211,7 @@ class ClimateFIRFilter(FIRFilter): # calculate residuum logging.info(f"{self.plot_name}: calculate residuum") coord_range = range(fi.coords[new_dim].values.min(), fi.coords[new_dim].values.max() + 1) - if new_dim in input_data.coords: + if new_dim in input_data.coords: #todo does it work for adding nans and values (should result in nans) input_data = input_data.sel({new_dim: coord_range}) - fi else: input_data = self._shift_data(input_data, coord_range, self.time_dim, new_dim) - fi @@ -236,7 +239,7 @@ class ClimateFIRFilter(FIRFilter): if self.minimum_length is None: filtered.append(input_data) else: - filtered.append(input_data.sel({new_dim: slice(-self.minimum_length, 0)})) + filtered.append(input_data.sel({new_dim: slice(-self.minimum_length, 0)})) #todo include extend_length_opts (how about different lengths again self._filtered = filtered self._h = h @@ -472,10 +475,13 @@ class ClimateFIRFilter(FIRFilter): return apriori def combine_observation_and_apriori(self, data: xr.DataArray, apriori: xr.DataArray, time_dim: str, new_dim: str, - extend_length_history: int, extend_length_future: int) -> xr.DataArray: + extend_length_history: int, extend_length_future: int, + extend_length_separator: int = 0) -> xr.DataArray: """ Combine historical data / observations ("data") and climatological statistics ("apriori"). Historical data are - used on interval [t0 - extend_length_history, t0] and apriori is used on [t0 + 1, t0 + extend_length_future]. + used on interval [t0 - extend_length_history, t0] and apriori is used on [t0 + 1, t0 + extend_length_future]. If + indicated by the extend_length_seperator, it is possible to shift end of history interval and start of apriori + interval by given number of time steps. :param data: historical data for past values, must contain dimensions time_dim and var_dim and might also have a new_dim dimension @@ -485,22 +491,34 @@ class ClimateFIRFilter(FIRFilter): :param new_dim: name of new dim on which data is combined along :param extend_length_history: number of time steps to use from data :param extend_length_future: number of time steps to use from apriori (minus 1) + :param extend_length_separator: position of last history value to use (default 0), this position indicates the + last value that is used from data (followed by values from apriori). In other words, end of history + interval and start of apriori interval are shifted by this value from t0 (positive or negative). :returns: combined data array """ - # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length] + # check if shift indicated by extend_length_seperator is inside the outer interval limits + assert (extend_length_separator > -extend_length_history) and (extend_length_separator < extend_length_future) + + # prepare historical data / observation if new_dim not in data.coords: - history = self._shift_data(data, range(int(-extend_length_history), 1), time_dim, new_dim) + history = self._shift_data(data, range(int(-extend_length_history), extend_length_seperator + 1), + time_dim, new_dim) else: - history = data.sel({new_dim: slice(int(-extend_length_history), 0)}) + history = data.sel({new_dim: slice(int(-extend_length_history), extend_length_seperator)}) + # prepare climatological statistics if new_dim not in apriori.coords: - future = self._shift_data(apriori, range(1, extend_length_future), time_dim, new_dim) + future = self._shift_data(apriori, range(extend_length_seperator + 1, extend_length_future), + time_dim, new_dim) else: - future = apriori.sel({new_dim: slice(1, extend_length_future)}) + future = apriori.sel({new_dim: slice(extend_length_seperator + 1, extend_length_future)}) + + # combine historical data [t0-length,t0+sep] and climatological statistics [t0+sep+1,t0+length] filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left") return filter_input_data - def create_visualization(self, filtered, data, filter_input_data, plot_dates, time_dim, new_dim, sampling, extend_length_history, - extend_length_future, minimum_length, h, variable_name): # pragma: no cover + def create_visualization(self, filtered, data, filter_input_data, plot_dates, time_dim, new_dim, sampling, + extend_length_history, extend_length_future, minimum_length, h, + variable_name): # pragma: no cover plot_data = [] for viz_date in set(plot_dates).intersection(filtered.coords[time_dim].values): try: @@ -566,7 +584,7 @@ class ClimateFIRFilter(FIRFilter): @staticmethod def _trim_data_to_minimum_length(data: xr.DataArray, extend_length_history: int, dim: str, - minimum_length: int = None) -> xr.DataArray: + minimum_length: int = None, extend_length_opts: int = 0) -> xr.DataArray: """ Trim data along given axis between either -minimum_length (if given) or -extend_length_history and 0. @@ -577,10 +595,11 @@ class ClimateFIRFilter(FIRFilter): :param minimum_length: start number for trim range (transformed to negative), preferably used (default None) :returns: trimmed data """ + #todo update doc strings if minimum_length is None: - return data.sel({dim: slice(-extend_length_history, 0)}, drop=True) + return data.sel({dim: slice(-extend_length_history, extend_length_opts)}, drop=True) else: - return data.sel({dim: slice(-minimum_length, 0)}, drop=True) + return data.sel({dim: slice(-minimum_length, extend_length_opts)}, drop=True) @staticmethod def _create_full_filter_result_array(template_array: xr.DataArray, result_array: xr.DataArray, new_dim: str, @@ -604,9 +623,11 @@ class ClimateFIRFilter(FIRFilter): @TimeTrackingWrapper def clim_filter(self, data, fs, cutoff_high, order, apriori=None, sel_opts=None, sampling="1d", time_dim="datetime", var_dim="variables", window: Union[str, Tuple] = "hamming", - minimum_length=None, new_dim="window", plot_dates=None, station_name=None): + minimum_length=None, new_dim="window", plot_dates=None, station_name=None, + extend_length_opts: dict = None): logging.debug(f"{station_name}: extend apriori") + extend_length_opts = extend_length_opts or {} # calculate apriori information from data if not given and extend its range if not sufficient long enough if apriori is None: @@ -635,6 +656,7 @@ class ClimateFIRFilter(FIRFilter): logging.info(f"{station_name} ({var}): sel data") _start, _end = self._get_year_interval(data, time_dim) + extend_length_opts_var = extend_length_opts.get(var, 0) filt_coll = [] for _year in range(_start, _end + 1): logging.debug(f"{station_name} ({var}): year={_year}") @@ -648,7 +670,8 @@ class ClimateFIRFilter(FIRFilter): # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length] filter_input_data = self.combine_observation_and_apriori(d, a, time_dim, new_dim, extend_length_history, - extend_length_future) + extend_length_future, + extend_length_separator=extend_length_opts_var) # select only data for current year try: @@ -666,13 +689,14 @@ class ClimateFIRFilter(FIRFilter): vectorize=True, kwargs={"h": h}, output_dtypes=[d.dtype]) # trim data if required - trimmed = self._trim_data_to_minimum_length(filt, extend_length_history, new_dim, minimum_length) + trimmed = self._trim_data_to_minimum_length(filt, extend_length_history, new_dim, minimum_length, + extend_length_opts=extend_length_opts_var) filt_coll.append(trimmed) # visualization plot_data.extend(self.create_visualization(filt, d, filter_input_data, plot_dates, time_dim, new_dim, sampling, extend_length_history, extend_length_future, - minimum_length, h, var)) + minimum_length, h, var)) # todo check if this still works with extend_length_opts # collect all filter results coll.append(xr.concat(filt_coll, time_dim)) @@ -680,10 +704,10 @@ class ClimateFIRFilter(FIRFilter): # concat all variables logging.debug(f"{station_name}: concat all variables") - res = xr.concat(coll, var_dim) + res = xr.concat(coll, var_dim) #todo does this works with different extend_length_opts (is data trimmed or filled with nans, 2nd is target) # create result array with same shape like input data, gaps are filled by nans - res_full = self._create_full_filter_result_array(data, res, new_dim, station_name) + res_full = self._create_full_filter_result_array(data, res, new_dim, station_name) #todo does it still works with extend_length_opts return res_full, h, apriori, plot_data @staticmethod