diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 097c0da73ba5f4759be7e63e771e7cea8d450e10..c33da2a9337642d891918462722d30f7fd12b772 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -9,7 +9,7 @@ import pandas as pd
 import xarray as xr
 from typing import List, Union, Tuple, Optional
 from functools import partial
-
+import logging
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.data_handler import DefaultDataHandler
 from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
@@ -318,6 +318,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     def apply_filter(self):
         """Apply FIR filter only on inputs."""
         self.apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
+        logging.info(f"{self.station}: call ClimateFIRFilter")
         climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 9f7b5a6ebcb62b87b9eb059027bab4844fa2e67a..21dfcbeebe49119d4fb503f64c2e0435f306fe18 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -75,23 +75,27 @@ class ClimateFIRFilter:
         :param apriori_diurnal: Use diurnal cycle as additional apriori information (only applicable for hourly
             resoluted data). The mean anomaly of each hour is added to the apriori_type information.
         """
+        logging.info(f"{plot_name}: start init ClimateFIRFilter")
         self.plot_path = plot_path
         self.plot_name = plot_name
         filtered = []
         h = []
         sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
         sampling = {1: "1d", 24: "1H"}.get(int(fs))
+        logging.info(f"{plot_name}: create diurnal_anomalies")
         if apriori_diurnal is True and sampling == "1H":
             diurnal_anomalies = self.create_hourly_mean(data, sel_opts=sel_opts, sampling=sampling,
                                                         time_dim=time_dim, as_anomaly=True)
         else:
             diurnal_anomalies = 0
+        logging.info(f"{plot_name}: create monthly apriori")
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling,
                                                time_dim=time_dim) + diurnal_anomalies
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
         for i in range(len(order)):
+            logging.info(f"{plot_name}: start filter for order {order[i]}")
             # calculate climatological filter
             # clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
             clim_filter: Callable = {True: self.clim_filter_vectorized_less_memory, False: self.clim_filter}[vectorized]
@@ -99,20 +103,25 @@ class ClimateFIRFilter:
                                           apriori=apriori_list[i],
                                           sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
                                           var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor)
+
+            logging.info(f"{plot_name}: finished clim_filter calculation")
             filtered.append(fi)
             h.append(hi)
             gc.collect()
 
             # calculate residuum
+            logging.info(f"{plot_name}: calculate residuum")
             input_data = input_data - fi
 
             # create new apriori information for next iteration if no further apriori is provided
             if len(apriori_list) <= i + 1:
+                logging.info(f"{plot_name}: create diurnal_anomalies")
                 if apriori_diurnal is True and sampling == "1H":
                     diurnal_anomalies = self.create_hourly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
                                                                 time_dim=time_dim, as_anomaly=True)
                 else:
                     diurnal_anomalies = 0
+                logging.info(f"{plot_name}: create monthly apriori")
                 if apriori_type is None or apriori_type == "zeros":  # zero version
                     apriori_list.append(xr.zeros_like(apriori_list[i]) + diurnal_anomalies)
                 elif apriori_type == "residuum_stats":  # calculate monthly statistic on residuum
@@ -361,6 +370,8 @@ class ClimateFIRFilter:
                                                sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
                                                plot_index=None):
 
+        logging.info(f"{data.coords['Stations'].values[0]}: extend apriori")
+
         # calculate apriori information from data if not given and extend its range if not sufficient long enough
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
@@ -376,12 +387,16 @@ class ClimateFIRFilter:
 
         coll = []
         for var in data.coords[var_dim].values:
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
             d = data.sel({var_dim: [var]})
             a = apriori.sel({var_dim: [var]})
 
             # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
             history = self._shift_data(d, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
             future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
             filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
             # filter_input_data = history.combine_first(future)
             # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
@@ -397,6 +412,7 @@ class ClimateFIRFilter:
             # filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
             #                       input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
             #                       kwargs=kwargs)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
             with TimeTracking(name="convolve"):
                 slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
                 filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
@@ -422,8 +438,10 @@ class ClimateFIRFilter:
             # select only values at tmp dimension 0 at each point in time
             coll.append(filt.sel({new_dim: 0}, drop=True))
 
+        logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
         res = xr.concat(coll, var_dim)
         # create result array with same shape like input data, gabs are filled by nans
+        logging.info(f"{data.coords['Stations'].values[0]}: create res_full")
         res_full = xr.ones_like(data) * np.nan
         res_full.loc[res.coords] = res
         return res_full, h, apriori
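Note (not part of the patch): the messages added above go through Python's standard logging module at INFO level and are keyed by the station name (self.station / plot_name), so they only appear if the root logger is configured accordingly. MLAir normally sets up its own logger in the run environment; the minimal sketch below is an assumption for exercising ClimateFIRFilter standalone, and the format string and station id "DEBW107" are purely illustrative.

import logging

# Assumed standalone setup: route INFO records to the console so the messages
# added in this patch (pattern "<station>: <step>") become visible.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s: %(message)s")

# Illustrative message in the same "<station>: <step>" pattern as the new calls.
logging.info("DEBW107: call ClimateFIRFilter")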