diff --git a/mlair/data_handler/data_handler_mixed_sampling.py b/mlair/data_handler/data_handler_mixed_sampling.py index 03f10eb85ec81648585b9bc6b830ad71cc7828a7..00408684379c31dc6e7d3c18cf8c8bcf7bf52778 100644 --- a/mlair/data_handler/data_handler_mixed_sampling.py +++ b/mlair/data_handler/data_handler_mixed_sampling.py @@ -150,7 +150,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi self.station_type, self.network, self.store_data_locally, self.data_origin, start, end) data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method[ind], - limit=self.interpolation_limit[ind]) + limit=self.interpolation_limit[ind], sampling=self.sampling[ind]) return data def _extract_lazy(self, lazy_data): diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index 25822762e6f0bc682ee78374be79bcdf86345a07..4330efd9ee5d3ae8a64c6eb9b95a0c58e18b3c36 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -280,7 +280,7 @@ class DataHandlerSingleStation(AbstractDataHandler): self.station_type, self.network, self.store_data_locally, self.data_origin, self.start, self.end) self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method, - limit=self.interpolation_limit) + limit=self.interpolation_limit, sampling=self.sampling) self.set_inputs_and_targets() def set_inputs_and_targets(self): @@ -469,9 +469,8 @@ class DataHandlerSingleStation(AbstractDataHandler): all_vars = sorted(statistics_per_var.keys()) return os.path.join(path, f"{''.join(station)}_{'_'.join(all_vars)}_meta.csv") - @staticmethod - def interpolate(data, dim: str, method: str = 'linear', limit: int = None, use_coordinate: Union[bool, str] = True, - **kwargs): + def interpolate(self, data, dim: str, method: str = 'linear', limit: int = None, + use_coordinate: Union[bool, str] = True, sampling="daily", **kwargs): """ 
Interpolate values according to different methods. @@ -508,8 +507,22 @@ class DataHandlerSingleStation(AbstractDataHandler): :return: xarray.DataArray """ + data = self.create_full_time_dim(data, dim, sampling) return data.interpolate_na(dim=dim, method=method, limit=limit, use_coordinate=use_coordinate, **kwargs) + @staticmethod + def create_full_time_dim(data, dim, sampling): + """Ensure the time dimension is equidistant. Dates can be missing when time steps with missing values have been dropped.""" + start = data.coords[dim].values[0] + end = data.coords[dim].values[-1] + freq = {"daily": "1D", "hourly": "1H"}.get(sampling) + datetime_index = pd.DataFrame(index=pd.date_range(start, end, freq=freq)) + t = data.sel({dim: start}, drop=True) + res = xr.DataArray(coords=[datetime_index.index, *[t.coords[c] for c in t.coords]], dims=[dim, *t.coords]) + res = res.transpose(*data.dims) + res.loc[data.coords] = data + return res + def make_history_window(self, dim_name_of_inputs: str, window: int, dim_name_of_shift: str) -> None: """ Create a xr.DataArray containing history data. diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py index 4a6a5044d35d82eaea5a5b7e454771838375b3c5..b6c27cbcb781e4593a8533ab2ac72928fecadb67 100644 --- a/mlair/helpers/filter.py +++ b/mlair/helpers/filter.py @@ -92,6 +92,7 @@ class ClimateFIRFilter: if apriori is None: apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim) + diurnal_anomalies + logging.info(f"{plot_name}: apriori shape = {apriori.shape}") apriori_list = to_list(apriori) input_data = data.__deepcopy__() for i in range(len(order)): @@ -137,7 +138,7 @@ class ClimateFIRFilter: self._apriori = apriori_list @staticmethod - def create_unity_array(data, time_dim, extend_range=365): + def create_unity_array(data, time_dim, extend_range=366): """Create a xr data array filled with ones. 
time_dim is extended by extend_range days in future and past.""" coords = data.coords @@ -261,7 +262,7 @@ class ClimateFIRFilter: end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) + np.timedelta64( extend_range * factor + 1, td_type) - new_time_axis = np.arange(start, end).astype("datetime64[ns]") + new_time_axis = np.arange(start, end).astype("datetime64[ns]") # hint: arange does not include end date logging.info(f"{data.coords['Stations'].values[0]}: shape of new_time_axis = {new_time_axis.shape}") logging.info(f"{data.coords['Stations'].values[0]}: start of new_time_axis = {start}") logging.info(f"{data.coords['Stations'].values[0]}: end of new_time_axis = {end}") @@ -273,14 +274,17 @@ class ClimateFIRFilter: # start = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - np.timedelta64( # extend_range * factor, td_type) end = coords[time_dim][-1].values.astype("datetime64[%s]" % td_type) - new_values = apriori.sel({time_dim: slice(start, end)}) + new_values = apriori.sel({time_dim: slice(start, end)}) # hint: slice includes end date logging.info(f"{data.coords['Stations'].values[0]}: shape of new_values = {new_values.shape}") logging.info(f"{data.coords['Stations'].values[0]}: start of new_values = {start}") logging.info(f"{data.coords['Stations'].values[0]}: end of new_values = {end}") logging.info(f"{data.coords['Stations'].values[0]}: delta of new_values = {end - start}") + + logging.info(f"{data.coords['Stations'].values[0]}: set new_time_axis") new_values.coords[time_dim] = new_time_axis # add new values to apriori + logging.info(f"{data.coords['Stations'].values[0]}: add to apriori") apriori = apriori.combine_first(new_values) return apriori