diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py index f7ce47a1b1cf36d4e6d526696d55c7ba2aa4e5f6..6a837993fcf849a860e029d441de910d55888a1b 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -634,7 +634,10 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover self._plot_total(raw=True) self._plot_total(raw=False) if multiple > 1: - self._plot_difference(label_names) + self._plot_difference(label_names, plot_name_add="_last") + self._prepare_pgram(generator, pos, multiple, use_multiprocessing=use_multiprocessing, + use_last_input_value=False) + self._plot_difference(label_names, plot_name_add="_first") @staticmethod def _has_filter_dimension(g, pos): @@ -651,7 +654,7 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover return check_data.coords[filter_dim].shape[0], check_data.coords[filter_dim].values.tolist() @TimeTrackingWrapper - def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False): + def _prepare_pgram(self, generator, pos, multiple=1, use_multiprocessing=False, use_last_input_value=True): """ Create periodogram data. """ @@ -665,7 +668,8 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover plot_data_raw_single = dict() plot_data_mean_single = dict() self.f_index = np.logspace(-3, 0 if self._sampling == "daily" else np.log10(24), 1000) - raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing) + raw_data_single = self._prepare_pgram_parallel_gen(generator, m, pos, use_multiprocessing, + use_last_input_value=use_last_input_value) for var in raw_data_single.keys(): pgram_com = [] pgram_mean = 0 @@ -717,7 +721,7 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover raw_data_single[var_str] = raw_data_single[var_str] + [(f, pgram)] return raw_data_single - def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing): + def _prepare_pgram_parallel_gen(self, generator, m, pos, use_multiprocessing, use_last_input_value=True): """Implementation of data preprocessing using parallel generator element processing.""" raw_data_single = dict() res = [] @@ -725,14 +729,15 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover pool = multiprocessing.Pool( min([psutil.cpu_count(logical=False), len(generator), 16])) # use only physical cpus output = [ - pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index)) + pool.apply_async(f_proc_2, args=(g, m, pos, self.variables_dim, self.time_dim, self.f_index, + use_last_input_value)) for g in generator] for i, p in enumerate(output): res.append(p.get()) pool.close() else: for g in generator: - res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index)) + res.append(f_proc_2(g, m, pos, self.variables_dim, self.time_dim, self.f_index, use_last_input_value)) for res_dict in res: for k, v in res_dict.items(): if k not in raw_data_single.keys(): @@ -818,8 +823,8 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover pdf_pages.close() plt.close('all') - def _plot_difference(self, label_names): - plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter.pdf" + def _plot_difference(self, label_names, plot_name_add = ""): + plot_name = f"{self.plot_name}_{self._sampling}_{self._add_text}_filter{plot_name_add}.pdf" plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name) logging.info(f"... plotting {plot_name}") pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path) @@ -848,19 +853,19 @@ class PlotPeriodogram(AbstractPlotClass): # pragma: no cover plt.close('all') -def f_proc(var, d_var, f_index, time_dim="datetime"): # pragma: no cover +def f_proc(var, d_var, f_index, time_dim="datetime", use_last_value=True): # pragma: no cover var_str = str(var) t = (d_var[time_dim] - d_var[time_dim][0]).astype("timedelta64[h]").values / np.timedelta64(1, "D") if len(d_var.shape) > 1: # use only max value if dimensions are remaining (e.g. max(window) -> latest value) to_remove = remove_items(d_var.coords.dims, time_dim) for e in to_list(to_remove): - d_var = d_var.sel({e: d_var[e].max()}) + d_var = d_var.sel({e: d_var[e].max() if use_last_value is True else d_var[e].min()}) pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").power(f_index) # f, pgram = LombScargle(t, d_var.values.flatten(), nterms=1, normalization="psd").autopower() return var_str, f_index, pgram -def f_proc_2(g, m, pos, variables_dim, time_dim, f_index): # pragma: no cover +def f_proc_2(g, m, pos, variables_dim, time_dim, f_index, use_last_value): # pragma: no cover raw_data_single = dict() if hasattr(g.id_class, "lazy"): g.id_class.load_lazy() if g.id_class.lazy is True else None @@ -873,7 +878,7 @@ def f_proc_2(g, m, pos, variables_dim, time_dim, f_index): # pragma: no cover d = d[pos] if isinstance(d, tuple) else d for var in d[variables_dim].values: d_var = d.loc[{variables_dim: var}].squeeze().dropna(time_dim) - var_str, f, pgram = f_proc(var, d_var, f_index) + var_str, f, pgram = f_proc(var, d_var, f_index, use_last_value=use_last_value) raw_data_single[var_str] = [(f, pgram)] if hasattr(g.id_class, "lazy"): g.id_class.clean_up() if g.id_class.lazy is True else None