From 754e61f0ad927f37bf54c81d8398aff67e528092 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Tue, 18 May 2021 16:40:19 +0200
Subject: [PATCH] Add logging statements to locate HPC error

---
 mlair/data_handler/data_handler_with_filter.py |  3 ++-
 mlair/helpers/filter.py                        | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/mlair/data_handler/data_handler_with_filter.py b/mlair/data_handler/data_handler_with_filter.py
index 097c0da7..c33da2a9 100644
--- a/mlair/data_handler/data_handler_with_filter.py
+++ b/mlair/data_handler/data_handler_with_filter.py
@@ -9,7 +9,7 @@ import pandas as pd
 import xarray as xr
 from typing import List, Union, Tuple, Optional
 from functools import partial
-
+import logging
 from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
 from mlair.data_handler import DefaultDataHandler
 from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
@@ -318,6 +318,7 @@ class DataHandlerClimateFirFilterSingleStation(DataHandlerFirFilterSingleStation
     def apply_filter(self):
         """Apply FIR filter only on inputs."""
         self.apriori = self.apriori.get(str(self)) if isinstance(self.apriori, dict) else self.apriori
+        logging.info(f"{self.station}: call ClimateFIRFilter")
         climate_filter = ClimateFIRFilter(self.input_data, self.fs, self.filter_order, self.filter_cutoff_freq,
                                           self.filter_window_type, time_dim=self.time_dim, var_dim=self.target_dim,
                                           apriori_type=self.apriori_type, apriori=self.apriori,
diff --git a/mlair/helpers/filter.py b/mlair/helpers/filter.py
index 9f7b5a6e..21dfcbee 100644
--- a/mlair/helpers/filter.py
+++ b/mlair/helpers/filter.py
@@ -75,23 +75,27 @@ class ClimateFIRFilter:
         :param apriori_diurnal: Use diurnal cycle as additional apriori information (only applicable for hourly
             resoluted data). The mean anomaly of each hour is added to the apriori_type information.
         """
+        logging.info(f"{plot_name}: start init ClimateFIRFilter")
         self.plot_path = plot_path
         self.plot_name = plot_name
         filtered = []
         h = []
         sel_opts = sel_opts if isinstance(sel_opts, dict) else {time_dim: sel_opts}
         sampling = {1: "1d", 24: "1H"}.get(int(fs))
+        logging.info(f"{plot_name}: create diurnal_anomalies")
         if apriori_diurnal is True and sampling == "1H":
             diurnal_anomalies = self.create_hourly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim,
                                                         as_anomaly=True)
         else:
             diurnal_anomalies = 0
+        logging.info(f"{plot_name}: create monthly apriori")
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling,
                                                time_dim=time_dim) + diurnal_anomalies
         apriori_list = to_list(apriori)
         input_data = data.__deepcopy__()
         for i in range(len(order)):
+            logging.info(f"{plot_name}: start filter for order {order[i]}")
             # calculate climatological filter
             # clim_filter: Callable = {True: self.clim_filter_vectorized, False: self.clim_filter}[vectorized]
             clim_filter: Callable = {True: self.clim_filter_vectorized_less_memory, False: self.clim_filter}[vectorized]
@@ -99,20 +103,25 @@ class ClimateFIRFilter:
                                           apriori=apriori_list[i],
                                           sel_opts=sel_opts, sampling=sampling, time_dim=time_dim, window=window,
                                           var_dim=var_dim, plot_index=i, padlen_factor=padlen_factor)
+
+            logging.info(f"{plot_name}: finished clim_filter calculation")
             filtered.append(fi)
             h.append(hi)
             gc.collect()
 
             # calculate residuum
+            logging.info(f"{plot_name}: calculate residuum")
             input_data = input_data - fi
 
             # create new apriori information for next iteration if no further apriori is provided
             if len(apriori_list) <= i + 1:
+                logging.info(f"{plot_name}: create diurnal_anomalies")
                 if apriori_diurnal is True and sampling == "1H":
                     diurnal_anomalies = self.create_hourly_mean(input_data, sel_opts=sel_opts, sampling=sampling,
                                                                 time_dim=time_dim, as_anomaly=True)
                 else:
                     diurnal_anomalies = 0
+                logging.info(f"{plot_name}: create monthly apriori")
                 if apriori_type is None or apriori_type == "zeros":  # zero version
                     apriori_list.append(xr.zeros_like(apriori_list[i]) + diurnal_anomalies)
                 elif apriori_type == "residuum_stats":  # calculate monthly statistic on residuum
@@ -361,6 +370,8 @@ class ClimateFIRFilter:
                                            sampling="1d", time_dim="datetime", var_dim="variables", window="hamming",
                                            plot_index=None):
 
+        logging.info(f"{data.coords['Stations'].values[0]}: extend apriori")
+
         # calculate apriori information from data if not given and extend its range if not sufficient long enough
         if apriori is None:
             apriori = self.create_monthly_mean(data, sel_opts=sel_opts, sampling=sampling, time_dim=time_dim)
@@ -376,12 +387,16 @@ class ClimateFIRFilter:
         coll = []
 
         for var in data.coords[var_dim].values:
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): sel data")
             d = data.sel({var_dim: [var]})
             a = apriori.sel({var_dim: [var]})
 
             # combine historical data / observation [t0-length,t0] and climatological statistics [t0+1,t0+length]
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): history")
             history = self._shift_data(d, range(int(-(length - 1) / 2), 1), time_dim, var_dim, new_dim)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): future")
             future = self._shift_data(a, range(1, int((length - 1) / 2) + 1), time_dim, var_dim, new_dim)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): concat to filter input")
             filter_input_data = xr.concat([history.dropna(time_dim), future], dim=new_dim, join="left")
             # filter_input_data = history.combine_first(future)
             # history.sel(datetime=slice("2010-11-01", "2011-04-01"),variables="o3").plot()
@@ -397,6 +412,7 @@ class ClimateFIRFilter:
             #     filt = xr.apply_ufunc(fir_filter_vectorized, filter_input_data, time_axis,
             #                           input_core_dims=[[new_dim], []], output_core_dims=[[new_dim]], vectorize=True,
             #                           kwargs=kwargs)
+            logging.info(f"{data.coords['Stations'].values[0]} ({var}): start filter convolve")
             with TimeTracking(name="convolve"):
                 slicer = slice(int(-(length - 1) / 2), int((length - 1) / 2))
                 filt = xr.apply_ufunc(fir_filter_convolve_vectorized, filter_input_data.sel({new_dim: slicer}),
@@ -422,8 +438,10 @@ class ClimateFIRFilter:
             # select only values at tmp dimension 0 at each point in time
             coll.append(filt.sel({new_dim: 0}, drop=True))
 
+        logging.info(f"{data.coords['Stations'].values[0]}: concat all variables")
         res = xr.concat(coll, var_dim)
         # create result array with same shape like input data, gabs are filled by nans
+        logging.info(f"{data.coords['Stations'].values[0]}: create res_full")
         res_full = xr.ones_like(data) * np.nan
         res_full.loc[res.coords] = res
         return res_full, h, apriori
-- 
GitLab