From 07c8bcf5d03412d5c4d0a76c9139c3ca8574cdb4 Mon Sep 17 00:00:00 2001 From: leufen1 <l.leufen@fz-juelich.de> Date: Fri, 30 Oct 2020 10:14:56 +0100 Subject: [PATCH] added kz filter to statistics --- mlair/helpers/statistics.py | 166 ++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py index 056f92be..f6206e98 100644 --- a/mlair/helpers/statistics.py +++ b/mlair/helpers/statistics.py @@ -9,6 +9,9 @@ import numpy as np import xarray as xr import pandas as pd from typing import Union, Tuple, Dict +from matplotlib import pyplot as plt + +from mlair.helpers import to_list Data = Union[xr.DataArray, pd.DataFrame] @@ -345,3 +348,166 @@ class SkillScores: monthly_mean[monthly_mean.index.dt.month == month, :] = mu[mu.month == month].values return monthly_mean + + +class KolmogorovZurbenkoBaseClass: + + def __init__(self, df, wl, itr, is_child=False, filter_dim="window"): + """ + It create the variables associate with the Kolmogorov-Zurbenko-filter. + + Args: + df(pd.DataFrame): time series of a variable + wl(list of int): window length + itr(list of int): number of iteration + """ + self.df = df + self.filter_dim = filter_dim + self.wl = to_list(wl) + self.itr = to_list(itr) + if abs(len(self.wl) - len(self.itr)) > 0: + raise ValueError("Length of lists for wl and itr must agree!") + self._isChild = is_child + self.child = self.set_child() + self.type = type(self).__name__ + + def set_child(self): + if len(self.wl) > 1: + return KolmogorovZurbenkoBaseClass(self.df, self.wl[1:], self.itr[1:], True, self.filter_dim) + else: + return None + + def kz_filter(self, m, k): + pass + + def spectral_calc(self): + kz = self.kz_filter(self.wl[0], self.itr[0]) + if self.child is None: + if not self._isChild: + return [self.subtract(self.df, kz), kz] + else: + return [kz, kz] + else: + if not self._isChild: + kz_next = self.child.spectral_calc() + return [self.subtract(self.df, kz), self.subtract(kz, kz_next[0])] + kz_next[1:] + else: + kz_next = self.child.spectral_calc() + return [kz, self.subtract(kz, kz_next[0])] + kz_next[1:] + + @staticmethod + def subtract(minuend, subtrahend): + try: # pandas implementation + return minuend.sub(subtrahend, axis=0) + except AttributeError: # general implementation + return minuend - subtrahend + + def run(self): + return self.spectral_calc() + + def transfer_function(self): + m = self.wl[0] + k = self.itr[0] + omega = np.linspace(0.00001, 0.15, 5000) + return omega, (np.sin(m * np.pi * omega) / (m * np.sin(np.pi * omega))) ** (2 * k) + + def omega_null(self, alpha=0.5): + a = np.sqrt(6) / np.pi + b = 1 / (2 * self.itr[0]) + c = 1 - alpha ** b + d = self.wl[0] ** 2 - alpha ** b + return a * np.sqrt(c / d) + + def period_null(self, alpha=0.5): + return 1. / self.omega_null(alpha) + + def period_null_days(self, alpha=0.5): + return self.period_null(alpha) / 24. + + def plot_transfer_function(self, fig=None, name=None): + if fig is None: + fig = plt.figure() + omega, transfer_function = self.transfer_function() + if self.child is not None: + transfer_function_child = self.child.plot_transfer_function(fig) + else: + transfer_function_child = transfer_function * 0 + plt.semilogx(omega, transfer_function - transfer_function_child, + label="m={:3.0f}, k={:3.0f}, T={:6.2f}d".format(self.wl[0], + self.itr[0], + self.period_null_days())) + plt.axvline(x=self.omega_null()) + if not self._isChild: + locs, labels = plt.xticks() + plt.xticks(locs, np.round(1. / (locs * 24), 1)) + plt.xlim([0.00001, 0.15]) + plt.legend() + if name is None: + plt.show() + else: + plt.savefig(name) + else: + return transfer_function + + +class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass): + + def __init__(self, df, wl: Union[list, int], itr: Union[list, int], is_child=False, filter_dim="window", + method="mean", percentile=0.5): + """ + It create the variables associate with the KolmogorovZurbenkoFilterMovingWindow class. + + Args: + df(pd.DataFrame): time series of a variable + wl: window length + itr: number of iteration + """ + self.valid_methods = ["mean", "percentile", "median", "max", "min"] + if method not in self.valid_methods: + raise ValueError("Method '{}' is not supported. Please select from [{}].".format( + method, ", ".join(self.valid_methods))) + else: + self.method = method + if percentile > 1 or percentile < 0: + raise ValueError("Percentile must be in range [0, 1]. Given was {}!".format(percentile)) + else: + self.percentile = percentile + super().__init__(df, wl, itr, is_child, filter_dim) + + def set_child(self): + if len(self.wl) > 1: + return KolmogorovZurbenkoFilterMovingWindow(self.df, self.wl[1:], self.itr[1:], is_child=True, + filter_dim=self.filter_dim, method=self.method, + percentile=self.percentile) + else: + return None + + def kz_filter(self, wl, itr): + """ + It passes the low frequency time series. + + Args: + wl(int): a window length + itr(int): a number of iteration + """ + df_itr = self.df.__deepcopy__() + try: + kwargs = {"min_periods": 1, + "center": True, + self.filter_dim: wl} + for _ in np.arange(0, itr): + rolling = df_itr.rolling(**kwargs) + if self.method == "median": + df_mv_avg_tmp = rolling.median() + elif self.method == "percentile": + df_mv_avg_tmp = rolling.quantile(self.percentile) + elif self.method == "max": + df_mv_avg_tmp = rolling.max() + elif self.method == "min": + df_mv_avg_tmp = rolling.min() + else: + df_mv_avg_tmp = rolling.mean() + df_itr = df_mv_avg_tmp + return df_itr + except ValueError: + raise ValueError -- GitLab