diff --git a/toarstats/trends/interface.py b/toarstats/trends/interface.py index f4ce6637549e9d0c46357000a36fd6a3cafc19a8..3cb825fab5f5f0527a200dcbc3c8283b29f2d916 100644 --- a/toarstats/trends/interface.py +++ b/toarstats/trends/interface.py @@ -4,43 +4,70 @@ This module contains the following function: calculate_trend - calculate the trend using the requested method """ +from toarstats.metrics.input_checks import check_data from toarstats.trends.ols import ols from toarstats.trends.quant_reg import quant_reg -from toarstats.trends.utils import deseasonalize +from toarstats.trends.utils import calculate_anomalies -def calculate_trend(method, data, formula="value ~ datetime", quantiles=None): +def calculate_trend(method, data, quantiles=None): """Calculate the trend using the requested method. This function is the public interface for the ``trends`` subpackage. It takes all the user inputs and returns the result of the requested trend analysis. + The calculation follows "Guidance note on best statistical practices + for TOAR analyses" (Chang et al. 2023, + https://arxiv.org/pdf/2304.14236.pdf) Annex E. + :param method: either ``"OLS"`` or ``"quant"`` :param data: data containing a list of date time values and associated parameter values on which to calculate the trend - :param formula: the formula specifying the model :param quantiles: a single quantile or a list of quantiles to calculate, these must be between 0 and 1; only needed when ``method="quant"`` - :raises ValueError: raised if the ``method`` parameter is not - recognized + :raises TypeError: raised if + + - the ``data`` parameter is not a data frame or + series + - the index is not a datetime index + - the values are not in a one-dimensional float + or int array + :raises ValueError: raised if + + - the ``method`` parameter is not recognized + - the index is empty, has duplicates or null + values + - the values array is empty or only contains + null values + - the index and values have different lengths + - any ``quantiles`` are not strictly within 0 + and 1 with ``method="quantreg"`` - :return: The result of the fit or a list of fit results if - ``method="quant"`` and multiple quantiles are given + :return: The result of the fit or a dict of fit results if + ``method="quant"`` """ - deseasonalized_data = deseasonalize(data) - if method == "OLS": - fit = ols(deseasonalized_data, formula) - elif method == "quant": - try: - fit = [quant_reg(deseasonalized_data, formula, quantile) - for quantile in quantiles] - except TypeError: - fit = quant_reg(deseasonalized_data, formula, quantiles) - else: + if method not in {"OLS", "quant"}: raise ValueError(f"{method} is not recognized, must be 'OLS' or" " 'quant'") + data_in = check_data(data, None, None).to_frame("value") + if method == "quant": + quantile_list = ( + quantiles + if isinstance(quantiles, (list, set, tuple)) + else [quantiles] + ) + if not all(0 < quantile < 1 for quantile in quantile_list): + raise ValueError("The quantiles must be strictly between 0 and 1.") + anomalies_series = calculate_anomalies(data_in) + if method == "quant_reg": + fit = { + quantile: quant_reg(anomalies_series, quantile) + for quantile in quantile_list + } + else: + fit = ols(anomalies_series) return fit diff --git a/toarstats/trends/ols.py b/toarstats/trends/ols.py index 255741d5a974bd450a2b8be0bdbc5bca972981b7..766ca2f71b9424826b8c024a6df013b159de1a85 100644 --- a/toarstats/trends/ols.py +++ b/toarstats/trends/ols.py @@ -4,19 +4,24 @@ This module contains the following function: ols - calculate the OLS linear regression """ +import numpy as np +import scipy.stats import statsmodels.formula.api as smf +from toarstats.trends.utils import moving_block_bootstrap -def ols(data, formula): + +def ols(data): """Calculate the OLS linear regression. :param data: data containing a list of date time values and associated parameter values on which to calculate the trend - :param formula: the formula specifying the model - :return: The result of the fit + :return: The trend with its uncertainty and p value """ - model = smf.ols(formula, data) - fit = model.fit() - return fit + fit = smf.ols("value~datetime", data).fit(method="qr").params + mbb = moving_block_bootstrap("OLS", data, quantile) + fit_se = np.nanstd(mbb, axis=0) + fit_pv = 2*scipy.stats.t.sf(x=abs(fit/fit_se), df=len(data)-2) + return {"trend": fit, "uncertainty": fit_se, "p_value": fit_pv} diff --git a/toarstats/trends/quant_reg.py b/toarstats/trends/quant_reg.py index 033597761266c22a61ad45aeece6b1212e0d00d6..dd2d926d3c2911912005ad3ffabceac6577740ee 100644 --- a/toarstats/trends/quant_reg.py +++ b/toarstats/trends/quant_reg.py @@ -1,23 +1,28 @@ """Quantile regression calculation. This module contains the following function: -quant_reg - calculate the OLS linear regression +quant_reg - calculate the quantile regression """ +import numpy as np +import scipy.stats import statsmodels.formula.api as smf +from toarstats.trends.utils import moving_block_bootstrap -def quant_reg(data, formula, quantile): + +def quant_reg(data, quantile): """Calculate the quantile regression. :param data: data containing a list of date time values and associated parameter values on which to calculate the trend - :param formula: the formula specifying the model :param quantile: a single quantile, must be between 0 and 1 - :return: The result of the fit + :return: The trend with its uncertainty and p value """ - model = smf.quantreg(formula, data) - fit = model.fit(q=quantile) - return fit + fit = smf.quantreg("value~datetime", data).fit(q=quantile).params + mbb = moving_block_bootstrap("quant", data, quantile) + fit_se = np.nanstd(mbb, axis=0) + fit_pv = 2*scipy.stats.t.sf(x=abs(fit/fit_se), df=len(data)-2) + return {"trend": fit, "uncertainty": fit_se, "p_value": fit_pv} diff --git a/toarstats/trends/utils.py b/toarstats/trends/utils.py index 0a07989d0c6fadc1115b6c3c4d0407ecbe982d2d..cf97a9451b49e70418cafe4a58bb70e7601ab2eb 100644 --- a/toarstats/trends/utils.py +++ b/toarstats/trends/utils.py @@ -1,7 +1,9 @@ """This module contains helper functions for the trends subpackage. This module contains the following function: -deseasonalize - calculate the trend using the requested method +calculate_seasonal_cycle - calculate the seasonal cycle +calculate_anomalies - calculate the anomalies series +moving_block_boostrap - perform the moving block bootstrap algorithm """ import numpy as np @@ -9,21 +11,60 @@ import pandas as pd import statsmodels.formula.api as smf -def deseasonalize(data): - """Deseasonalize the data. +def calculate_seasonal_cycle(data): + """Calculate the seasonal cycle. :param data: data containing a list of date time values and associated parameter values - :return: The deseasonalized data + :return: The seasonal cycle """ - seasonal_cycle = smf.ols( - "value~np.cos(month*2*np.pi/12)+np.sin(month*2*np.pi/12)" - "+np.cos(month*4*np.pi/12)+np.sin(month*4*np.pi/12)", + return smf.ols( + "value~np.sin(2*np.pi*month/12)+np.cos(2*np.pi*month/12)" + "+np.sin(2*np.pi*month/6)+np.cos(2*np.pi*month/6)", pd.DataFrame({"value": data["value"], "month": data.index.month}) ).fit(method="qr").predict(pd.DataFrame({"month": range(1, 13)})) + + +def calculate_anomalies(data): + """Calculate the anomalies series. + + :param data: data containing a list of date time values and + associated parameter values + + :return: The anomalies series + """ + seasonal_cycle = calculate_seasonal_cycle(data) idx = data.index return pd.DataFrame({ "value": data["value"].values - seasonal_cycle[idx.month-1].values, "datetime": (idx.year-2000)*12 + idx.month - }) + }).sort_values("datetime") + + +def moving_block_boostrap(method, data, quantile, num_samples=1000): + """Perform the moving block bootstrap algorithm. + + :param method: either ``"OLS"`` or ``"quant"`` + :param data: data containing a list of date time values and + associated parameter values + :param quantile: a single quantile, must be between 0 and 1 + :param num_samples: number of sampled trends + + :return: A list of sampled trends + """ + n = len(data) + b = np.ceil(n**0.25).astype(int) + nblocks = np.ceil(n/b).astype(int) + blocks = np.array([list(range(i, i+b)) for i in range(n-b+1)]) + rng = np.random.default_rng() + samples = [] + for _ in range(num_samples): + bn = rng.choice(len(blocks), nblocks) + samp_data = data.iloc[blocks[bn].flatten()] + if method == "quant": + mod = smf.quantreg("value~datetime", samp_data).fit(q=quantile) + else: + mod = smf.ols("value~datetime", samp_data).fit(method="qr") + samples.append(mod.params) + return samples