Skip to content
Snippets Groups Projects
Commit d427f3ae authored by Niklas Selke's avatar Niklas Selke
Browse files

Added moving block bootstrap algorithm to the trend calculation.

parent 22b4373e
Branches
Tags
2 merge requests!9Develop,!6Niklas issue009 feat calculate quantile regression
...@@ -4,43 +4,70 @@ This module contains the following function: ...@@ -4,43 +4,70 @@ This module contains the following function:
calculate_trend - calculate the trend using the requested method calculate_trend - calculate the trend using the requested method
""" """
from toarstats.metrics.input_checks import check_data
from toarstats.trends.ols import ols from toarstats.trends.ols import ols
from toarstats.trends.quant_reg import quant_reg from toarstats.trends.quant_reg import quant_reg
from toarstats.trends.utils import deseasonalize from toarstats.trends.utils import calculate_anomalies
def calculate_trend(method, data, formula="value ~ datetime", quantiles=None): def calculate_trend(method, data, quantiles=None):
"""Calculate the trend using the requested method. """Calculate the trend using the requested method.
This function is the public interface for the ``trends`` subpackage. This function is the public interface for the ``trends`` subpackage.
It takes all the user inputs and returns the result of the requested It takes all the user inputs and returns the result of the requested
trend analysis. trend analysis.
The calculation follows "Guidance note on best statistical practices
for TOAR analyses" (Chang et al. 2023,
https://arxiv.org/pdf/2304.14236.pdf) Annex E.
:param method: either ``"OLS"`` or ``"quant"`` :param method: either ``"OLS"`` or ``"quant"``
:param data: data containing a list of date time values and :param data: data containing a list of date time values and
associated parameter values on which to calculate the associated parameter values on which to calculate the
trend trend
:param formula: the formula specifying the model
:param quantiles: a single quantile or a list of quantiles to :param quantiles: a single quantile or a list of quantiles to
calculate, these must be between 0 and 1; only calculate, these must be between 0 and 1; only
needed when ``method="quant"`` needed when ``method="quant"``
:raises ValueError: raised if the ``method`` parameter is not :raises TypeError: raised if
recognized
- the ``data`` parameter is not a data frame or
series
- the index is not a datetime index
- the values are not in a one-dimensional float
or int array
:raises ValueError: raised if
- the ``method`` parameter is not recognized
- the index is empty, has duplicates or null
values
- the values array is empty or only contains
null values
- the index and values have different lengths
- any ``quantiles`` are not strictly within 0
and 1 with ``method="quantreg"``
:return: The result of the fit or a list of fit results if :return: The result of the fit or a dict of fit results if
``method="quant"`` and multiple quantiles are given ``method="quant"``
""" """
deseasonalized_data = deseasonalize(data) if method not in {"OLS", "quant"}:
if method == "OLS":
fit = ols(deseasonalized_data, formula)
elif method == "quant":
try:
fit = [quant_reg(deseasonalized_data, formula, quantile)
for quantile in quantiles]
except TypeError:
fit = quant_reg(deseasonalized_data, formula, quantiles)
else:
raise ValueError(f"{method} is not recognized, must be 'OLS' or" raise ValueError(f"{method} is not recognized, must be 'OLS' or"
" 'quant'") " 'quant'")
data_in = check_data(data, None, None).to_frame("value")
if method == "quant":
quantile_list = (
quantiles
if isinstance(quantiles, (list, set, tuple))
else [quantiles]
)
if not all(0 < quantile < 1 for quantile in quantile_list):
raise ValueError("The quantiles must be strictly between 0 and 1.")
anomalies_series = calculate_anomalies(data_in)
if method == "quant_reg":
fit = {
quantile: quant_reg(anomalies_series, quantile)
for quantile in quantile_list
}
else:
fit = ols(anomalies_series)
return fit return fit
...@@ -4,19 +4,24 @@ This module contains the following function: ...@@ -4,19 +4,24 @@ This module contains the following function:
ols - calculate the OLS linear regression ols - calculate the OLS linear regression
""" """
import numpy as np
import scipy.stats
import statsmodels.formula.api as smf import statsmodels.formula.api as smf
from toarstats.trends.utils import moving_block_bootstrap
def ols(data, formula):
def ols(data):
"""Calculate the OLS linear regression. """Calculate the OLS linear regression.
:param data: data containing a list of date time values and :param data: data containing a list of date time values and
associated parameter values on which to calculate the associated parameter values on which to calculate the
trend trend
:param formula: the formula specifying the model
:return: The result of the fit :return: The trend with its uncertainty and p value
""" """
model = smf.ols(formula, data) fit = smf.ols("value~datetime", data).fit(method="qr").params
fit = model.fit() mbb = moving_block_bootstrap("OLS", data, quantile)
return fit fit_se = np.nanstd(mbb, axis=0)
fit_pv = 2*scipy.stats.t.sf(x=abs(fit/fit_se), df=len(data)-2)
return {"trend": fit, "uncertainty": fit_se, "p_value": fit_pv}
"""Quantile regression calculation. """Quantile regression calculation.
This module contains the following function: This module contains the following function:
quant_reg - calculate the OLS linear regression quant_reg - calculate the quantile regression
""" """
import numpy as np
import scipy.stats
import statsmodels.formula.api as smf import statsmodels.formula.api as smf
from toarstats.trends.utils import moving_block_bootstrap
def quant_reg(data, formula, quantile):
def quant_reg(data, quantile):
"""Calculate the quantile regression. """Calculate the quantile regression.
:param data: data containing a list of date time values and :param data: data containing a list of date time values and
associated parameter values on which to calculate the associated parameter values on which to calculate the
trend trend
:param formula: the formula specifying the model
:param quantile: a single quantile, must be between 0 and 1 :param quantile: a single quantile, must be between 0 and 1
:return: The result of the fit :return: The trend with its uncertainty and p value
""" """
model = smf.quantreg(formula, data) fit = smf.quantreg("value~datetime", data).fit(q=quantile).params
fit = model.fit(q=quantile) mbb = moving_block_bootstrap("quant", data, quantile)
return fit fit_se = np.nanstd(mbb, axis=0)
fit_pv = 2*scipy.stats.t.sf(x=abs(fit/fit_se), df=len(data)-2)
return {"trend": fit, "uncertainty": fit_se, "p_value": fit_pv}
"""This module contains helper functions for the trends subpackage. """This module contains helper functions for the trends subpackage.
This module contains the following function: This module contains the following function:
deseasonalize - calculate the trend using the requested method calculate_seasonal_cycle - calculate the seasonal cycle
calculate_anomalies - calculate the anomalies series
moving_block_boostrap - perform the moving block bootstrap algorithm
""" """
import numpy as np import numpy as np
...@@ -9,21 +11,60 @@ import pandas as pd ...@@ -9,21 +11,60 @@ import pandas as pd
import statsmodels.formula.api as smf import statsmodels.formula.api as smf
def deseasonalize(data): def calculate_seasonal_cycle(data):
"""Deseasonalize the data. """Calculate the seasonal cycle.
:param data: data containing a list of date time values and :param data: data containing a list of date time values and
associated parameter values associated parameter values
:return: The deseasonalized data :return: The seasonal cycle
""" """
seasonal_cycle = smf.ols( return smf.ols(
"value~np.cos(month*2*np.pi/12)+np.sin(month*2*np.pi/12)" "value~np.sin(2*np.pi*month/12)+np.cos(2*np.pi*month/12)"
"+np.cos(month*4*np.pi/12)+np.sin(month*4*np.pi/12)", "+np.sin(2*np.pi*month/6)+np.cos(2*np.pi*month/6)",
pd.DataFrame({"value": data["value"], "month": data.index.month}) pd.DataFrame({"value": data["value"], "month": data.index.month})
).fit(method="qr").predict(pd.DataFrame({"month": range(1, 13)})) ).fit(method="qr").predict(pd.DataFrame({"month": range(1, 13)}))
def calculate_anomalies(data):
"""Calculate the anomalies series.
:param data: data containing a list of date time values and
associated parameter values
:return: The anomalies series
"""
seasonal_cycle = calculate_seasonal_cycle(data)
idx = data.index idx = data.index
return pd.DataFrame({ return pd.DataFrame({
"value": data["value"].values - seasonal_cycle[idx.month-1].values, "value": data["value"].values - seasonal_cycle[idx.month-1].values,
"datetime": (idx.year-2000)*12 + idx.month "datetime": (idx.year-2000)*12 + idx.month
}) }).sort_values("datetime")
def moving_block_boostrap(method, data, quantile, num_samples=1000):
"""Perform the moving block bootstrap algorithm.
:param method: either ``"OLS"`` or ``"quant"``
:param data: data containing a list of date time values and
associated parameter values
:param quantile: a single quantile, must be between 0 and 1
:param num_samples: number of sampled trends
:return: A list of sampled trends
"""
n = len(data)
b = np.ceil(n**0.25).astype(int)
nblocks = np.ceil(n/b).astype(int)
blocks = np.array([list(range(i, i+b)) for i in range(n-b+1)])
rng = np.random.default_rng()
samples = []
for _ in range(num_samples):
bn = rng.choice(len(blocks), nblocks)
samp_data = data.iloc[blocks[bn].flatten()]
if method == "quant":
mod = smf.quantreg("value~datetime", samp_data).fit(q=quantile)
else:
mod = smf.ols("value~datetime", samp_data).fit(method="qr")
samples.append(mod.params)
return samples
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment