Skip to content
Snippets Groups Projects
Commit 677cb247 authored by Niklas Selke's avatar Niklas Selke
Browse files

Merge branch 'niklas_issue009_feat_calculate-quantile-regression' into 'develop'

Niklas issue009 feat calculate quantile regression

See merge request !6
parents 22b4373e 1c21dc0a
No related branches found
No related tags found
2 merge requests!9Develop,!6Niklas issue009 feat calculate quantile regression
......@@ -4,43 +4,70 @@ This module contains the following function:
calculate_trend - calculate the trend using the requested method
"""
from toarstats.metrics.input_checks import check_data
from toarstats.trends.ols import ols
from toarstats.trends.quant_reg import quant_reg
from toarstats.trends.utils import deseasonalize
from toarstats.trends.utils import calculate_anomalies
def calculate_trend(method, data, formula="value ~ datetime", quantiles=None):
def calculate_trend(method, data, quantiles=None):
"""Calculate the trend using the requested method.
This function is the public interface for the ``trends`` subpackage.
It takes all the user inputs and returns the result of the requested
trend analysis.
The calculation follows "Guidance note on best statistical practices
for TOAR analyses" (Chang et al. 2023,
https://arxiv.org/pdf/2304.14236.pdf) Annex E.
:param method: either ``"OLS"`` or ``"quant"``
:param data: data containing a list of date time values and
associated parameter values on which to calculate the
trend
:param formula: the formula specifying the model
:param quantiles: a single quantile or a list of quantiles to
calculate, these must be between 0 and 1; only
needed when ``method="quant"``
:raises ValueError: raised if the ``method`` parameter is not
recognized
:raises TypeError: raised if
- the ``data`` parameter is not a data frame or
series
- the index is not a datetime index
- the values are not in a one-dimensional float
or int array
:raises ValueError: raised if
- the ``method`` parameter is not recognized
- the index is empty, has duplicates or null
values
- the values array is empty or only contains
null values
- the index and values have different lengths
- any ``quantiles`` are not strictly within 0
and 1 with ``method="quantreg"``
:return: The result of the fit or a list of fit results if
``method="quant"`` and multiple quantiles are given
:return: The result of the fit or a dict of fit results if
``method="quant"``
"""
deseasonalized_data = deseasonalize(data)
if method == "OLS":
fit = ols(deseasonalized_data, formula)
elif method == "quant":
try:
fit = [quant_reg(deseasonalized_data, formula, quantile)
for quantile in quantiles]
except TypeError:
fit = quant_reg(deseasonalized_data, formula, quantiles)
else:
if method not in {"OLS", "quant"}:
raise ValueError(f"{method} is not recognized, must be 'OLS' or"
" 'quant'")
data_in = check_data(data, None, None).to_frame("value")
if method == "quant":
quantile_list = (
quantiles
if isinstance(quantiles, (list, set, tuple))
else [quantiles]
)
if not all(0 < quantile < 1 for quantile in quantile_list):
raise ValueError("The quantiles must be strictly between 0 and 1.")
anomalies_series = calculate_anomalies(data_in)
if method == "quant":
fit = {
quantile: quant_reg(anomalies_series, quantile)
for quantile in quantile_list
}
else:
fit = ols(anomalies_series)
return fit
......@@ -4,19 +4,24 @@ This module contains the following function:
ols - calculate the OLS linear regression
"""
import numpy as np
import scipy.stats
import statsmodels.formula.api as smf
from toarstats.trends.utils import moving_block_bootstrap
def ols(data, formula):
def ols(data):
"""Calculate the OLS linear regression.
:param data: data containing a list of date time values and
associated parameter values on which to calculate the
trend
:param formula: the formula specifying the model
:return: The result of the fit
:return: The trend with its uncertainty and p value
"""
model = smf.ols(formula, data)
fit = model.fit()
return fit
fit = smf.ols("value~datetime", data).fit(method="qr").params
mbb = moving_block_bootstrap("OLS", data)
fit_se = np.nanstd(mbb, axis=0)
fit_pv = 2*scipy.stats.t.sf(x=abs(fit/fit_se), df=len(data)-2)
return {"trend": fit, "uncertainty": fit_se, "p_value": fit_pv}
"""Quantile regression calculation.
This module contains the following function:
quant_reg - calculate the OLS linear regression
quant_reg - calculate the quantile regression
"""
import numpy as np
import scipy.stats
import statsmodels.formula.api as smf
from toarstats.trends.utils import moving_block_bootstrap
def quant_reg(data, formula, quantile):
def quant_reg(data, quantile):
"""Calculate the quantile regression.
:param data: data containing a list of date time values and
associated parameter values on which to calculate the
trend
:param formula: the formula specifying the model
:param quantile: a single quantile, must be between 0 and 1
:return: The result of the fit
:return: The trend with its uncertainty and p value
"""
model = smf.quantreg(formula, data)
fit = model.fit(q=quantile)
return fit
fit = smf.quantreg("value~datetime", data).fit(q=quantile).params
mbb = moving_block_bootstrap("quant", data, quantile)
fit_se = np.nanstd(mbb, axis=0)
fit_pv = 2*scipy.stats.t.sf(x=abs(fit/fit_se), df=len(data)-2)
return {"trend": fit, "uncertainty": fit_se, "p_value": fit_pv}
"""This module contains helper functions for the trends subpackage.
This module contains the following function:
deseasonalize - calculate the trend using the requested method
calculate_seasonal_cycle - calculate the seasonal cycle
calculate_anomalies - calculate the anomalies series
moving_block_bootstrap - perform the moving block bootstrap algorithm
"""
import numpy as np
......@@ -9,21 +11,61 @@ import pandas as pd
import statsmodels.formula.api as smf
def deseasonalize(data):
"""Deseasonalize the data.
def calculate_seasonal_cycle(data):
"""Calculate the seasonal cycle.
:param data: data containing a list of date time values and
associated parameter values
:return: The deseasonalized data
:return: The seasonal cycle
"""
seasonal_cycle = smf.ols(
"value~np.cos(month*2*np.pi/12)+np.sin(month*2*np.pi/12)"
"+np.cos(month*4*np.pi/12)+np.sin(month*4*np.pi/12)",
return smf.ols(
"value~np.sin(2*np.pi*month/12)+np.cos(2*np.pi*month/12)"
"+np.sin(2*np.pi*month/6)+np.cos(2*np.pi*month/6)",
pd.DataFrame({"value": data["value"], "month": data.index.month})
).fit(method="qr").predict(pd.DataFrame({"month": range(1, 13)}))
def calculate_anomalies(data):
"""Calculate the anomalies series.
:param data: data containing a list of date time values and
associated parameter values
:return: The anomalies series
"""
seasonal_cycle = calculate_seasonal_cycle(data)
idx = data.index
return pd.DataFrame({
"value": data["value"].values - seasonal_cycle[idx.month-1].values,
"datetime": (idx.year-2000)*12 + idx.month
})
}).sort_values("datetime")
def moving_block_bootstrap(method, data, quantile=None, num_samples=1000):
"""Perform the moving block bootstrap algorithm.
:param method: either ``"OLS"`` or ``"quant"``
:param data: data containing a list of date time values and
associated parameter values
:param quantile: a single quantile, must be between 0 and 1 if
``method="quant"``
:param num_samples: number of sampled trends
:return: A list of sampled trends
"""
n = len(data)
b = np.ceil(n**0.25).astype(int)
nblocks = np.ceil(n/b).astype(int)
blocks = np.array([list(range(i, i+b)) for i in range(n-b+1)])
rng = np.random.default_rng()
samples = []
for _ in range(num_samples):
bn = rng.choice(len(blocks), nblocks)
samp_data = data.iloc[blocks[bn].flatten()]
if method == "quant":
mod = smf.quantreg("value~datetime", samp_data).fit(q=quantile)
else:
mod = smf.ols("value~datetime", samp_data).fit(method="qr")
samples.append(mod.params)
return samples
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment