Commit 96ac6a7b authored by Niklas Selke's avatar Niklas Selke
Browse files

Removed ''. The new interface is in ''.

parent 9b8c07db
"""This module contains the public interface for the toarstats package.

This module contains the following function:
toarstats - public interface for the toarstats package
"""

import numpy as np
import pandas as pd
from toarstats import ozone_metrics
from toarstats import stats
from toarstats.stats_utils import get_growing_season, harmonize_time
def _values_frame(data):
    """Return ``data`` as a data frame with a single ``values`` column.

    :param data: a series, or a data frame; for a data frame the column
                 ``value`` is preferred, then ``values``, then the
                 first column
    :raises ValueError: raised if ``data`` is neither a series nor a
                        data frame
    :return: a data frame with the column ``values`` and the index of
             ``data``
    """
    if isinstance(data, pd.Series):
        values = data.values
    elif isinstance(data, pd.DataFrame):
        if "value" in data.columns:
            values = data["value"].values
        elif "values" in data.columns:
            values = data["values"].values
        else:
            values = data.iloc[:, 0].values
    else:
        raise ValueError("The data must be provided as a data frame or series")
    return pd.DataFrame({"values": values}, index=data.index)


def _reference_frame(index):
    """Return a zero-filled hourly frame spanning the years of ``index``.

    The range starts on January 1st of the first year and ends on
    December 31st of the last year. The minute of the first and last
    timestamp is kept so that series which are not on full hours (e.g.
    time zones with a 30 minute offset) are matched as well.

    :param index: a non-empty, time zone naive datetime index
    :return: a data frame of zeros on the complete hourly range
    """
    first, last = index.min(), index.max()
    start = pd.Timestamp(year=first.year, month=1, day=1, hour=0,
                         minute=first.minute)
    end = pd.Timestamp(year=last.year, month=12, day=31, hour=23,
                       minute=last.minute)
    # A Timedelta is used instead of the alias "H", which is deprecated
    # in recent pandas versions; the resulting hourly range is the same.
    tref = pd.date_range(start=start, end=end, freq=pd.Timedelta(hours=1))
    return pd.DataFrame(np.zeros(len(tref)), index=tref)


def _select_seasons(sampling, mkey, metadata, seasons, crops):
    """Return the list of seasons to use for one statistic.

    :param sampling: the requested temporal aggregation
    :param mkey: name of the statistic or metric
    :param metadata: object providing ``station_lat`` and
                     ``station_climatic_zone``
    :param seasons: user supplied seasons or ``None``
    :param crops: user supplied crop types, a single string or ``None``
    :return: a list of seasons
    """
    # NOTE(review): the source this was restored from had lost its
    # indentation; the summer/xsummer selection is assumed to override
    # any other season choice - confirm against the repository.
    if sampling == "summer":
        return ["NH-Summer"] if metadata.station_lat > 0. else ["SH-Summer"]
    if sampling == "xsummer":
        return (["NH-XSummer"] if metadata.station_lat > 0.
                else ["SH-XSummer"])
    if seasons is not None:
        return seasons
    wants_growing_season = (
        sampling == "vegseason"
        or (sampling == "seasonal" and ("aot40" in mkey or "w126" in mkey))
    )
    if not wants_growing_season:
        return ["DJF", "MAM", "JJA", "SON"]
    if crops is None:
        crops = ["wheat", "rice"]
    elif isinstance(crops, str):
        crops = [crops]
    growing_seasons = []
    for ctype in crops:
        # NOTE(review): the third argument was missing in the source
        # this was restored from; ``station_lat`` is assumed - confirm
        # against the signature of ``get_growing_season``.
        growing_season = get_growing_season(
            ctype, metadata.station_climatic_zone, metadata.station_lat
        )
        if growing_season is not None:
            growing_seasons.append(growing_season)
    return growing_seasons


def toarstats(sampling, statistics, data, metadata, seasons=None, crops=None,
              data_capture=None):
    """Calculate the given statistics with the given sampling.

    This function is the public interface for the toarstats package and
    acts as a wrapper around all statistics and metrics included in the
    package.

    :param sampling: temporal aggregation, one of ``daily``,
                     ``monthly``, ``seasonal``, ``vegseason``,
                     ``summer``, ``xsummer``, or ``annual``;
                     ``summer`` will pick the 6-months summer season in
                     the hemisphere where the station is located;
                     ``xsummer`` does the same for a 7-months summer
                     season;
                     ``vegseason`` requires also the crops argument and
                     will then determine the appropriate growing seasons
                     based on the ``climatic_zone`` metadata and crop
                     type;
                     ``vegseason`` is passed on to the statistics as
                     ``seasonal``
    :param statistics: a list of statistics and metrics to call, these
                       are looked up by name in the ``stats`` module
                       first and in the ``ozone_metrics`` module
                       second;
                       a single string can also be given;
                       an empty list yields an empty data frame
    :param data: a data frame with datetime values with hourly
                 resolution and a column with parameter values on which
                 to calculate the requested statistics and metrics;
                 the column ``value`` is used if present, otherwise
                 ``values``, otherwise the first column;
                 a series can also be given;
                 time zone information is removed from the index
    :param metadata: a named tuple with metadata information for
                     ``station_lat``, ``station_lon``, and
                     ``station_climatic_zone``
    :param seasons: a list of season names for seasonal statistics;
                    if ``None`` is passed, seasonal statistics will be
                    computed for the default seasons of the respective
                    metrics, normally, these are the four meteorological
                    seasons ``DJF``, ``MAM``, ``JJA`` and ``SON``;
                    if sampling is set to ``summer`` or ``xsummer``, the
                    correct season will be determined based on the
                    ``station_lat`` metadata;
                    if sampling is ``vegseason`` and the crops argument
                    is given, the appropriate growing seasons will be
                    selected based on the crop type and
                    ``climatic_zone`` metadata;
                    the growing seasons for wheat and rice will also be
                    selected if sampling is ``seasonal`` and the chosen
                    metrics contains ``aot40`` or ``w126``
    :param crops: a list of crop types for ``vegseason`` statistics;
                  default is ``["wheat", "rice"]``;
                  a single string can also be given
    :param data_capture: a fractional value which will be used to
                         identify valid data periods;
                         the default is 0.75 for most statistics,
                         meaning that 75% of hourly values must be
                         present in a given interval in order to mark a
                         result as valid;
                         for ``diurnal_cycle`` the default is 0.5;
                         note that the ``value_count``, ``mean`` and
                         ``standard_deviation`` statistics do not use
                         this capture criterion, ``value_count`` counts
                         all values, ``mean`` and ``standard_deviation``
                         are calculated when there are at least 10 valid
                         hourly values in an interval;
                         the fraction may not always be applied to
                         original hourly values, but could for example
                         also be used to count the number of valid days
                         for a ``monthly``, ``seasonal``, or ``annual``
                         statistic

    :raises ValueError: raised if ``diurnal_cycle`` is not given alone,
                        if ``data`` is neither a data frame nor a
                        series, or if ``data`` is empty

    :return: A data frame which contains the results for each of the
             requested statistics and metrics
    """
    if isinstance(statistics, str):
        statistics = [statistics]

    # Safety check: diurnal_cycle can only be evaluated alone.
    if "diurnal_cycle" in statistics and len(statistics) > 1:
        raise ValueError("Diurnal cycle can only be evaluated alone")

    # Create a data frame from the data input.
    df = _values_frame(data)
    if df.empty:
        # Without any timestamp the reference range cannot be derived.
        raise ValueError("The data must contain at least one value")

    # Workaround: Remove time zone information to get all statistics
    # working. Needs to be fixed so that all statistics work with given
    # time zone information.
    df.index = df.index.tz_localize(None)

    # Create a reference data frame (complete date range) to evaluate
    # data_capture. This assumes hourly data in the original data frame.
    dfref = _reference_frame(df.index)

    res = []
    mtype = "seasonal" if sampling == "vegseason" else sampling
    for mkey in statistics:
        myseasons = _select_seasons(sampling, mkey, metadata, seasons, crops)
        if data_capture is None:
            capture = 0.5 if mkey == "diurnal_cycle" else 0.75
        else:
            capture = data_capture
        # General statistics take precedence over ozone metrics.
        try:
            func = getattr(stats, mkey)
        except AttributeError:
            func = getattr(ozone_metrics, mkey)
        res.extend(func(df, dfref, mtype, metadata, myseasons, capture))

    # diurnal_cycle is guaranteed to be alone, so a membership test is
    # equivalent to looking at the first entry but also works for an
    # empty list of statistics. The value "H" is handed to
    # harmonize_time unchanged.
    rsfreq = "H" if "diurnal_cycle" in statistics else None
    if len(res) > 0:
        res = harmonize_time(res, mtype, rsfreq)
    return pd.DataFrame({r["name"]: r["df"]["values"] for r in res})
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment