Commit 96ac6a7b authored by Niklas Selke's avatar Niklas Selke
Browse files

Removed 'stats_main.py'. The new interface is in 'interface.py'.

parent 9b8c07db
"""This module contains the public interface for the toarstats package.
This module contains the following function:
toarstats - public interface for the toarstats package
"""
import numpy as np
import pandas as pd
from toarstats import ozone_metrics
from toarstats import stats
from toarstats.stats_utils import get_growing_season, harmonize_time
def _resolve_seasons(sampling, mkey, metadata, seasons, crops):
    """Return the season names to evaluate for a single statistic.

    :param sampling: the sampling requested by the caller
    :param mkey: name of the statistic or metric being evaluated
    :param metadata: object providing ``station_lat`` and
                     ``station_climatic_zone``; only accessed when a
                     summer season or a growing season is needed
    :param seasons: season names given by the caller or ``None``
    :param crops: crop types given by the caller (list, single string
                  or ``None``)
    :return: a list of season names
    """
    # The summer samplings always determine the season from the station
    # latitude, even if the caller passed explicit seasons.
    if sampling == "summer":
        return ["NH-Summer"] if metadata.station_lat > 0. else ["SH-Summer"]
    if sampling == "xsummer":
        return (["NH-XSummer"] if metadata.station_lat > 0.
                else ["SH-XSummer"])
    if seasons is not None:
        return seasons
    needs_growing_seasons = (
        sampling == "vegseason"
        or (sampling == "seasonal" and ("aot40" in mkey or "w126" in mkey))
    )
    if not needs_growing_seasons:
        return ["DJF", "MAM", "JJA", "SON"]
    if crops is None:
        crops = ["wheat", "rice"]
    elif isinstance(crops, str):
        crops = [crops]
    growing_seasons = (
        get_growing_season(ctype, metadata.station_climatic_zone,
                           metadata.station_lat)
        for ctype in crops
    )
    # Crops for which no growing season is returned are skipped.
    return [season for season in growing_seasons if season is not None]


def toarstats(sampling, statistics, data, metadata, seasons=None, crops=None,
              data_capture=None):
    """Calculate the given statistics with the given sampling.

    This function is the public interface for the toarstats package and
    acts as a wrapper around all statistics and metrics included in the
    package.

    :param sampling: temporal aggregation, one of ``daily``,
                     ``monthly``, ``seasonal``, ``vegseason``,
                     ``summer``, ``xsummer``, or ``annual``;
                     ``summer`` will pick the 6-months summer season in
                     the hemisphere where the station is located;
                     ``xsummer`` does the same for a 7-months summer
                     season;
                     ``vegseason`` requires also the crops argument and
                     will then determine the appropriate growing seasons
                     based on the ``climatic_zone`` metadata and crop
                     type
    :param statistics: a list of statistics and metrics to call, these
                       must be defined in ``stats.py`` or
                       ``ozone_metrics.py``;
                       a single string can also be given;
                       an empty list yields an empty data frame
    :param data: a data frame with datetime values with hourly
                 resolution and a column with parameter values on which
                 to calculate the requested statistics and metrics;
                 a series is accepted as well; for a data frame the
                 column ``value`` is used if present, otherwise
                 ``values``, otherwise the first column
    :param metadata: a named tuple with metadata information for
                     ``station_lat``, ``station_lon``, and
                     ``station_climatic_zone``
    :param seasons: a list of season names for seasonal statistics;
                    for a definition of seasons, see ``stats_utils.py``;
                    if ``None`` is passed, seasonal statistics will be
                    computed for the default seasons of the respective
                    metrics, normally, these are the four meteorological
                    seasons ``DJF``, ``MAM``, ``JJA`` and ``SON``;
                    if sampling is set to ``summer`` or ``xsummer``, the
                    correct season will be determined based on the
                    ``station_lat`` metadata;
                    if sampling is ``vegseason`` and the crops argument
                    is given, the appropriate growing seasons will be
                    selected based on the crop type and
                    ``climatic_zone`` metadata;
                    the growing seasons for wheat and rice will also be
                    selected if sampling is ``seasonal`` and the chosen
                    metrics contains ``aot40`` or ``w126``
    :param crops: a list of crop types for ``vegseason`` statistics;
                  default is ``["wheat", "rice"]``;
                  a single string can also be given
    :param data_capture: a fractional value which will be used to
                         identify valid data periods;
                         the default is 0.75 for most statistics,
                         meaning that 75% of hourly values must be
                         present in a given interval in order to mark a
                         result as valid;
                         note that the ``value_count``, ``mean`` and
                         ``standard_deviation`` statistics do not use
                         this capture criterion, ``value_count`` counts
                         all values, ``mean`` and ``standard_deviation``
                         are calculated when there are at least 10 valid
                         hourly values in an interval;
                         the fraction may not always be applied to
                         original hourly values, but could for example
                         also be used to count the number of valid days
                         for a ``monthly``, ``seasonal``, or ``annual``
                         statistic

    :raises ValueError: raised if ``diurnal_cycle`` is not given alone,
                        if ``data`` is neither a data frame nor a
                        series, or if ``data`` contains no rows
    :raises AttributeError: raised if a requested statistic is defined
                            neither in ``stats`` nor in
                            ``ozone_metrics``

    :return: A data frame which contains the results for each of the
             requested statistics and metrics
    """
    if isinstance(statistics, str):
        statistics = [statistics]

    # Safety check: diurnal_cycle can only be evaluated alone.
    if "diurnal_cycle" in statistics and len(statistics) > 1:
        raise ValueError("Diurnal cycle can only be evaluated alone")

    # Create a data frame with a single "values" column from the input.
    if isinstance(data, pd.Series):
        series = data
    elif isinstance(data, pd.DataFrame):
        column = next(
            (name for name in ("value", "values") if name in data.columns),
            None,
        )
        series = data[column] if column is not None else data.iloc[:, 0]
    else:
        raise ValueError("The data must be provided as a data frame or series")
    df = pd.DataFrame({"values": series.values}, index=data.index)

    # Nothing requested: return an empty result instead of failing on
    # ``statistics[0]`` further down.
    if len(statistics) == 0:
        return pd.DataFrame()

    # Without any rows there is no year range to build the reference
    # frame from; fail with a clear message instead of a parser error.
    if df.empty:
        raise ValueError("The data must contain at least one value")

    # Workaround: Remove time zone information to get all statistics
    # working. Needs to be fixed so that all statistics work with given
    # time zone information.
    if df.index.tz is not None:
        df.index = df.index.tz_localize(None)

    # Create a reference data frame (complete date range) to evaluate
    # data_capture. This assumes hourly data in the original data frame.
    # Make sure to include complete year AND make sure to capture time
    # zones that are not on full hours (e.g. India).
    first, last = df.index.min(), df.index.max()
    tstart = f"{first.year}-01-01 00:{first.minute:02d}"
    tend = f"{last.year}-12-31 23:{last.minute:02d}"
    # "h" is the hourly alias; the upper-case "H" spelling is deprecated
    # in recent pandas releases.
    tref = pd.date_range(start=tstart, end=tend, freq="h")
    dfref = pd.DataFrame(np.zeros(len(tref)), index=tref)

    # diurnal_cycle is guaranteed to be alone (see check above), so the
    # default capture fraction is the same for every requested statistic.
    if data_capture is None:
        data_capture = 0.5 if "diurnal_cycle" in statistics else 0.75

    mtype = "seasonal" if sampling == "vegseason" else sampling
    res = []
    for mkey in statistics:
        myseasons = _resolve_seasons(sampling, mkey, metadata, seasons, crops)
        # Look the statistic up in ``stats`` first, then fall back to
        # ``ozone_metrics``; an unknown name raises AttributeError.
        try:
            func = getattr(stats, mkey)
        except AttributeError:
            func = getattr(ozone_metrics, mkey)
        res.extend(func(df, dfref, mtype, metadata, myseasons, data_capture))

    # The resampling frequency is handed to ``harmonize_time`` unchanged.
    rsfreq = "H" if statistics[0] == "diurnal_cycle" else None
    if res:
        res = harmonize_time(res, mtype, rsfreq)
    return pd.DataFrame({r["name"]: r["df"]["values"] for r in res})
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment