Skip to content
Snippets Groups Projects
Commit 28b69523 authored by Sabine Schröder's avatar Sabine Schröder
Browse files

Merge branch 'max_version_0.6.3' into 'master'

Max version 0.6.3

See merge request !15
parents a01d7bc2 e6adfa70
No related branches found
No related tags found
1 merge request!15Max version 0.6.3
# Changelog
All notable changes to this project will be documented in this file.
## v0.6.3 - 2024-07-30 - fixed custom-sampling
### general:
* reference series now also uses the sampling type and adapts the data range accordingly
* fixed calc_data_capture
### technical:
* custom sample statistics now return an index that matches the start date instead of the start of the year
## v0.6.2 - 2024-07-22 - bugfixes
### general:
......
File added
[metadata]
name = toarstats
version = 0.6.2
version = 0.6.3
author = Niklas Selke, Martin Schultz, Max Lensing
author_email = n.selke@fz-juelich.de, m.schultz@fz-juelich.de, m.lensing@fz-juelich.de
description = Collection of statistics for the TOAR community
......
......@@ -127,7 +127,7 @@ def calculate_statistics(
else DEFAULT_CROPS, input_parameters.required.seasons,
input_parameters.required.crops
)
ref = create_reference_series(input_parameters.data.index, daterange)
ref = create_reference_series(input_parameters.data.index, input_parameters.sampling, daterange)
resample_rule = ("seasonal" if input_parameters.sampling == "vegseason"
else input_parameters.sampling)
min_data_capture_value = (input_parameters.min_data_capture
......@@ -145,8 +145,14 @@ def calculate_statistics(
min_data_capture_value
)
)
results = harmonize_time(
results, resample_rule,
"H" if "diurnal_cycle" in input_parameters.statistics else None
)
if resample_rule == "custom":
custom_index = pd.DatetimeIndex([ref.index.min().strftime("%Y-%m-%d")])
for res in results:
res["ser"].index = custom_index
else:
results = harmonize_time(
results, resample_rule,
"H" if "diurnal_cycle" in input_parameters.statistics else None
)
return pd.DataFrame({res["name"]: res["ser"] for res in results})
......@@ -113,40 +113,48 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None,
:return: A series with the data capture fraction
"""
if sampling == "100YS":
ser_tmp = ser.resample(sampling).count().values
fcov = ser_tmp / ref.resample(sampling).count().values
data_capture = pd.Series([fcov[0]], index=[ref.index[0].round('H')], name="data_capture")
else:
ser_tmp = ser.resample(sampling).count()
fcov = ser_tmp / ref.resample(sampling).count()
data_capture = fcov.reindex(ser_tmp.index)
return data_capture
# bring the series into the form of the reference time series; values missing from the series are represented by NaN
ser_com = ser.combine(ref, max, fill_value=None)
ser_tmp = ser_com.resample(sampling).count()
fcov = ser_tmp / ref.resample(sampling).count()
return fcov.reindex(ser_tmp.index)
def create_reference_series(index, daterange=None):
"""Create a reference series.
def create_reference_series(index, sampling, daterange):
"""Create a reference series from the date range, additionally extended to the intervals specified by the sampling.
:param index: the given index
:param sampling: sampling rate defines the daterange of the ref-series
e.g. monthly changes the daterange to fill in all months touched by daterange
:param daterange: start- and end-date separated by comma
if set, the reference series creates a series from start to end-date
:return: A series with a date range spanning from the beginning of
the earliest given year to the ending of the latest given
year and filled with zeros
:return: A series with a date range based on the specified date range. The date range is extended based on the
specified sampling.
"""
min_date = index.min()
max_date = index.max()
if daterange:
start_date = pd.to_datetime(daterange.split(",")[0])
start_date = start_date.replace(minute=min_date.minute)
end_date = pd.to_datetime(daterange.split(",")[1])
end_date = end_date.replace(minute=max_date.minute)
else:
start_date = f"{min_date.year}-01-01 00:{min_date.minute}"
end_date = f"{max_date.year}-12-31 23:{max_date.minute}"
# adjust start and end-date by the minute-offset of the timeseries
start_date = pd.to_datetime(daterange.split(",")[0])
start_date = start_date.replace(minute=min_date.minute)
end_date = pd.to_datetime(daterange.split(",")[1])
end_date = end_date.replace(minute=max_date.minute)
if sampling in ["annual", "seasonal", "summer", "xsummer"]:
start_date = start_date.replace(month=1, day=1, hour=0)
end_date = end_date.replace(month=12, day=31, hour=23)
elif sampling == "monthly":
start_date = start_date.replace(day=1, hour=0)
end_date = pd.offsets.MonthEnd().rollforward(end_date)
end_date = end_date.replace(hour=23)
elif sampling == "daily":
start_date = start_date.replace(hour=0)
end_date = end_date.replace(hour=23)
reference_index = pd.date_range(start=start_date, end=end_date, freq="h")
return pd.Series(0, reference_index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment