Skip to content
Snippets Groups Projects
Commit af3295e1 authored by Max Lensing
Browse files

bugfix create_reference_series, updated toarstats to 0.6.4

parent 28b69523
Branches
Tags
1 merge request: !16 bugfix create_reference_series, updated toarstats to 0.6.4
# Changelog
All notable changes to this project will be documented in this file.
## v0.6.4 - 2024-09-16 - bugfix reference-series
### general:
* daterange in create_reference_series is now optional; if it is omitted, the reference series is built from the dataframe's index (from its earliest to its latest timestamp)
## v0.6.3 - 2024-07-30 - fixed custom-sampling
### general:
......
File added
[metadata]
name = toarstats
version = 0.6.3
version = 0.6.4
author = Niklas Selke, Martin Schultz, Max Lensing
author_email = n.selke@fz-juelich.de, m.schultz@fz-juelich.de, m.lensing@fz-juelich.de
description = Collection of statistics for the TOAR community
......
......@@ -121,8 +121,9 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None,
return fcov.reindex(ser_tmp.index)
def create_reference_series(index, sampling, daterange):
def create_reference_series(index, sampling, daterange=None):
"""Create a reference series by the daterange. Additional extends to the intervals specified by the sampling.
If no daterange is given, the series is based on the indices of the dataframe.
:param index: the given index
......@@ -133,29 +134,31 @@ def create_reference_series(index, sampling, daterange):
if set, the reference series creates a series from start to end-date
:return: A series with a date range based on the specified date range. The date range is extended based on the
:return: A series with a date range based on the specified date range or indices. If a daterange is given, it is extended based on the
specified sampling.
"""
min_date = index.min()
max_date = index.max()
# adjust start and end-date by the minute-offset of the timeseries
start_date = pd.to_datetime(daterange.split(",")[0])
start_date = start_date.replace(minute=min_date.minute)
end_date = pd.to_datetime(daterange.split(",")[1])
end_date = end_date.replace(minute=max_date.minute)
if sampling in ["annual", "seasonal", "summer", "xsummer"]:
start_date = start_date.replace(month=1, day=1, hour=0)
end_date = end_date.replace(month=12, day=31, hour=23)
elif sampling == "monthly":
start_date = start_date.replace(day=1, hour=0)
end_date = pd.offsets.MonthEnd().rollforward(end_date)
end_date = end_date.replace(hour=23)
elif sampling == "daily":
start_date = start_date.replace(hour=0)
end_date = end_date.replace(hour=23)
reference_index = pd.date_range(start=start_date, end=end_date, freq="h")
if daterange is not None:
# adjust start and end-date by the minute-offset of the timeseries
start_date = pd.to_datetime(daterange.split(",")[0])
start_date = start_date.replace(minute=min_date.minute)
end_date = pd.to_datetime(daterange.split(",")[1])
end_date = end_date.replace(minute=max_date.minute)
if sampling in ["annual", "seasonal", "summer", "xsummer"]:
start_date = start_date.replace(month=1, day=1, hour=0)
end_date = end_date.replace(month=12, day=31, hour=23)
elif sampling == "monthly":
start_date = start_date.replace(day=1, hour=0)
end_date = pd.offsets.MonthEnd().rollforward(end_date)
end_date = end_date.replace(hour=23)
elif sampling == "daily":
start_date = start_date.replace(hour=0)
end_date = end_date.replace(hour=23)
reference_index = pd.date_range(start=start_date, end=end_date, freq="h")
else:
reference_index = pd.date_range(start=min_date, end=max_date, freq="h")
return pd.Series(0, reference_index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment