diff --git a/CHANGELOG.md b/CHANGELOG.md index 20d67405593f1188b18dc57f3f2d0902ad37a766..ab6462617c2768e30da8393db88d356469386356 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog All notable changes to this project will be documented in this file. +## v0.6.4 - 2024-09-16 - bugfix reference-series + +### general: +* daterange in create_reference_series is now optional; if it is omitted, the reference series is built from the dataframe index + ## v0.6.3 - 2024-07-30 - fixed custom-sampling ### general: diff --git a/dist/toarstats-0.6.4-py3-none-any.whl b/dist/toarstats-0.6.4-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..63662246f7b0180ee0beb2bf019884c51fce6be8 Binary files /dev/null and b/dist/toarstats-0.6.4-py3-none-any.whl differ diff --git a/setup.cfg b/setup.cfg index 29ab0fc3f343d8883ce9dda6ef55d78f3342e75b..a1b7ee0a33cfd9cbdcdc081bffea9e1972ce8dca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = toarstats -version = 0.6.3 +version = 0.6.4 author = Niklas Selke, Martin Schultz, Max Lensing author_email = n.selke@fz-juelich.de, m.schultz@fz-juelich.de, m.lensing@fz-juelich.de description = Collection of statistics for the TOAR community diff --git a/toarstats/metrics/stats_utils.py b/toarstats/metrics/stats_utils.py index 5483663d3954283837d158f1cc0a3522654480c5..95b47999e79f89c87ba7899c7ea67f9e2d2d0351 100644 --- a/toarstats/metrics/stats_utils.py +++ b/toarstats/metrics/stats_utils.py @@ -121,8 +121,9 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None, return fcov.reindex(ser_tmp.index) -def create_reference_series(index, sampling, daterange): +def create_reference_series(index, sampling, daterange=None): """Create a reference series by the daterange. Additional extends to the intervals specified by the sampling. + If no daterange is given, the series is based on the indices of the dataframe. 
:param index: the given index @@ -133,29 +134,31 @@ def create_reference_series(index, sampling, daterange): if set, the reference series creates a series from start to end-date - :return: A series with a date range based on the specified date range. The date range is extended based on the + :return: A series with a date range based on the specified date range or indices. The date range is extended based on the specified sampling. """ min_date = index.min() max_date = index.max() - # adjust start and end-date by the minute-offset of the timeseries - start_date = pd.to_datetime(daterange.split(",")[0]) - start_date = start_date.replace(minute=min_date.minute) - end_date = pd.to_datetime(daterange.split(",")[1]) - end_date = end_date.replace(minute=max_date.minute) - - if sampling in ["annual", "seasonal", "summer", "xsummer"]: - start_date = start_date.replace(month=1, day=1, hour=0) - end_date = end_date.replace(month=12, day=31, hour=23) - elif sampling == "monthly": - start_date = start_date.replace(day=1, hour=0) - end_date = pd.offsets.MonthEnd().rollforward(end_date) - end_date = end_date.replace(hour=23) - elif sampling == "daily": - start_date = start_date.replace(hour=0) - end_date = end_date.replace(hour=23) - - reference_index = pd.date_range(start=start_date, end=end_date, freq="h") + if daterange is not None: + # adjust start and end-date by the minute-offset of the timeseries + start_date = pd.to_datetime(daterange.split(",")[0]) + start_date = start_date.replace(minute=min_date.minute) + end_date = pd.to_datetime(daterange.split(",")[1]) + end_date = end_date.replace(minute=max_date.minute) + + if sampling in ["annual", "seasonal", "summer", "xsummer"]: + start_date = start_date.replace(month=1, day=1, hour=0) + end_date = end_date.replace(month=12, day=31, hour=23) + elif sampling == "monthly": + start_date = start_date.replace(day=1, hour=0) + end_date = pd.offsets.MonthEnd().rollforward(end_date) + end_date = end_date.replace(hour=23) + elif 
sampling == "daily": + start_date = start_date.replace(hour=0) + end_date = end_date.replace(hour=23) + reference_index = pd.date_range(start=start_date, end=end_date, freq="h") + else: + reference_index = pd.date_range(start=min_date, end=max_date, freq="h") return pd.Series(0, reference_index)