diff --git a/CHANGELOG.md b/CHANGELOG.md index c48a3018121757035cd8769eabc9cc8b9919aac4..77a69c738f44c306e304609bde5ed8e995ac6508 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Changelog All notable changes to this project will be documented in this file. +## v0.6.2 - 2024-07-22 - bugfixes + +### general: +* corrected calc_data_capture when using custom sampling +* updated reference_series creation to correct minute-offset + ## v0.6.1 - 2024-07-15 - bugfixes ### general: diff --git a/toarstats/metrics/stats_utils.py b/toarstats/metrics/stats_utils.py index d745c035a29723922187bf9215b7f491333b6afa..11545fa9bd43ff43baad9280d6e5dfd15857e2f2 100644 --- a/toarstats/metrics/stats_utils.py +++ b/toarstats/metrics/stats_utils.py @@ -113,9 +113,15 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None, :return: A series with the data capture fraction """ - ser_tmp = ser.resample(sampling).count() - fcov = ser_tmp / ref.resample(sampling).count() - return fcov.reindex(ser_tmp.index) + if sampling == "100YS": + ser_tmp = ser.resample(sampling).count().values + fcov = ser_tmp / ref.resample(sampling).count().values + data_capture = pd.Series([fcov[0]], index=[ref.index[0].round('H')], name="data_capture") + else: + ser_tmp = ser.resample(sampling).count() + fcov = ser_tmp / ref.resample(sampling).count() + data_capture = fcov.reindex(ser_tmp.index) + return data_capture def create_reference_series(index, daterange=None): @@ -131,12 +137,14 @@ def create_reference_series(index, daterange=None): the earliest given year to the ending of the latest given year and filled with zeros """ + min_date = index.min() + max_date = index.max() if daterange: - start_date = daterange.split(",")[0] - end_date = daterange.split(",")[1] + start_date = pd.to_datetime(daterange.split(",")[0]) + start_date = start_date.replace(minute=min_date.minute) + end_date = pd.to_datetime(daterange.split(",")[1]) + end_date = end_date.replace(minute=max_date.minute) else: - min_date = index.min() - max_date = index.max() start_date = f"{min_date.year}-01-01 00:{min_date.minute}" end_date = f"{max_date.year}-12-31 23:{max_date.minute}" reference_index = pd.date_range(start=start_date, end=end_date, freq="h")