From 0381881f002ef1cceebf9e56ee4dfc62c20aeda9 Mon Sep 17 00:00:00 2001
From: Max Lensing <max.lensing@alumni.fh-aachen.de>
Date: Mon, 22 Jul 2024 11:12:42 +0200
Subject: [PATCH] bugfix create_reference_series and calc_data_capture, updated
 toarstats to 0.6.2

---
 CHANGELOG.md                     |  6 ++++++
 toarstats/metrics/stats_utils.py | 22 +++++++++++++++-------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c48a301..77a69c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Changelog
 All notable changes to this project will be documented in this file.
 
+## v0.6.2 - 2024-07-22 - bugfixes
+
+### general:
+* corrected calc_data_capture when using custom sampling
+* create_reference_series now applies the data's minute offset when a daterange is given
+
 ## v0.6.1 - 2024-07-15 - bugfixes
 
 ### general:
diff --git a/toarstats/metrics/stats_utils.py b/toarstats/metrics/stats_utils.py
index d745c03..11545fa 100644
--- a/toarstats/metrics/stats_utils.py
+++ b/toarstats/metrics/stats_utils.py
@@ -113,9 +113,15 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None,
 
     :return: A series with the data capture fraction
     """
-    ser_tmp = ser.resample(sampling).count()
-    fcov = ser_tmp / ref.resample(sampling).count()
-    return fcov.reindex(ser_tmp.index)
+    if sampling == "100YS":
+        ser_tmp = ser.resample(sampling).count().values
+        fcov = ser_tmp / ref.resample(sampling).count().values
+        data_capture = pd.Series([fcov[0]], index=[ref.index[0].round('H')], name="data_capture")
+    else:
+        ser_tmp = ser.resample(sampling).count()
+        fcov = ser_tmp / ref.resample(sampling).count()
+        data_capture = fcov.reindex(ser_tmp.index)
+    return data_capture
 
 
 def create_reference_series(index, daterange=None):
@@ -131,12 +137,14 @@ def create_reference_series(index, daterange=None):
              the earliest given year to the ending of the latest given
              year and filled with zeros
     """
+    min_date = index.min()
+    max_date = index.max()
     if daterange:
-        start_date = daterange.split(",")[0]
-        end_date = daterange.split(",")[1]
+        start_date = pd.to_datetime(daterange.split(",")[0])
+        start_date = start_date.replace(minute=min_date.minute)
+        end_date = pd.to_datetime(daterange.split(",")[1])
+        end_date = end_date.replace(minute=max_date.minute)
     else:
-        min_date = index.min()
-        max_date = index.max()
         start_date = f"{min_date.year}-01-01 00:{min_date.minute}"
         end_date = f"{max_date.year}-12-31 23:{max_date.minute}"
     reference_index = pd.date_range(start=start_date, end=end_date, freq="h")
-- 
GitLab