diff --git a/CHANGELOG.md b/CHANGELOG.md index 77a69c738f44c306e304609bde5ed8e995ac6508..20d67405593f1188b18dc57f3f2d0902ad37a766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ # Changelog All notable changes to this project will be documented in this file. +## v0.6.3 - 2024-07-30 - fixed custom-sampling + +### general: +* reference series now also uses the sampling type and adapts the date range accordingly +* fixed calc_data_capture + +### technical: +* custom-sampling statistics are now returned with an index that matches the start date instead of the start of the year + ## v0.6.2 - 2024-07-22 - bugfixes ### general: diff --git a/dist/toarstats-0.6.3-py3-none-any.whl b/dist/toarstats-0.6.3-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..fff61d4df197c040285c71c9064ab4341973cb3f Binary files /dev/null and b/dist/toarstats-0.6.3-py3-none-any.whl differ diff --git a/setup.cfg b/setup.cfg index 1199ee5938b1fb1f95f4093e161bb8bc76a81d55..29ab0fc3f343d8883ce9dda6ef55d78f3342e75b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = toarstats -version = 0.6.2 +version = 0.6.3 author = Niklas Selke, Martin Schultz, Max Lensing author_email = n.selke@fz-juelich.de, m.schultz@fz-juelich.de, m.lensing@fz-juelich.de description = Collection of statistics for the TOAR community diff --git a/toarstats/metrics/interface.py b/toarstats/metrics/interface.py index 9cf39d7cd8eb95e20562b8e0eeb993d2085808e3..a34d97aeecf2db7eadadb7d05e629dd9c572d945 100644 --- a/toarstats/metrics/interface.py +++ b/toarstats/metrics/interface.py @@ -127,7 +127,7 @@ def calculate_statistics( else DEFAULT_CROPS, input_parameters.required.seasons, input_parameters.required.crops ) - ref = create_reference_series(input_parameters.data.index, daterange) + ref = create_reference_series(input_parameters.data.index, input_parameters.sampling, daterange) resample_rule = ("seasonal" if input_parameters.sampling == "vegseason" else 
input_parameters.sampling) min_data_capture_value = (input_parameters.min_data_capture @@ -145,8 +145,14 @@ def calculate_statistics( min_data_capture_value ) ) - results = harmonize_time( - results, resample_rule, - "H" if "diurnal_cycle" in input_parameters.statistics else None - ) + + if resample_rule == "custom": + custom_index = pd.DatetimeIndex([ref.index.min().strftime("%Y-%m-%d")]) + for res in results: + res["ser"].index = custom_index + else: + results = harmonize_time( + results, resample_rule, + "H" if "diurnal_cycle" in input_parameters.statistics else None + ) return pd.DataFrame({res["name"]: res["ser"] for res in results}) diff --git a/toarstats/metrics/stats_utils.py b/toarstats/metrics/stats_utils.py index 11545fa9bd43ff43baad9280d6e5dfd15857e2f2..5483663d3954283837d158f1cc0a3522654480c5 100644 --- a/toarstats/metrics/stats_utils.py +++ b/toarstats/metrics/stats_utils.py @@ -113,40 +113,48 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None, :return: A series with the data capture fraction """ - if sampling == "100YS": - ser_tmp = ser.resample(sampling).count().values - fcov = ser_tmp / ref.resample(sampling).count().values - data_capture = pd.Series([fcov[0]], index=[ref.index[0].round('H')], name="data_capture") - else: - ser_tmp = ser.resample(sampling).count() - fcov = ser_tmp / ref.resample(sampling).count() - data_capture = fcov.reindex(ser_tmp.index) - return data_capture + # brings the series onto the index of the reference time series; timestamps without a value in the series are represented by NaN + ser_com = ser.combine(ref, max, fill_value=None) + + ser_tmp = ser_com.resample(sampling).count() + fcov = ser_tmp / ref.resample(sampling).count() + return fcov.reindex(ser_tmp.index) -def create_reference_series(index, daterange=None): - """Create a reference series. +def create_reference_series(index, sampling, daterange): + """Create a reference series spanning the daterange, additionally extended to the intervals specified by the sampling. 
:param index: the given index + :param sampling: sampling rate defines the daterange of the ref-series + e.g. monthly changes the daterange to fill in all months touched by daterange + :param daterange: start- and end-date separated by comma if set, the reference series creates a series from start to end-date - :return: A series with a date range spanning from the beginning of - the earliest given year to the ending of the latest given - year and filled with zeros + :return: A series with a date range based on the specified date range. The date range is extended based on the + specified sampling. """ min_date = index.min() max_date = index.max() - if daterange: - start_date = pd.to_datetime(daterange.split(",")[0]) - start_date = start_date.replace(minute=min_date.minute) - end_date = pd.to_datetime(daterange.split(",")[1]) - end_date = end_date.replace(minute=max_date.minute) - else: - start_date = f"{min_date.year}-01-01 00:{min_date.minute}" - end_date = f"{max_date.year}-12-31 23:{max_date.minute}" + # adjust start and end-date by the minute-offset of the timeseries + start_date = pd.to_datetime(daterange.split(",")[0]) + start_date = start_date.replace(minute=min_date.minute) + end_date = pd.to_datetime(daterange.split(",")[1]) + end_date = end_date.replace(minute=max_date.minute) + + if sampling in ["annual", "seasonal", "summer", "xsummer"]: + start_date = start_date.replace(month=1, day=1, hour=0) + end_date = end_date.replace(month=12, day=31, hour=23) + elif sampling == "monthly": + start_date = start_date.replace(day=1, hour=0) + end_date = pd.offsets.MonthEnd().rollforward(end_date) + end_date = end_date.replace(hour=23) + elif sampling == "daily": + start_date = start_date.replace(hour=0) + end_date = end_date.replace(hour=23) + reference_index = pd.date_range(start=start_date, end=end_date, freq="h") return pd.Series(0, reference_index)