Skip to content
Snippets Groups Projects
Commit b5db7d1f authored by Max Lensing
Browse files

Integrated the sampling type into the creation of the reference series; corrected the DataFrame index used for custom sampling

parent 92fd44d5
No related branches found
No related tags found
1 merge request: !15 "Max version 0.6.3"
...@@ -127,7 +127,7 @@ def calculate_statistics( ...@@ -127,7 +127,7 @@ def calculate_statistics(
else DEFAULT_CROPS, input_parameters.required.seasons, else DEFAULT_CROPS, input_parameters.required.seasons,
input_parameters.required.crops input_parameters.required.crops
) )
ref = create_reference_series(input_parameters.data.index, daterange) ref = create_reference_series(input_parameters.data.index, input_parameters.sampling, daterange)
resample_rule = ("seasonal" if input_parameters.sampling == "vegseason" resample_rule = ("seasonal" if input_parameters.sampling == "vegseason"
else input_parameters.sampling) else input_parameters.sampling)
min_data_capture_value = (input_parameters.min_data_capture min_data_capture_value = (input_parameters.min_data_capture
...@@ -145,8 +145,14 @@ def calculate_statistics( ...@@ -145,8 +145,14 @@ def calculate_statistics(
min_data_capture_value min_data_capture_value
) )
) )
results = harmonize_time(
results, resample_rule, if resample_rule == "custom":
"H" if "diurnal_cycle" in input_parameters.statistics else None custom_index = pd.DatetimeIndex([ref.index.min().strftime("%Y-%m-%d")])
) for res in results:
res["ser"].index = custom_index
else:
results = harmonize_time(
results, resample_rule,
"H" if "diurnal_cycle" in input_parameters.statistics else None
)
return pd.DataFrame({res["name"]: res["ser"] for res in results}) return pd.DataFrame({res["name"]: res["ser"] for res in results})
...@@ -113,40 +113,48 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None, ...@@ -113,40 +113,48 @@ def calc_data_capture(ser, ref, sampling, how, mincount=0, minfrac=None,
:return: A series with the data capture fraction :return: A series with the data capture fraction
""" """
if sampling == "100YS": # brings the series to the form of the reference time series, no values in series are represented by NaN
ser_tmp = ser.resample(sampling).count().values ser_com = ser.combine(ref, max, fill_value=None)
fcov = ser_tmp / ref.resample(sampling).count().values
data_capture = pd.Series([fcov[0]], index=[ref.index[0].round('H')], name="data_capture") ser_tmp = ser_com.resample(sampling).count()
else: fcov = ser_tmp / ref.resample(sampling).count()
ser_tmp = ser.resample(sampling).count() return fcov.reindex(ser_tmp.index)
fcov = ser_tmp / ref.resample(sampling).count()
data_capture = fcov.reindex(ser_tmp.index)
return data_capture
def create_reference_series(index, daterange=None): def create_reference_series(index, sampling, daterange):
"""Create a reference series. """Create a reference series by the daterange. Additional extends to the intervals specified by the sampling.
:param index: the given index :param index: the given index
:param sampling: sampling rate defines the daterange of the ref-series
e.g. monthly changes the daterange to fill in all months touched by daterange
:param daterange: start- and end-date separated by comma :param daterange: start- and end-date separated by comma
if set, the reference series creates a series from start to end-date if set, the reference series creates a series from start to end-date
:return: A series with a date range spanning from the beginning of :return: A series with a date range based on the specified date range. The date range is extended based on the
the earliest given year to the ending of the latest given specified sampling.
year and filled with zeros
""" """
min_date = index.min() min_date = index.min()
max_date = index.max() max_date = index.max()
if daterange: # adjust start and end-date by the minute-offset of the timeseries
start_date = pd.to_datetime(daterange.split(",")[0]) start_date = pd.to_datetime(daterange.split(",")[0])
start_date = start_date.replace(minute=min_date.minute) start_date = start_date.replace(minute=min_date.minute)
end_date = pd.to_datetime(daterange.split(",")[1]) end_date = pd.to_datetime(daterange.split(",")[1])
end_date = end_date.replace(minute=max_date.minute) end_date = end_date.replace(minute=max_date.minute)
else:
start_date = f"{min_date.year}-01-01 00:{min_date.minute}" if sampling in ["annual", "seasonal", "summer", "xsummer"]:
end_date = f"{max_date.year}-12-31 23:{max_date.minute}" start_date = start_date.replace(month=1, day=1, hour=0)
end_date = end_date.replace(month=12, day=31, hour=23)
elif sampling == "monthly":
start_date = start_date.replace(day=1, hour=0)
end_date = pd.offsets.MonthEnd().rollforward(end_date)
end_date = end_date.replace(hour=23)
elif sampling == "daily":
start_date = start_date.replace(hour=0)
end_date = end_date.replace(hour=23)
reference_index = pd.date_range(start=start_date, end=end_date, freq="h") reference_index = pd.date_range(start=start_date, end=end_date, freq="h")
return pd.Series(0, reference_index) return pd.Series(0, reference_index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment