diff --git a/toargridding/toar_rest_client.py b/toargridding/toar_rest_client.py index cef1b093f69c29b16a1b8b8346c3455335067a04..792942697d760d094b5c31af7832061ed259052a 100644 --- a/toargridding/toar_rest_client.py +++ b/toargridding/toar_rest_client.py @@ -1,4 +1,5 @@ import time +from datetime import datetime import io from zipfile import ZipFile from dataclasses import dataclass, asdict, field @@ -413,23 +414,32 @@ class AnalysisService: """ # TODO: Why are there different numbers of columns to be dropped?? # TODO maybe use cf-index here already ? - first, last = timeseries.columns[0], timeseries.columns[-1] ##here we observe some differences in the number of timestamps. - # remove data where utc -> sun/local ? time conversion leads to dateshift - newDates = metadata.time.as_datetime_index() - lenDiff = len(timeseries.columns) - len(newDates) - if lenDiff == 0: - print(f"Info: Obtained data range covers {newDates[0]} to {newDates[-1]}") - elif lenDiff == 1: - print(f"Info: removed columns {timeseries.columns[-1]} to match data range of {newDates[0]} to {newDates[-1]}") - timeseries.drop(columns=[last], inplace=True) - elif lenDiff == 2: - print(f"Info: removed columns {timeseries.columns[0]} and {timeseries.columns[-1]} to match data range of {newDates[0]} to {newDates[-1]}") - timeseries.drop(columns=[first, last], inplace=True) - else: - raise RuntimeError(f"There is a mismatch in the timestamps...\nDownloaded:{timeseries.columns}\nFrom Metadata: {newDates}") - timeseries.columns = newDates + # remove data where utc -> sun/local ? time conversion leads to date shift + + #conversion from string to datetime objects for easier access + #TODO maybe check if the conversion is also valid for annual data + timeseries.columns = pd.DatetimeIndex([ datetime.strptime(val, "%Y-%m-%d") for val in timeseries.columns ]) + + #now drop columns outside of our requested range: + #this is independent of the other part + col2Drop = [] + for retrievedDate in timeseries.columns: + if retrievedDate < metadata.time.start: + col2Drop.append( retrievedDate ) + else: + break + for i in range(-1, -len(timeseries.columns) , -1): + retrievedDate = timeseries.columns[i] + if retrievedDate > metadata.time.end: + col2Drop.append( retrievedDate ) + else: + break + if len(col2Drop): + print(f"[Info:] Dropping columns ({col2Drop}) from TOAR data to match requested date range [{metadata.time.start}, {metadata.time.end}]") + timeseries.drop(columns=col2Drop, inplace=True) + all_na = timeseries.isna().all(axis=1) timeseries = timeseries[~all_na] timeseries = timeseries.fillna(0)