From 4da83bc5cdab8b57076e05d8146a26bef91b724b Mon Sep 17 00:00:00 2001 From: Carsten Hinz <c.hinz@fz-juelich.de> Date: Thu, 2 May 2024 10:25:14 +0200 Subject: [PATCH] quality_controll.ipynb -changed sampling from daily to monthly -fixed Metadata.construct parameter order toar_rest_client: AnalysisServiceDownload: -added CF name of variable to sample file name fixed typos and added some documentation --- tests/quality_controll.ipynb | 4 ++-- toargridding/metadata.py | 13 +++++++++-- toargridding/toar_rest_client.py | 38 ++++++++++++++++++++++++++------ 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/tests/quality_controll.ipynb b/tests/quality_controll.ipynb index ddceef5..744629b 100644 --- a/tests/quality_controll.ipynb +++ b/tests/quality_controll.ipynb @@ -39,8 +39,8 @@ "analysis_service = AnalysisServiceDownload(endpoint, cache_dir, data_download_dir)\n", "my_grid = RegularGrid(1.9, 2.5)\n", "\n", - "time = TimeSample(dt(2016,1,1), dt(2016,12,31), \"daily\")\n", - "metadata = Metadata.construct(\"mole_fraction_of_ozone_in_air\", \"mean\", time)\n" + "time = TimeSample(dt(2016,1,1), dt(2016,12,31), \"monthly\")\n", + "metadata = Metadata.construct(\"mole_fraction_of_ozone_in_air\", time, \"mean\")\n" ] }, { diff --git a/toargridding/metadata.py b/toargridding/metadata.py index ac2bae5..b69c6a4 100644 --- a/toargridding/metadata.py +++ b/toargridding/metadata.py @@ -11,6 +11,7 @@ from toargridding.static_metadata import global_cf_attributes, TOARVariable from typing import Dict date_created = datetime.utcnow().strftime("%Y-%m-dT%H:%M:%SZ") +#date_created = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") # fix as utcnow will be removed in the future (requires importing timezone from datetime) COORDINATE_VARIABLES = ["latitude", "longitude", "time"] DATA_VARIABLES = ["mean", "std", "n"] @@ -28,6 +29,14 @@ class TimeSample: """Sampling in time provides conversion into different formats + + Attributes: + start: + start time point + end: + end time point + sampling: + temporal 
aggregation of values, e.g. daily, monthly """ start: datetime @@ -35,7 +44,7 @@ class TimeSample: sampling: str @property - def sampling(self) -> str: # TODO make better + def sampling(self) -> str: """sampling for data request Sampling, i.e. the period used for the calculation of a parameters within the TOAD DB @@ -46,7 +55,7 @@ class TimeSample: @sampling.setter def sampling(self, sampling : str): if sampling not in ALLOWED_SAMPLING_VALUES: - raise ValueError(f"sampling: {sampling} is not in the list of supported samplings for toargridding.") + raise ValueError(f"sampling: {sampling} is not in the list of supported samplings for toargridding: {ALLOWED_SAMPLING_VALUES}") self._sampling = sampling def as_datetime_index(self) -> pd.DatetimeIndex: diff --git a/toargridding/toar_rest_client.py b/toargridding/toar_rest_client.py index 5a956c1..027641e 100644 --- a/toargridding/toar_rest_client.py +++ b/toargridding/toar_rest_client.py @@ -30,7 +30,7 @@ class QueryOptions: statistics: statistical quantity requested from the TOAR database. see toargridding.toarstats_constants.STATISTICS_LIST. sampling: - frequency of sampling within the datarange, e.g. daily, monthly + temporal aggregation, e.g. daily, monthly min_data_capture: most probably the minimum data to include in the request metadata_schema: @@ -113,13 +113,13 @@ class Cache: return storage[key] def put(self, key: str, content: str): - """get add key and content as key-value-pair to cache + """add key and content as key-value-pair to cache """ with self.storage_dict() as storage: storage[key] = content def remove(self, key: str): - """remove a key and content as key-value-pair to cache + """remove a key and content as key-value-pair from cache """ with self.storage_dict() as storage: del storage[key] @@ -160,7 +160,7 @@ class Connection: def get(self, query_options): """get results for a request. - This is the main function to obtaind data from the TOAR DB. 
It will start requests or lookup if an already started requests is finished. + This is the main function to obtain data from the TOAR DB. It will start requests or lookup if an already started requests is finished. Throws an exception, if the results are not available after the waiting time. A restart of the function continues the regular lookup for results. """ @@ -232,7 +232,7 @@ class Connection: query_options: used with the base endpoint to create a request. If None, endpoint is expected to be a full endpoint wait_secs: - sleep in seconds before starting request to TAOR DB + sleep in seconds before starting request to TOAR DB timeout: timeout for the request. """ @@ -305,7 +305,7 @@ class AnalysisService: timeseries: extracted time series metadata: - metadate belonging ot the timeseries. + metadata belonging to the timeseries. return: timeseries without invalid numbers (none, NaN, etc) @@ -357,6 +357,22 @@ class AnalysisService: class AnalysisServiceDownload(AnalysisService): + """download service with caching of requests to the TOARDB + + This service performs the request to the TOAR database and downloads the results of the request to disc before returning it for further processing. + When retrieving data, a check is done if this request has already been cached on disc. 
+ + Attributes: + ---------- + stats_endpoint: + link to statistics service of TOAR DB + cache_dir: + directory to store cache file for requests, needs to exist + sample_dir: + directory for caching results of request to the TOARDB + use_downloaded: + flag to control if the cache of downloaded requests is checked before extracting data from the TOARDB + """ def __init__( self, stats_endpoint, cache_dir, sample_dir: Path, use_downloaded=True ): @@ -382,4 +398,12 @@ class AnalysisServiceDownload(AnalysisService): @staticmethod def get_sample_file_name(metadata: Metadata): - return f"{metadata.statistic}_{metadata.time.sampling}_{metadata.time.start.date()}_{metadata.time.end.date()}.zip" + """creates a filename from the metadata + + At the moment considering statistical method, sampling (temporal aggregation), CF standard name of the variable as well as start and end date. + Parameters: + ---------- + metadata: + metadata for the request. + """ + return f"{metadata.statistic}_{metadata.time.sampling}_{metadata.variable.cf_standardname}_{metadata.time.start.date()}_{metadata.time.end.date()}.zip" -- GitLab