diff --git a/tests/quality_controll.ipynb b/tests/quality_controll.ipynb index ddceef5fafdf22fd0957666c72818bdb8b394f7f..744629b6c82b765395457ecc516619fb65765e70 100644 --- a/tests/quality_controll.ipynb +++ b/tests/quality_controll.ipynb @@ -39,8 +39,8 @@ "analysis_service = AnalysisServiceDownload(endpoint, cache_dir, data_download_dir)\n", "my_grid = RegularGrid(1.9, 2.5)\n", "\n", - "time = TimeSample(dt(2016,1,1), dt(2016,12,31), \"daily\")\n", - "metadata = Metadata.construct(\"mole_fraction_of_ozone_in_air\", \"mean\", time)\n" + "time = TimeSample(dt(2016,1,1), dt(2016,12,31), \"monthly\")\n", + "metadata = Metadata.construct(\"mole_fraction_of_ozone_in_air\", time, \"mean\")\n" ] }, { diff --git a/toargridding/metadata.py b/toargridding/metadata.py index ac2bae50b1895a02c7ab398d391be81f739131e6..b69c6a40b811a1f13ea064e2d6777fd3ccbdc9dc 100644 --- a/toargridding/metadata.py +++ b/toargridding/metadata.py @@ -11,6 +11,7 @@ from toargridding.static_metadata import global_cf_attributes, TOARVariable from typing import Dict date_created = datetime.utcnow().strftime("%Y-%m-dT%H:%M:%SZ") +#date_created = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") # fix as utcnow will be removed in the future; requires "from datetime import timezone" and also repairs the "-dT" typo ("%d" was written as "d") still present in the active line above COORDINATE_VARIABLES = ["latitude", "longitude", "time"] DATA_VARIABLES = ["mean", "std", "n"] @@ -28,6 +29,14 @@ class TimeSample: """Sampling in time provides conversion into different formats + + Attributes: + start: + start time point + end: + end time point + sampling: + temporal aggregation of values, e.g. daily, monthly """ start: datetime
the period used for the calculation of a parameters within the TOAD DB @@ -46,7 +55,7 @@ class TimeSample: @sampling.setter def sampling(self, sampling : str): if sampling not in ALLOWED_SAMPLING_VALUES: - raise ValueError(f"sampling: {sampling} is not in the list of supported samplings for toargridding.") + raise ValueError(f"sampling: {sampling} is not in the list of supported samplings for toargridding: {ALLOWED_SAMPLING_VALUES}") self._sampling = sampling def as_datetime_index(self) -> pd.DatetimeIndex: diff --git a/toargridding/toar_rest_client.py b/toargridding/toar_rest_client.py index 5a956c18813dc1d887b9264d4d4c8e042f2ca3ef..027641edd8b1047a58450d99c40c38983290dacb 100644 --- a/toargridding/toar_rest_client.py +++ b/toargridding/toar_rest_client.py @@ -30,7 +30,7 @@ class QueryOptions: statistics: statistical quantity requested from the TOAR database. see toargridding.toarstats_constants.STATISTICS_LIST. sampling: - frequency of sampling within the datarange, e.g. daily, monthly + temporal aggregation, e.g. daily, monthly min_data_capture: most probably the minimum data to include in the request metadata_schema: @@ -113,13 +113,13 @@ class Cache: return storage[key] def put(self, key: str, content: str): - """get add key and content as key-value-pair to cache + """add key and content as key-value-pair to cache """ with self.storage_dict() as storage: storage[key] = content def remove(self, key: str): - """remove a key and content as key-value-pair to cache + """remove a key and its content from the cache """ with self.storage_dict() as storage: del storage[key] @@ -160,7 +160,7 @@ class Connection: def get(self, query_options): """get results for a request. - This is the main function to obtaind data from the TOAR DB. It will start requests or lookup if an already started requests is finished. + This is the main function to obtain data from the TOAR DB. It will start requests or look up if an already started request is finished. 
Throws an exception, if the results are not available after the waiting time. A restart of the function continues the regular lookup for results. """ @@ -232,7 +232,7 @@ class Connection: query_options: used with the base endpoint to create a request. If None, endpoint is expected to be a full endpoint wait_secs: - sleep in seconds before starting request to TAOR DB + sleep in seconds before starting request to TOAR DB timeout: timeout for the request. """ @@ -305,7 +305,7 @@ class AnalysisService: timeseries: extracted time series metadata: - metadate belonging ot the timeseries. + metadata belonging to the timeseries. return: timeseries without invalid numbers (none, NaN, etc) @@ -357,6 +357,22 @@ class AnalysisService: class AnalysisServiceDownload(AnalysisService): + """download service with caching of requests to the TOAR DB + + This service performs the request to the TOAR database and downloads the results of the request to disc before returning it for further processing. + When retrieving data, a check is done whether this request has already been cached on disc. + + Attributes: + ---------- + stats_endpoint: + link to statistics service of TOAR DB + cache_dir: + directory to store cache file for requests, needs to exist + sample_dir: + directory for caching results of requests to the TOAR DB + use_downloaded: + flag to control if the cache of downloaded requests is checked before extracting data from the TOAR DB + """ def __init__( self, stats_endpoint, cache_dir, sample_dir: Path, use_downloaded=True ): @@ -382,4 +398,12 @@ class AnalysisServiceDownload(AnalysisService): @staticmethod def get_sample_file_name(metadata: Metadata): - return f"{metadata.statistic}_{metadata.time.sampling}_{metadata.time.start.date()}_{metadata.time.end.date()}.zip" + """creates a filename from the metadata + + At the moment considering statistical method, sampling (temporal aggregation) as well as start and end. + Parameters: + ---------- + metadata: + metadata for the request. 
+ """ + return f"{metadata.statistic}_{metadata.time.sampling}_{metadata.variable.cf_standardname}_{metadata.time.start.date()}_{metadata.time.end.date()}.zip"