Commit 2ad9e2e1 authored by Carsten Hinz

added handling of errors for requests without results

added script for testing of different timestamp ranges
parent 472d0c02
Merge request !11: Creation of first beta release version
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
# creation of the request
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])
varName = "country"
grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)
configs = dict()
country = "AL"
valid_data = Config(
    grid,
    TimeSample(start=dt(2000, 1, 1), end=dt(2018, 12, 31), sampling="daily"),  # possibly adapt the range
    ["mole_fraction_of_ozone_in_air"],  # variable name
    ["dma8epa_strict"],
    {varName: country},
)
configs[f"test_ta{country}"] = valid_data
# CAVE: this cell runs for about 45 minutes per requested year, therefore we increase the waiting duration to 1 h per request.
# The processing is done on the server of the TOAR database.
# Restarting the cell resumes the request to the REST API and downloads the data once they are ready.
# The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
Connection.DEBUG = True
# If needed, adapt the interval between result queries and the total duration before the client stops polling.
# As the requests take about 45 min, waiting 60 min before timing out is more suitable than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)
createdFiles = []
for person, config in configs.items():
    print(f"\nProcessing {person}:")
    print("--------------------")
    try:
        datasets, metadatas = get_gridded_toar_data(
            analysis_service=analysis_service,
            grid=config.grid,
            time=config.time,
            variables=config.variables,
            stats=config.stats,
            **config.moreOptions,
        )
    except KeyError:
        print("failed for", person)
        continue
\ No newline at end of file
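As committed, the script requests the data but never stores the gridded results. A minimal completion of the loop body, mirroring the save step from the notebook cell shown further below (and assuming, as there, that `datasets` are xarray objects offering `to_netcdf`), would be:

```python
    # sketch: continuation of the `for person, config` loop above,
    # mirroring the save step from the notebook cell further below
    for dataset, metadata in zip(datasets, metadatas):
        outName = result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
```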
@@ -114,7 +114,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.7"
+"version": "3.11.5"
}
},
"nbformat": 4,
......
%% Cell type:code id: tags:
``` python
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.metadata_utilities import countryCodes
```
%% Cell type:code id: tags:
``` python
# creation of the request
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])
# see page 18 in https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf
varName = "country"
stationCountries = countryCodes()
validCountries = stationCountries.getValidVocabular(controlName="Country Code", varName=varName)
grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)
configs = dict()
for country in validCountries:
    valid_data = Config(
        grid,
        TimeSample(start=dt(2000, 1, 1), end=dt(2018, 12, 31), sampling="daily"),  # possibly adapt the range
        ["mole_fraction_of_ozone_in_air"],  # variable name
        ["dma8epa_strict"],
        {varName: country},
    )
    configs[f"test_ta{country}"] = valid_data
```
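Not part of the commit, but useful when trying this notebook out: since each request takes on the order of 45 minutes per requested year (see the note in the next cell), a quick smoke test might restrict the run to a handful of countries first:

```python
# optional sketch: keep only the first three country configurations for a fast test run
configs = dict(list(configs.items())[:3])
```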
%% Cell type:code id: tags:
``` python
# CAVE: this cell runs for about 45 minutes per requested year, therefore we increase the waiting duration to 1 h per request.
# The processing is done on the server of the TOAR database.
# Restarting the cell resumes the request to the REST API and downloads the data once they are ready.
# The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
Connection.DEBUG = True
# If needed, adapt the interval between result queries and the total duration before the client stops polling.
# As the requests take about 45 min, waiting 60 min before timing out is more suitable than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)
createdFiles = []
for person, config in configs.items():
    print(f"\nProcessing {person}:")
    print("--------------------")
    try:
        datasets, metadatas = get_gridded_toar_data(
            analysis_service=analysis_service,
            grid=config.grid,
            time=config.time,
            variables=config.variables,
            stats=config.stats,
            **config.moreOptions,
        )
    except KeyError:
        print("failed for", person)
        continue
    for dataset, metadata in zip(datasets, metadatas):
        outName = result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
        print(metadata.get_id())
```
%% Cell type:code id: tags:
``` python
# TODO: now we only need to combine all the obtained results...
```
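One way to tackle the TODO above is xarray's multi-file support. A minimal sketch, assuming the per-country files collected in `createdFiles` share the same grid and time axis, and introducing a hypothetical `country` dimension to stack them along:

```python
import xarray as xr

# sketch: stack the per-country result files along a new "country" dimension;
# this is one plausible combination strategy, not the project's confirmed approach
combined = xr.open_mfdataset(createdFiles, combine="nested", concat_dim="country")
combined.to_netcdf(result_basepath / "all_countries.nc")
```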
......
@@ -152,6 +152,7 @@ class Metadata:
        For example, used for saving the link to the results of a request in the cache.
        """
        addition = "_".join(f"{key}-{val}" for key, val in sorted(self.moreOptions.items()))
+        addition = addition.replace("/", "%2F")
        return "_".join(str(i) for i in [self.variable.name, self.statistic, self.time.daterange_option, self.time.frequency, addition, "at", datetime.now().date().isoformat()])

    def get_title(self) -> str:
......
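The new `replace("/", "%2F")` line matters because `get_id()` feeds into cache and result file names; a value containing a slash would otherwise be interpreted as a path separator. A tiny illustration with a hypothetical option value:

```python
from pathlib import Path

addition = "daterange-2000-01-01/2018-12-31"  # hypothetical moreOptions fragment containing "/"
safe = addition.replace("/", "%2F")
print(Path("cache") / f"{safe}.json")  # cache/daterange-2000-01-01%2F2018-12-31.json
```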
@@ -450,6 +450,9 @@ class AnalysisService:
        """
        zip_stream = io.BytesIO(content)
        with ZipFile(zip_stream) as myzip:
+            if len(myzip.namelist()) == 1:
+                print("Downloaded data do not contain a timeseries.")
+                raise KeyError("Data file is empty")  # TODO: replace this with a custom exception
            timeseries = self.extract_data(myzip, metadata.statistic)
            timeseries_metadata = self.extract_data(myzip, AnalysisService.METADATA)
@@ -533,4 +536,5 @@ class AnalysisServiceDownload(AnalysisService):
        metadata for the request.
        """
        addition = "_".join(f"{key}{val}" for key, val in sorted(metadata.moreOptions.items()))
+        addition = addition.replace("/", "%2F")
        return "_".join(str(i) for i in [metadata.statistic, metadata.time.sampling, metadata.variable.cf_standardname, metadata.time.start.date(), metadata.time.end.date(), addition]) + ".zip"
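The TODO in the hunk above asks for a custom exception. One low-risk shape, kept here as a sketch (the name `EmptyDownloadError` is hypothetical): subclass `KeyError`, so the notebook's existing `except KeyError` handler keeps working while call sites can still catch the specific error.

```python
# sketch of the custom exception mentioned in the TODO; the name is hypothetical.
# Deriving from KeyError keeps existing `except KeyError` handlers compatible.
class EmptyDownloadError(KeyError):
    """Raised when a downloaded ZIP archive contains no timeseries data."""
```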