Skip to content
Snippets Groups Projects
Commit d3398af4 authored by Carsten Hinz's avatar Carsten Hinz
Browse files

Added a custom exception for the case where an analysis does not provide results

parent fb9df16e
No related branches found
No related tags found
2 merge requests!11Creation of first beta release version,!7Resolve "Cache and requests: Handling of dead status endpoints (Internal Server Error, old requests)"
%% Cell type:code id: tags:
``` python
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection, EmptyDataError
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.metadata_utilities import countryCodes
```
%% Cell type:code id: tags:
``` python
# Creation of requests:
# one request configuration (Config) is built per valid country code;
# all requests share the same regular grid, time range, variable and statistic.
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

# For the "country" metadata field see page 18 in
# https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf
varName = "country"

stationCountries = countryCodes()
# Restrict to country codes that are part of the controlled vocabulary of the TOAR database.
validCountries = stationCountries.getValidVocabular(controlName="Country Code", varName=varName)

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for country in validCountries:
    valid_data = Config(
        grid,
        TimeSample(start=dt(2000, 1, 1), end=dt(2018, 12, 31), sampling="daily"),  # possibly adapt range :-)
        ["mole_fraction_of_ozone_in_air"],  # variable name
        ["dma8epa_strict"],  # requested statistic
        {varName: country},  # restrict the request to stations within this country
    )
    configs[f"test_ta{country}"] = valid_data
```
%% Cell type:code id: tags:
``` python
# CAVE: this cell runs about 45 minutes per requested year; therefore we increase the waiting duration to 1h per request.
# The processing is done on the server of the TOAR database.
# A restart of the cell continues the request to the REST API if the requested data are ready for download.
# The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)

analysis_service = AnalysisServiceDownload(
    stats_endpoint=stats_endpoint,
    cache_dir=cache_basepath,
    sample_dir=result_basepath,
    use_downloaded=True,
)
Connection.DEBUG = True

# Maybe adapt the interval for requesting the results and the total duration before the client pauses the requests.
# As the requests take about 45 min, it is more suitable to wait 60 min before timing out than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

createdFiles = []
for person, config in configs.items():
    print(f"\nProcessing {person}:")
    print("--------------------")
    try:
        datasets, metadatas = get_gridded_toar_data(
            analysis_service=analysis_service,
            grid=config.grid,
            time=config.time,
            variables=config.variables,
            stats=config.stats,
            **config.moreOptions,
        )
    except EmptyDataError:
        # The analysis service returned only metadata (no stations or no data
        # points for this country) -> skip it and continue with the next one.
        print("failed for ", person)
        continue

    for dataset, metadata in zip(datasets, metadatas):
        outName = result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
        print(metadata.get_id())
```
%% Cell type:code id: tags:
``` python
##TODO: now we only need to combine all the obtained results...
```
......
......@@ -20,6 +20,14 @@ STATION_LON = "station_coordinates_lng"
COORDS = [STATION_LAT, STATION_LON]
class EmptyDataError(ValueError):
    """Custom exception for requests where the analysis service only provides metadata.

    This can happen if no stations match the request, or if the statistical
    analysis does not yield any data points.
    """

    def __init__(self, message):
        super().__init__(message)
@dataclass(frozen=True)
class QueryOptions:
"""Creation of a request to the TOAR database.
......@@ -256,12 +264,14 @@ class Connection:
try:
response.raise_for_status()
except requests.exceptions.HTTPError as e:
print(f"\tconnection error ({e.response.status_code}: {e.response.reason}). Trying again later")
print(f"\tconnection error ({e.response.status_code}: {e.response.reason}).")
self.printExecption(e, response)
#a Status Code 500 seems indicated an aborted request -> restart the request and continue with new status endpoint
if e.response.status_code == 500:
self.cache.remove(query_options.cache_key)
status_endpoint = self.get_status_endpoint(query_options)
else:
print("\t Trying again later.")
continue
#are our results ready to obtain?
if response.headers["Content-Type"] == "application/zip":
......@@ -305,6 +315,7 @@ class Connection:
#will be overwritten in the next step.
self.cache.remove(query_options.cache_key)
print("Removing status endpoint from cache and submitting new request.")
pass
except:
raise RuntimeError(f"An error occurred during accessing a cached request")
else:
......@@ -336,12 +347,9 @@ class Connection:
if response.headers["Content-Type"] == "application/json":
status_endpoint = response.json()["status"]
#else:
# raise Exception( f"Unexpected type of response: {response.headers['Content-Type']}" )
#TODO: can this raise cause a problem?
response.raise_for_status()
except requests.exceptions.HTTPError as e:
print(f"A connection error occurred:")
print(f"An HTTP error occurred:")
self.printExecption(e, response)
raise e
except requests.exceptions.ReadTimeout as e:
......@@ -500,6 +508,9 @@ class AnalysisService:
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""convert downloaded byte stream into pandas dataframes
throws an EmptyDataError, if the results file does not contain data.
This happens if no stations contribute to a request or if the restrictions of the analysis exclude all points of a station.
Parameters:
----------
content:
......@@ -510,8 +521,7 @@ class AnalysisService:
zip_stream = io.BytesIO(content)
with ZipFile(zip_stream) as myzip:
if len(myzip.namelist())==1:
print("Downloaded data do not contain a timeseries.")
raise KeyError("Data file is empty")#TODO replace this with a custom exception.
raise EmptyDataError("Data file from TOAR analysis service is empty")
timeseries = self.extract_data(myzip, metadata.statistic)
timeseries_metadata = self.extract_data(myzip, AnalysisService.METADATA)
......@@ -536,7 +546,7 @@ class AnalysisServiceDownload(AnalysisService):
"""download service with caching of requests to the TOARDB
This service performs the request to the TOAR database and downloads the results of the request to disc before returning if for further processing.
When retrieving data, a check is donw, if this request has already been cached on disc.
When retrieving data, a check is done, if this request has already been cached on disc.
Attributes:
----------
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment