Skip to content
Snippets Groups Projects
Commit 553582f7 authored by Carsten Hinz's avatar Carsten Hinz
Browse files

added debug option for exceptions

added some typing for return values in toar_rest_client
added flag to control debug output by toar_rest_client; Default: False
parent 53cab2e4
No related branches found
No related tags found
1 merge request!11Creation of first beta release version
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from datetime import datetime as dt from datetime import datetime as dt
from collections import namedtuple from collections import namedtuple
from pathlib import Path from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample from toargridding.metadata import TimeSample
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Creation of the request configuration.
# A Config bundles everything get_gridded_toar_data needs for one request.
Config = namedtuple("Config", ["grid", "time", "variables", "stats"])

valid_data = Config(
    RegularGrid(lat_resolution=1.9, lon_resolution=2.5),
    TimeSample(start=dt(2000, 1, 1), end=dt(2019, 12, 31), sampling="daily"),  # possibly adapt range :-)
    ["mole_fraction_of_ozone_in_air"],  # variable name
    ["dma8epax"],  # change to dma8epa_strict
)

configs = {
    "test_ta": valid_data,
}

# testing access:
#config = configs["test_ta"]
#config.grid
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# CAVE: the request takes over 30 min per requested year. Therefore this cell
# needs to be executed at different times to check if the results are ready
# for download. The processing is done on the server of the TOAR database.
# A restart of the cell continues the request to the REST API if the requested
# data are ready for download. The download can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)

analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)

# enable verbose diagnostics of the REST client for failed requests
Connection.DEBUG = True

for person, config in configs.items():
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
    )
    # write each gridded result to a NetCDF file named after its metadata id
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}.nc")
        print(metadata.get_id())
``` ```
......
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from datetime import datetime as dt from datetime import datetime as dt
from collections import namedtuple from collections import namedtuple
from pathlib import Path from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample from toargridding.metadata import TimeSample
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Creation of the request configuration.
# moreOptions is implemented as a dict to add additional arguments to the
# query to the REST API. For example the field toar1_category with its
# possible values Urban, RuralLowElevation, RuralHighElevation and
# Unclassified can be added; see page 18 in
# https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf
# or type_of_area with urban, suburban and rural on page 20 can be used.
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

details4Query = {
    #"toar1_category" : "Urban" #uncomment if wished:-)
    #"toar1_category" : "RuralLowElevation" #uncomment if wished:-)
    #"toar1_category" : "RuralHighElevation" #uncomment if wished:-)
    #"type_of_area" : "Urban" #also test Rural, Suburban,
    "type_of_area" : "Rural" #also test Rural, Suburban,
    #"type_of_area" : "Suburban" #also test Rural, Suburban,
}

valid_data = Config(
    RegularGrid(lat_resolution=1.9, lon_resolution=2.5),
    TimeSample(start=dt(2000, 1, 1), end=dt(2019, 12, 31), sampling="daily"),  # possibly adapt range :-)
    ["mole_fraction_of_ozone_in_air"],  # variable name
    #[ "mean", "dma8epax"],  # will start one request after the other...
    ["dma8epa_strict"],
    details4Query,
)

configs = {
    "test_ta": valid_data,
}

# testing access:
config = configs["test_ta"]
config.grid
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# CAVE: this cell runs about 30 minutes per requested year.
# The processing is done on the server of the TOAR database.
# A restart of the cell continues the request to the REST API if the requested
# data are ready for download. The download can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)

analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)

# enable verbose diagnostics of the REST client for failed requests
Connection.DEBUG = True

for person, config in configs.items():
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
        # forward the extra query fields (e.g. type_of_area) to the REST API
        **config.moreOptions,
    )
    # write each gridded result to a NetCDF file named after its metadata id
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}.nc")
        print(metadata.get_id())
``` ```
......
...@@ -153,6 +153,7 @@ class Cache: ...@@ -153,6 +153,7 @@ class Cache:
class Connection: class Connection:
DEBUG = False
def __init__(self, endpoint, cache_dir): def __init__(self, endpoint, cache_dir):
"""connection to the rest API of the TOAR database """connection to the rest API of the TOAR database
...@@ -175,7 +176,7 @@ class Connection: ...@@ -175,7 +176,7 @@ class Connection:
# max wait time is 30min # max wait time is 30min
self.wait_seconds = [minutes * 60 for minutes in (5, 5, 5, 5, 5, 5)] self.wait_seconds = [minutes * 60 for minutes in (5, 5, 5, 5, 5, 5)]
def get(self, query_options : QueryOptions): def get(self, query_options : QueryOptions) -> requests.models.Response:
"""get results for a request. """get results for a request.
This is the main function to obtained data from the TOAR DB. It will start requests or lookup if an already started requests is finished. This is the main function to obtained data from the TOAR DB. It will start requests or lookup if an already started requests is finished.
...@@ -193,9 +194,7 @@ class Connection: ...@@ -193,9 +194,7 @@ class Connection:
response.raise_for_status() response.raise_for_status()
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
print(f"\tconnection error ({e.response.status_code}: {e.response.reason}). Trying again later") print(f"\tconnection error ({e.response.status_code}: {e.response.reason}). Trying again later")
self.printExecption(e) self.printExecption(e, response)
print(response.content)
print(response.json())
continue continue
#are our results ready to obtain? #are our results ready to obtain?
if response.headers["Content-Type"] == "application/zip": if response.headers["Content-Type"] == "application/zip":
...@@ -206,7 +205,7 @@ class Connection: ...@@ -206,7 +205,7 @@ class Connection:
f"No data available after {sum(self.wait_seconds) / 60} minutes. retry later." f"No data available after {sum(self.wait_seconds) / 60} minutes. retry later."
) )
def get_status_endpoint(self, query_options: QueryOptions): def get_status_endpoint(self, query_options: QueryOptions) -> str:
"""get endpoint to results of a request """get endpoint to results of a request
This function checks if the request is already known and has been submitted to the TOAD DB. This function checks if the request is already known and has been submitted to the TOAD DB.
...@@ -231,10 +230,8 @@ class Connection: ...@@ -231,10 +230,8 @@ class Connection:
raise RuntimeError("Connection to TAORDB timed out (ReadTimeout) while checking cached status point. Please try again later.") raise RuntimeError("Connection to TAORDB timed out (ReadTimeout) while checking cached status point. Please try again later.")
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
#TODO add detailed processing: What was the reason for the error? Do we really need to create a new request or is there another problem, that might resolve by simply waiting #TODO add detailed processing: What was the reason for the error? Do we really need to create a new request or is there another problem, that might resolve by simply waiting
print(f"A connection error occurred:") print(f"A HTTP error occurred:")
self.printExecption(e) self.printExecption(e, response)
print(response.content)
print(response.json())
print(f"Status Endpoint: {status_endpoint}") print(f"Status Endpoint: {status_endpoint}")
#use inverse order for saving. the status endpoint should be more unique #use inverse order for saving. the status endpoint should be more unique
self.cache_backup.put(status_endpoint, query_options.cache_key) self.cache_backup.put(status_endpoint, query_options.cache_key)
...@@ -251,7 +248,7 @@ class Connection: ...@@ -251,7 +248,7 @@ class Connection:
status_endpoint = self.query_for_status_endpoint(query_options) status_endpoint = self.query_for_status_endpoint(query_options)
return status_endpoint return status_endpoint
def query_for_status_endpoint(self, query_options: QueryOptions): def query_for_status_endpoint(self, query_options: QueryOptions) -> str:
"""create and new request to the TOAR DB. """create and new request to the TOAR DB.
Adds the status endpoint of the request to the cache. Adds the status endpoint of the request to the cache.
...@@ -267,10 +264,7 @@ class Connection: ...@@ -267,10 +264,7 @@ class Connection:
response = self.wait_and_get(self.endpoint, asdict(query_options, dict_factory=quarryToDict)) response = self.wait_and_get(self.endpoint, asdict(query_options, dict_factory=quarryToDict))
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
print(f"A connection error occurred:") print(f"A connection error occurred:")
self.printExecption(e) self.printExecption(e, response)
print(f"{response=}")
print(f"{response.content=}")
print(response.json())
raise e raise e
except requests.exceptions.ReadTimeout as e: except requests.exceptions.ReadTimeout as e:
print("Caught read timeout.") print("Caught read timeout.")
...@@ -288,7 +282,7 @@ class Connection: ...@@ -288,7 +282,7 @@ class Connection:
def wait_and_get( def wait_and_get(
self, endpoint : str, query_options : Dict =None, wait_secs=None, timeout=(3.05, 20) self, endpoint : str, query_options : Dict =None, wait_secs=None, timeout=(3.05, 20)
): ) -> requests.models.Response:
"""accesses given endpoint """accesses given endpoint
Parameters: Parameters:
...@@ -306,10 +300,14 @@ class Connection: ...@@ -306,10 +300,14 @@ class Connection:
time.sleep(wait_secs) time.sleep(wait_secs)
return requests.get(endpoint, params=query_options, timeout=timeout) return requests.get(endpoint, params=query_options, timeout=timeout)
def printExecption(self, e : requests.exceptions.HTTPError, response : requests.Response):
    """Print diagnostic details for a failed request.

    Output is only produced when the class-level flag ``Connection.DEBUG``
    is True; otherwise this method is a no-op.

    NOTE(review): the method name is misspelled ("Execption") — kept
    unchanged for backward compatibility with existing callers.

    Parameters:
    ----------
    e:
        the HTTP error raised by ``response.raise_for_status()``
    response:
        the response object whose content should be dumped
    """
    if Connection.DEBUG:
        print(f"Status Code: {e.response.status_code}")
        print(f"Reason: {e.response.reason}")
        print(f"Text: {e.response.text}")
        print(f"{response=}")
        print(f"{response.content=}")
        try:
            # The body is not guaranteed to be JSON (e.g. an HTML error
            # page); don't let the error reporting itself raise.
            print(response.json())
        except ValueError:
            print("(response body is not valid JSON)")
class AnalysisService: class AnalysisService:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment