Skip to content
Snippets Groups Projects
Commit 553582f7 authored by Carsten Hinz's avatar Carsten Hinz
Browse files

added debug option for exceptions

added some typing for return values in toar_rest_client
added flag to control debug output by toar_rest_client; Default: False
parent 53cab2e4
No related branches found
No related tags found
1 merge request!11Creation of first beta release version
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from datetime import datetime as dt from datetime import datetime as dt
from collections import namedtuple from collections import namedtuple
from pathlib import Path from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample from toargridding.metadata import TimeSample
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Creation of the request configuration.
# A Config bundles everything get_gridded_toar_data needs for one request.
Config = namedtuple("Config", ["grid", "time", "variables", "stats"])

valid_data = Config(
    RegularGrid(lat_resolution=1.9, lon_resolution=2.5),
    TimeSample(start=dt(2000, 1, 1), end=dt(2019, 12, 31), sampling="daily"),  # possibly adapt range :-)
    ["mole_fraction_of_ozone_in_air"],  # variable name
    ["dma8epax"],  # change to dma8epa_strict
)

configs = {
    "test_ta": valid_data,
}

# testing access:
#config = configs["test_ta"]
#config.grid
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# CAVE: the request takes over 30 min per requested year. Therefore this cell
# needs to be executed at different times to check if the results are ready
# for download. The processing is done on the server of the TOAR database.
# A restart of the cell continues the request to the REST API if the requested
# data are ready for download. The download can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)

analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)

# enable verbose diagnostics of the REST client for failed requests
Connection.DEBUG = True

for person, config in configs.items():
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
    )
    # write each gridded result to a NetCDF file named after its metadata id
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}.nc")
        print(metadata.get_id())
``` ```
......
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from datetime import datetime as dt from datetime import datetime as dt
from collections import namedtuple from collections import namedtuple
from pathlib import Path from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample from toargridding.metadata import TimeSample
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Creation of the request configuration.
# moreOptions is implemented as a dict to add additional arguments to the
# query to the REST API. For example the field toar1_category with its
# possible values Urban, RuralLowElevation, RuralHighElevation and
# Unclassified can be added; see page 18 in
# https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf
# or type_of_area with urban, suburban and rural on page 20 can be used.
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

details4Query = {
    #"toar1_category" : "Urban" #uncomment if wished:-)
    #"toar1_category" : "RuralLowElevation" #uncomment if wished:-)
    #"toar1_category" : "RuralHighElevation" #uncomment if wished:-)
    #"type_of_area" : "Urban" #also test Rural, Suburban,
    "type_of_area" : "Rural" #also test Rural, Suburban,
    #"type_of_area" : "Suburban" #also test Rural, Suburban,
}

valid_data = Config(
    RegularGrid(lat_resolution=1.9, lon_resolution=2.5),
    TimeSample(start=dt(2000, 1, 1), end=dt(2019, 12, 31), sampling="daily"),  # possibly adapt range :-)
    ["mole_fraction_of_ozone_in_air"],  # variable name
    #[ "mean", "dma8epax"],  # will start one request after the other...
    ["dma8epa_strict"],
    details4Query,
)

configs = {
    "test_ta": valid_data,
}

# testing access:
config = configs["test_ta"]
config.grid
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# CAVE: this cell runs about 30 minutes per requested year.
# The processing is done on the server of the TOAR database.
# A restart of the cell continues the request to the REST API if the requested
# data are ready for download. The download can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)

analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)

# enable verbose diagnostics of the REST client for failed requests
Connection.DEBUG = True

for person, config in configs.items():
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
        # forward the extra query fields (e.g. type_of_area) to the REST API
        **config.moreOptions,
    )
    # write each gridded result to a NetCDF file named after its metadata id
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}.nc")
        print(metadata.get_id())
``` ```
......
...@@ -153,6 +153,7 @@ class Cache: ...@@ -153,6 +153,7 @@ class Cache:
class Connection: class Connection:
DEBUG = False
def __init__(self, endpoint, cache_dir): def __init__(self, endpoint, cache_dir):
"""connection to the rest API of the TOAR database """connection to the rest API of the TOAR database
...@@ -175,7 +176,7 @@ class Connection: ...@@ -175,7 +176,7 @@ class Connection:
# max wait time is 30min # max wait time is 30min
self.wait_seconds = [minutes * 60 for minutes in (5, 5, 5, 5, 5, 5)] self.wait_seconds = [minutes * 60 for minutes in (5, 5, 5, 5, 5, 5)]
def get(self, query_options : QueryOptions): def get(self, query_options : QueryOptions) -> requests.models.Response:
"""get results for a request. """get results for a request.
This is the main function to obtained data from the TOAR DB. It will start requests or lookup if an already started requests is finished. This is the main function to obtained data from the TOAR DB. It will start requests or lookup if an already started requests is finished.
...@@ -193,9 +194,7 @@ class Connection: ...@@ -193,9 +194,7 @@ class Connection:
response.raise_for_status() response.raise_for_status()
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
print(f"\tconnection error ({e.response.status_code}: {e.response.reason}). Trying again later") print(f"\tconnection error ({e.response.status_code}: {e.response.reason}). Trying again later")
self.printExecption(e) self.printExecption(e, response)
print(response.content)
print(response.json())
continue continue
#are our results ready to obtain? #are our results ready to obtain?
if response.headers["Content-Type"] == "application/zip": if response.headers["Content-Type"] == "application/zip":
...@@ -206,7 +205,7 @@ class Connection: ...@@ -206,7 +205,7 @@ class Connection:
f"No data available after {sum(self.wait_seconds) / 60} minutes. retry later." f"No data available after {sum(self.wait_seconds) / 60} minutes. retry later."
) )
def get_status_endpoint(self, query_options: QueryOptions): def get_status_endpoint(self, query_options: QueryOptions) -> str:
"""get endpoint to results of a request """get endpoint to results of a request
This function checks if the request is already known and has been submitted to the TOAD DB. This function checks if the request is already known and has been submitted to the TOAD DB.
...@@ -231,10 +230,8 @@ class Connection: ...@@ -231,10 +230,8 @@ class Connection:
raise RuntimeError("Connection to TAORDB timed out (ReadTimeout) while checking cached status point. Please try again later.") raise RuntimeError("Connection to TAORDB timed out (ReadTimeout) while checking cached status point. Please try again later.")
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
#TODO add detailed processing: What was the reason for the error? Do we really need to create a new request or is there another problem, that might resolve by simply waiting #TODO add detailed processing: What was the reason for the error? Do we really need to create a new request or is there another problem, that might resolve by simply waiting
print(f"A connection error occurred:") print(f"A HTTP error occurred:")
self.printExecption(e) self.printExecption(e, response)
print(response.content)
print(response.json())
print(f"Status Endpoint: {status_endpoint}") print(f"Status Endpoint: {status_endpoint}")
#use inverse order for saving. the status endpoint should be more unique #use inverse order for saving. the status endpoint should be more unique
self.cache_backup.put(status_endpoint, query_options.cache_key) self.cache_backup.put(status_endpoint, query_options.cache_key)
...@@ -251,7 +248,7 @@ class Connection: ...@@ -251,7 +248,7 @@ class Connection:
status_endpoint = self.query_for_status_endpoint(query_options) status_endpoint = self.query_for_status_endpoint(query_options)
return status_endpoint return status_endpoint
def query_for_status_endpoint(self, query_options: QueryOptions): def query_for_status_endpoint(self, query_options: QueryOptions) -> str:
"""create and new request to the TOAR DB. """create and new request to the TOAR DB.
Adds the status endpoint of the request to the cache. Adds the status endpoint of the request to the cache.
...@@ -267,10 +264,7 @@ class Connection: ...@@ -267,10 +264,7 @@ class Connection:
response = self.wait_and_get(self.endpoint, asdict(query_options, dict_factory=quarryToDict)) response = self.wait_and_get(self.endpoint, asdict(query_options, dict_factory=quarryToDict))
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
print(f"A connection error occurred:") print(f"A connection error occurred:")
self.printExecption(e) self.printExecption(e, response)
print(f"{response=}")
print(f"{response.content=}")
print(response.json())
raise e raise e
except requests.exceptions.ReadTimeout as e: except requests.exceptions.ReadTimeout as e:
print("Caught read timeout.") print("Caught read timeout.")
...@@ -288,7 +282,7 @@ class Connection: ...@@ -288,7 +282,7 @@ class Connection:
def wait_and_get( def wait_and_get(
self, endpoint : str, query_options : Dict =None, wait_secs=None, timeout=(3.05, 20) self, endpoint : str, query_options : Dict =None, wait_secs=None, timeout=(3.05, 20)
): ) -> requests.models.Response:
"""accesses given endpoint """accesses given endpoint
Parameters: Parameters:
...@@ -306,10 +300,14 @@ class Connection: ...@@ -306,10 +300,14 @@ class Connection:
time.sleep(wait_secs) time.sleep(wait_secs)
return requests.get(endpoint, params=query_options, timeout=timeout) return requests.get(endpoint, params=query_options, timeout=timeout)
def printExecption(self, e : requests.exceptions.HTTPError, response : requests.Response):
    """Print diagnostic details for a failed request.

    Output is only produced when the class-level flag ``Connection.DEBUG``
    is True; otherwise this method is a no-op.

    NOTE(review): the method name is misspelled ("Execption") — kept
    unchanged for backward compatibility with existing callers.

    Parameters:
    ----------
    e:
        the HTTP error raised by ``response.raise_for_status()``
    response:
        the response object whose content should be dumped
    """
    if Connection.DEBUG:
        print(f"Status Code: {e.response.status_code}")
        print(f"Reason: {e.response.reason}")
        print(f"Text: {e.response.text}")
        print(f"{response=}")
        print(f"{response.content=}")
        try:
            # The body is not guaranteed to be JSON (e.g. an HTML error
            # page); don't let the error reporting itself raise.
            print(response.json())
        except ValueError:
            print("(response body is not valid JSON)")
class AnalysisService: class AnalysisService:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment