diff --git a/README.md b/README.md
index 135017adb4a6d54b60ad50a85649f6a7f383a82e..87c98595e982de4fb1bc33abcdacf1352f004b85 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,29 @@ The handling of required packages is done with poetry. So run poetry in the proj
 poetry install
 ```
 
+# How does this tool work?
+
+This tool has two main parts. The first handles requests to the TOAR database and the statistical analysis of the data.
+The second part is the gridding, which is performed offline.
+
+## Request to TOAR Database with Statistical Analysis
+
+Requests are sent to the analysis service of the TOAR database. This allows selecting stations based on their metadata and performing a statistical analysis of their data.
+Whenever a request is submitted, it will be processed. The returned status endpoint will point to the results as soon as the processing is finished.
+A request can take several hours, depending on the time range and the number of requested stations.
+At the moment, there is no way to check the status of a running job before it is finished (as of 2024-05-14).
+
+The status endpoint does not remain valid forever after a request has finished, but the results are kept longer in a cache by the analysis service. Whenever the same request is submitted again, the cache is checked first to see whether the results have already been calculated. Retrieving results from the cache can still take some time, similar to the analysis itself.
+
+There is no check whether an identical request is already running. Submitting the same request multiple times therefore puts additional load on the system and slows down all requests.
+
+The TOAR database has only a limited number of workers for performing statistical analyses. It is therefore advised to run requests one after another, especially for large requests covering many stations and/or a long time range.
+
+## Gridding
+
+The gridding uses a user-defined grid to combine all stations within a cell.
+For each cell, the mean, the standard deviation, and the number of contributing stations are reported.
+
 # Example
 
 There are at the moment three example provided as jupyter notebooks (https://jupyter.org/).
@@ -73,7 +96,7 @@ tests/get_sample_data_manual.ipynb
 Downloads data from the TOAR database with a manual creation of the request to the TOAR database.
 This example does not perform any gridding.
 
-## Retriving data and visualization
+## Retrieving data and visualization
 ```
 tests/quality_controll.ipynb
 ```
diff --git a/toargridding/toar_rest_client.py b/toargridding/toar_rest_client.py
index 33a52955d9dd7ef180dca204d62eb6d6a04f5e91..9c7ff534cd136344aef64dd9c319e6e7b3484e68 100644
--- a/toargridding/toar_rest_client.py
+++ b/toargridding/toar_rest_client.py
@@ -196,6 +196,7 @@ class Connection:
                 continue
             #are our results ready to obtain?
            if response.headers["Content-Type"] == "application/zip":
+                print("Results are available for download")
                return response
            else:
                raise RuntimeError(
@@ -210,6 +211,8 @@ class Connection:
         If the cache knows the endpoint, but the DB has deleted it, the endpoint is removed from the cache and a new request is started.
         Otherwise a new new request is started.
 
+        Raises a RuntimeError in case of a connection error or any other error. In case of an HTTPError, the request is removed from the cache.
+
         Parameters:
         ----------
         Options for the request.
@@ -219,6 +222,9 @@ class Connection:
 
         try: # test for stale cache
             self.wait_and_get(status_endpoint).raise_for_status()
+        except requests.exceptions.ReadTimeout as e:
+            print("Caught read timeout.")
+            raise RuntimeError("Connection to the TOAR DB timed out (ReadTimeout) while checking the cached status endpoint. Please try again later.") from e
         except requests.exceptions.HTTPError as e:
             #TODO add detailed processing: What was the reason for the error? Do we really need to create a new request or is there another problem, that might resolve by simply waiting
             print(f"A connection error occurred:")
@@ -253,9 +259,22 @@ class Connection:
             query_options:
                 request to the TOAR database.
         """
-        response = self.wait_and_get(self.endpoint, asdict(query_options, dict_factory=quarryToDict))
         try:
-            status_endpoint = response.json()["status"]
+            response = self.wait_and_get(self.endpoint, asdict(query_options, dict_factory=quarryToDict))
+        except requests.exceptions.HTTPError as e:
+            print(f"A connection error occurred:")
+            print(f"Status Code: {e.response.status_code}")
+            print(f"Reason: {e.response.reason}")
+            print(f"Text: {e.response.text}")
+            raise
+        except requests.exceptions.ReadTimeout as e:
+            print("Caught read timeout.")
+            raise RuntimeError("Read timeout while querying for the status endpoint.") from e
+        try:
+            if response.headers["Content-Type"] == "application/json":
+                status_endpoint = response.json()["status"]
+            else:
+                raise Exception(f"Unexpected type of response: {response.headers['Content-Type']}")
         except:
             raise RuntimeError(f"Request was not successful. Response by TOAR database: {response.text}")
         self.cache.put(query_options.cache_key, status_endpoint)
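
The request/polling workflow described in the new README section above (submit a query, receive a status endpoint, poll until the service answers with a zip archive) can be illustrated with a minimal sketch that does not use the `Connection` class. The service URL and query parameters below are placeholders, not the real TOAR analysis service API; only the `status`/`application/zip` logic mirrors the code in `toar_rest_client.py`.

```python
import time
import requests

# Assumptions: the service URL and query parameters are placeholders for
# illustration only; they are not the real TOAR analysis service API.
ANALYSIS_SERVICE = "https://toar-analysis.example.invalid/statistics"
QUERY = {"daterange": "2016-01-01,2016-12-31", "statistics": "mean"}

# 1) Submit the request. The service answers with a JSON document that
#    contains the status endpoint for this job.
response = requests.get(ANALYSIS_SERVICE, params=QUERY, timeout=(3.05, 30))
response.raise_for_status()
status_endpoint = response.json()["status"]

# 2) Poll the status endpoint. Processing can take hours; the job is finished
#    as soon as the endpoint answers with a zip archive instead of JSON.
while True:
    result = requests.get(status_endpoint, timeout=(3.05, 30))
    result.raise_for_status()
    if result.headers["Content-Type"] == "application/zip":
        break
    # only a few workers serve these requests, so poll patiently
    time.sleep(300)

# 3) Store the results.
with open("results.zip", "wb") as out:
    out.write(result.content)
```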
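Similarly, the per-cell statistics mentioned under "Gridding" (mean, standard deviation, and number of stations for every cell of a user-defined grid) can be sketched with plain pandas/numpy. The column names, coordinates, and grid resolution are illustrative assumptions and do not reflect the package's own gridding classes.

```python
import numpy as np
import pandas as pd

# Assumption: one row per station with coordinates and the analysed value;
# column names and values are purely illustrative.
stations = pd.DataFrame(
    {
        "lat": [50.9, 51.2, 48.1, 48.3],
        "lon": [6.9, 7.1, 11.5, 11.7],
        "value": [30.0, 34.0, 41.0, 39.0],
    }
)

# user-defined grid resolution in degrees (assumption)
lat_res, lon_res = 2.0, 2.5

# assign every station to the grid cell that contains it
stations["cell_lat"] = np.floor((stations["lat"] + 90.0) / lat_res).astype(int)
stations["cell_lon"] = np.floor((stations["lon"] + 180.0) / lon_res).astype(int)

# per cell: mean, standard deviation and number of contributing stations
# (cells with a single station get NaN as standard deviation)
gridded = (
    stations.groupby(["cell_lat", "cell_lon"])["value"]
    .agg(mean="mean", std="std", n_stations="count")
    .reset_index()
)
print(gridded)
```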