From 9a4d975888b874fc6bdcf461eb888555ec5d1fc8 Mon Sep 17 00:00:00 2001 From: Carsten Hinz <c.hinz@fz-juelich.de> Date: Fri, 24 May 2024 16:55:08 +0200 Subject: [PATCH] notebook examples: -changed from one large request to yearly requests -changed waiting time for individual requests -cleaned up toar_rest_client: -added function to alter the waiting intervals before checking for the results -added indention to cache file -worked on debug output - --- tests/produce_data_manyStations.ipynb | 13 ++++---- tests/produce_data_withOptional.ipynb | 14 ++++---- toargridding/toar_rest_client.py | 47 ++++++++++++++++++++++----- 3 files changed, 51 insertions(+), 23 deletions(-) diff --git a/tests/produce_data_manyStations.ipynb b/tests/produce_data_manyStations.ipynb index 7e6e9dd..51195d1 100644 --- a/tests/produce_data_manyStations.ipynb +++ b/tests/produce_data_manyStations.ipynb @@ -37,11 +37,7 @@ " [ \"dma8epax\" ]# change to dma8epa_strict\n", " )\n", " \n", - " configs[f\"test_ta{year}\"] = valid_data\n", - "\n", - "#testing access:\n", - "#config = configs[\"test_ta\"]\n", - "#config.grid" + " configs[f\"test_ta{year}\"] = valid_data\n" ] }, { @@ -63,8 +59,11 @@ "analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)\n", "\n", "Connection.DEBUG=True\n", - "minutes = 5\n", - "analysis_service.connection.wait_seconds = [minutes * 60 for i in range(5,61,minutes) ]\n", + "\n", + "#here we adopt the durations before, a request is stopped.\n", + "#the default value is 30 minutes. 
\n", + "#waiting up to 3h for one request\n", + "analysis_service.connection.setRequestTimes(interval_min=45, maxWait_min=3*60)\n", "\n", "for person, config in configs.items():\n", " print(f\"\\nProcessing {person}:\")\n", diff --git a/tests/produce_data_withOptional.ipynb b/tests/produce_data_withOptional.ipynb index f1979f8..0963958 100644 --- a/tests/produce_data_withOptional.ipynb +++ b/tests/produce_data_withOptional.ipynb @@ -43,7 +43,7 @@ "grid = RegularGrid( lat_resolution=1.9, lon_resolution=2.5, )\n", "\n", "configs = dict()\n", - "for year in range (0,19):\n", + "for year in range(0,19):\n", " valid_data = Config(\n", " grid,\n", " TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling=\"daily\"),#possibly adopt range:-)\n", @@ -53,11 +53,7 @@ " details4Query\n", " )\n", " \n", - " configs[f\"test_ta{year}\"] = valid_data\n", - "\n", - "#testing access:\n", - "#config = configs[\"test_ta2\"]\n", - "#config.grid" + " configs[f\"test_ta{year}\"] = valid_data\n" ] }, { @@ -79,8 +75,10 @@ "analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)\n", "\n", "Connection.DEBUG=True\n", - "minutes = 5\n", - "analysis_service.connection.wait_seconds = [minutes * 60 for i in range(5,61,minutes) ]\n", + "\n", + "# maybe adopt the interval for requesting the results and the total duration, before the client pauses the requests.\n", + "# as the requests take about 45min, it is more suitable to wait 60min before timing out the requests than the original 30min.\n", + "analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)\n", "\n", "for person, config in configs.items():\n", " print(f\"\\nProcessing {person}:\")\n", diff --git a/toargridding/toar_rest_client.py b/toargridding/toar_rest_client.py index 0854bc3..e6a43de 100644 --- a/toargridding/toar_rest_client.py +++ b/toargridding/toar_rest_client.py @@ -149,7 +149,7 @@ class Cache: yield 
storage_dict with open(self.cache_file, "w") as cache: # teardown - json.dump(storage_dict, cache) + json.dump(storage_dict, cache, indent=2) class Connection: @@ -174,7 +174,27 @@ class Connection: self.cache = Cache(cache_dir) self.cache_backup = Cache(cache_dir, "status_endpoints.old") # max wait time is 30min - self.wait_seconds = [minutes * 60 for minutes in (5, 5, 5, 5, 5, 5)] + self.wait_seconds = [] + def setRequestTimes(self, interval_min, maxWait_min): + """set the intervals and maximum duration to wait, before requests to the analysis service are stopped + + The waiting intervals determine how long and often the status endpoint is checked if the results are available. + As soon as the maximum waiting time is reached, the process is stopped. It can be restarted at any time, as the required endpoint is stored in a cache file + + Parameters: + ----------- + interval_min: + waiting interval in minutes + maxWait_min: + maximum duration to wait in minutes. + """ + if maxWait_min <=0: + raise RuntimeError(f"The maximum waiting time needs to be larger than 0min.") + elif interval_min <0 or interval_min > maxWait_min: + self.wait_seconds[0] = maxWait_min + else: + self.wait_seconds = [interval_min * 60 for _ in range(interval_min, maxWait_min+1, interval_min)] + def get(self, query_options : QueryOptions) -> requests.models.Response: """get results for a request. @@ -262,20 +282,28 @@ class Connection: """ try: response = self.wait_and_get(self.endpoint, asdict(query_options, dict_factory=quarryToDict)) + url = response.history[0].url if response.history else response.url + if Connection.DEBUG: + print(f"[DEBUG] Original request: {url}") + + if response.headers["Content-Type"] == "application/json": + status_endpoint = response.json()["status"] + #else: + # raise Exception( f"Unexpected type of response: {response.headers['Content-Type']}" ) + #TODO: can this raise cause a problem? 
+            response.raise_for_status()
         except requests.exceptions.HTTPError as e:
             print(f"A connection error occurred:")
             self.printExecption(e, response)
             raise e
         except requests.exceptions.ReadTimeout as e:
             print("Caught read timeout.")
+            self.printExecption(e, response)
             raise RuntimeError("Read timeout while querying for status endpoint")
-        try:
-            if response.headers["Content-Type"] == "application/json":
-                status_endpoint = response.json()["status"]
-            else:
-                raise Exception( f"Unexpected type of response: {response.headers['Content-Type']}" )
         except:
             raise RuntimeError(f"Request was not successful. Response by TOAR database: {response.text}")
+        
+        #we made it: let's remember the status endpoint to get our results later:-)
         self.cache.put(query_options.cache_key, status_endpoint)
 
         return status_endpoint
@@ -307,7 +335,10 @@ class Connection:
             print(f"Text: {e.response.text}")
             print(f"{response=}")
             print(f"{response.content=}")
-            print(response.json())
+            try:
+                print(response.json())
+            except Exception as e:
+                print("Decoding as json failed.")
 
 
 class AnalysisService:
-- 
GitLab