"""Script for benchmarking the performance of the TOAR Database.

The script relies on the AnalysisServiceDownload for downloading the requested
data. For a subsequent request, the already downloaded data are not considered.

The script contains two ways to access the database:
- a manual way without downloading the data (not used)
- usage of the AnalysisService created for this module

The output of this script are the durations of the requests. Each call is ended
with the duration for this interval. Meanwhile, the status outputs of the
analysis service are given.
"""

import time
from datetime import datetime, timedelta
from pathlib import Path

import requests

from toargridding.toar_rest_client import AnalysisServiceDownload
from toargridding.metadata import Metadata, TimeSample, TOARVariable

start = datetime(2016, 3, 1)
end = datetime(2016, 3, 3)

SAMPLING = "daily"  # FIXME check monthly !!!
STATISTICS_ENDPOINT = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
STATISTIC = "mean"

TEST_ROOT = Path(__file__).parent


def get_toar_response(start: datetime, end: datetime) -> int:
    """Manual request to the TOAR database (no download of the results).

    Parameters
    ----------
    start:
        first time point of the requested interval
    end:
        last time point of the requested interval; one day of padding is added
        so the end date itself is included in the served data

    Returns
    -------
    number of status polls needed until the data were ready
    """
    # pad by one day so that `end` itself is covered by the daterange
    end_with_padding = end + timedelta(1)
    response = requests.get(
        STATISTICS_ENDPOINT,
        params={
            "daterange": f"{start.isoformat()},{end_with_padding.isoformat()}",  # 1-year
            "variable_id": 5,
            "statistics": STATISTIC,
            "sampling": SAMPLING,
            "min_data_capture": 0,
            "limit": "None",  # get all timeseries
            "format": "by_statistic",
            "metadata_scheme": "basic",
        },
    )
    return wait_for_data(response)


def wait_for_data(response) -> int:
    """Wait for the data of a manual request to become available.

    Polls the status endpoint reported by the service once per minute until the
    response carries the finished ZIP payload.

    Parameters
    ----------
    response:
        the `requests.Response` of the initial statistics request

    Returns
    -------
    number of polling iterations performed
    """
    tries = 0
    while True:
        print(f"n tries: {tries}")
        # the service answers with the ZIP archive once the result is computed;
        # before that it returns JSON containing a status endpoint to poll
        if response.headers["Content-Type"] == "application/zip":
            break
        status_endpoint = response.json()["status"]
        time.sleep(60)
        try:
            response = requests.get(status_endpoint)
        except requests.ConnectionError:
            # BUG FIX: the original caught the *builtin* ConnectionError, which
            # requests does not raise — requests.exceptions.ConnectionError is
            # not a subclass of it, so transient outages crashed the loop.
            print("temporarly no connection")
        tries += 1
    return tries


def time_toar_response(start: datetime, end: datetime) -> None:
    """Benchmark with manual creation of a request to the TOAR database.

    Prints the wall-clock duration of the request and the number of polls.
    """
    print(f"starting request for {start}-{end}")
    timer_start = datetime.now()
    tries = get_toar_response(start, end)
    timer_end = datetime.now()
    response_time = timer_end - timer_start
    print(f"response time for {start}-{end}: {response_time} ({tries} tries)")


def wait_for_client_response(client, sample) -> int:
    """Wait for a response of the analysis service client.

    `get_data` raises after 30 min if results are not yet available, so it is
    retried up to 20 times (i.e. up to ~10 h).

    Parameters
    ----------
    client:
        the AnalysisService used for the request
    sample:
        the Metadata describing the requested data

    Returns
    -------
    number of `get_data` calls issued (tries)
    """
    for half_hours in range(20):
        try:
            client.get_data(sample)
        except RuntimeError:
            print(f"time out after 30min, try {half_hours+1}")
        else:
            # BUG FIX: the original had no exit on success, so get_data was
            # called 20 times even after the data had been obtained.
            return half_hours + 1
    return 20


def time_rest_client_response(start: datetime, end: datetime) -> None:
    """Benchmark function using the AnalysisService with download of the requested data.

    Parameters:
    ----------
    start:
        start time point
    end:
        end time point

    Results:
        Prints duration and number of tries, i.e. number of calls of get_data,
        which times out after 30min.
    """
    print(f"starting request for {start}-{end}")
    path_cache = TEST_ROOT / "temp_data_cache"
    path_data = TEST_ROOT / "temp_data"
    path_cache.mkdir(parents=True, exist_ok=True)
    path_data.mkdir(parents=True, exist_ok=True)
    rest_client = AnalysisServiceDownload(
        STATISTICS_ENDPOINT,
        path_cache,
        path_data,
        use_downloaded=False,
    )
    time_window = TimeSample(start, end, SAMPLING)
    sample = Metadata.construct("mole_fraction_of_ozone_in_air", time_window, STATISTIC)
    timer_start = datetime.now()
    tries = wait_for_client_response(rest_client, sample)
    timer_end = datetime.now()
    response_time = timer_end - timer_start
    # BUG FIX: the original printed the literal text "(tries)" — the actual
    # try count was never interpolated into the message.
    print(f"response time for {start}-{end}: {response_time} ({tries} tries)")


if __name__ == "__main__":
    time_windows = [
        #(datetime(2010, 1, 1), datetime(2010, 1, 2)),#this line seems to cause crashes
        (datetime(2010, 1, 1), datetime(2010, 1, 8)),
        (datetime(2010, 1, 1), datetime(2010, 1, 31)),
        (datetime(2010, 1, 1), datetime(2010, 12, 31)),
        (datetime(2010, 1, 1), datetime(2015, 12, 31)),
    ]
    for start, end in time_windows:
        print("rest client")
        time_rest_client_response(start, end)