-
Carsten Hinz authored
removed extraction of a single day from benchmark -> seems to cause crashes. added output directories of benchmark script to ignore
Carsten Hinz authoredremoved extraction of a single day from benchmark -> seems to cause crashes. added output directories of benchmark script to ignore
benchmark.py 4.42 KiB
from datetime import datetime, timedelta
"""
Script for benchmarking the performance of the TOAR Database.
The script relies on the AnalysisServiceDownload for downloading the requested data.
For a subsequent request, the already downloaded data are not considered.
The script contains two ways to access the database:
- a manual way without downloading the data (not used)
- usage of the AnalysisService created for this module
The output of this script is the duration of each request. Each call ends by printing the duration for its time interval.
Meanwhile, the status outputs of the analysis service are given.
"""
import time
import requests
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload
from toargridding.metadata import Metadata, TimeSample, TOARVariable
# Module-level time window. The benchmark functions in this file take their
# own ``start``/``end`` parameters, and the ``__main__`` loop rebinds these
# two names, so these initial values are not read by the code visible here.
start = datetime(2016, 3, 1)
end = datetime(2016, 3, 3)
# Temporal sampling, sent as the "sampling" request parameter and passed to
# TimeSample.
SAMPLING = "daily" # FIXME check monthly !!!
# URL used both for the manual ``requests.get`` call and for constructing the
# AnalysisServiceDownload client.
STATISTICS_ENDPOINT = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
# Statistic name, sent as the "statistics" request parameter and passed to
# Metadata.construct.
STATISTIC = "mean"
# Directory containing this script; the "temp_data_cache" and "temp_data"
# directories are created below it.
TEST_ROOT = Path(__file__).parent
def get_toar_response(start, end):
    """Send a hand-built statistics request to the TOAR database and wait for it.

    Parameters:
    ----------
    start:
        first time point of the requested range (must provide ``isoformat()``)
    end:
        last time point of the requested range; one day is added before the
        request is sent

    Results:
        The value returned by ``wait_for_data`` for the initial response.
    """
    # The requested range is extended by one day beyond ``end``.
    padded_end = end + timedelta(1)
    date_range = f"{start.isoformat()},{padded_end.isoformat()}"

    query = dict(
        daterange=date_range,
        variable_id=5,
        statistics=STATISTIC,
        sampling=SAMPLING,
        min_data_capture=0,
        # Sent as the literal string "None"; per the original author's note
        # this requests all timeseries.
        limit="None",
        format="by_statistic",
        metadata_scheme="basic",
    )

    first_response = requests.get(STATISTICS_ENDPOINT, params=query)
    return wait_for_data(first_response)
def wait_for_data(response, poll_interval=60):
    """Poll the status endpoint of a manual request until a zip file is delivered.

    Parameters:
    ----------
    response:
        response of the initial request. It must provide a ``headers`` mapping
        and a ``json()`` method (presumably a ``requests.Response``).
    poll_interval:
        seconds to sleep before each status request (default: 60)

    Results:
        Number of status polls that were needed. 0 means the initial response
        already had the content type "application/zip".
    """
    tries = 0
    while True:
        print(f"n tries: {tries}")
        if response.headers["Content-Type"] == "application/zip":
            return tries

        # Not finished yet: the JSON body names the endpoint to poll.
        status_endpoint = response.json()["status"]
        time.sleep(poll_interval)
        try:
            response = requests.get(status_endpoint)
        # requests raises its own ConnectionError, which does not derive from
        # the builtin one, so both have to be listed for the handler to fire.
        except (ConnectionError, requests.exceptions.ConnectionError):
            # Keep the previous response so the same status endpoint is
            # polled again in the next iteration.
            print("temporarly no connection")
        tries += 1
def time_toar_response(start, end):
    """Benchmark a manually created request to the TOAR database.

    Parameters:
    ----------
    start:
        start time point
    end:
        end time point

    Results:
        Prints the elapsed wall-clock time and the number of tries reported by
        ``get_toar_response``.
    """
    print(f"starting request for {start}-{end}")

    began = datetime.now()
    tries = get_toar_response(start, end)
    elapsed = datetime.now() - began

    print(f"response time for {start}-{end}: {elapsed} ({tries} tries)")
def wait_for_client_response(client, sample, max_tries=20):
    """Call ``client.get_data`` until it succeeds or the tries are used up.

    According to the original note, ``get_data`` raises an exception after
    30min if the results are not available; each such ``RuntimeError`` counts
    as one try.

    Parameters:
    ----------
    client:
        object providing ``get_data(sample)``
    sample:
        request description handed to ``get_data`` unchanged
    max_tries:
        maximum number of ``get_data`` calls (default: 20)

    Results:
        Number of ``get_data`` calls that were made. If every call timed out,
        this equals ``max_tries``.
    """
    tries = 0
    for half_hours in range(max_tries):
        tries = half_hours + 1
        try:
            client.get_data(sample)
        except RuntimeError:
            print(f"time out after 30min, try {half_hours+1}")
        else:
            # Data arrived: stop here instead of repeating the request for
            # the remaining iterations.
            break
    return tries
def time_rest_client_response(start: datetime, end: datetime):
    """Benchmark a request issued through the AnalysisServiceDownload client.

    Parameters:
    ----------
    start:
        start time point
    end:
        end time point

    Results:
        Prints the wall-clock time spent in ``wait_for_client_response``.
    """
    print(f"starting request for {start}-{end}")

    # Working directories live next to this script and are created on demand.
    cache_dir = TEST_ROOT / "temp_data_cache"
    data_dir = TEST_ROOT / "temp_data"
    for directory in (cache_dir, data_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # use_downloaded=False: presumably forces a new download instead of
    # reusing earlier results - confirm against AnalysisServiceDownload.
    rest_client = AnalysisServiceDownload(
        STATISTICS_ENDPOINT, cache_dir, data_dir, use_downloaded=False
    )
    sample = Metadata.construct(
        "mole_fraction_of_ozone_in_air",
        TimeSample(start, end, SAMPLING),
        STATISTIC,
    )

    began = datetime.now()
    wait_for_client_response(rest_client, sample)
    elapsed = datetime.now() - began

    print(f"response time for {start}-{end}: {elapsed} (tries)")
if __name__ == "__main__":
    # Every benchmark window starts on the same day; only its length grows.
    first_day = datetime(2010, 1, 1)
    time_windows = [
        (first_day, last_day)
        for last_day in (
            # The single-day window ending on datetime(2010, 1, 2) is left
            # out because it seems to cause crashes.
            datetime(2010, 1, 8),
            datetime(2010, 1, 31),
            datetime(2010, 12, 31),
            datetime(2015, 12, 31),
        )
    ]

    # Run the REST-client benchmark once per window, shortest first.
    for start, end in time_windows:
        print("rest client")
        time_rest_client_response(start, end)