Newer
Older
Simon Grasse
committed
from datetime import datetime, timedelta
"""
Script for benchmarking the performance of the TOAR Database.
The script relies on the AnalysisServiceDownload for downloading the requested data.
For a subsequent request, the already downloaded data are not considered.
The script contains two ways to access the database:
- a manual way without downloading the data (not used)
- usage of the AnalysisService created for this module
The output of this script is the duration of each request. Each call ends by printing the duration for its interval.
In the meantime, the status outputs of the analysis service are printed.
"""
Simon Grasse
committed
import time
import requests
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload
from toargridding.metadata import Metadata, TimeSample, TOARVariable
# Example time window. The benchmark functions below take their own
# ``start``/``end`` arguments.
start = datetime(2016, 3, 1)
end = datetime(2016, 3, 3)
# Temporal sampling sent with the manual request and used for the TimeSample
# of the AnalysisService request.
SAMPLING = "daily" # FIXME check monthly !!!
# URL of the statistics endpoint; used by the manual request and handed to
# AnalysisServiceDownload.
STATISTICS_ENDPOINT = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
# Statistic requested in both access paths.
STATISTIC = "mean"
# Directory containing this script; the temporary data directories are
# created below it.
TEST_ROOT = Path(__file__).parent
def get_toar_response(start, end):
    """Send a manual statistics request to the TOAR database and wait for it.

    Parameters:
    ----------
    start:
        start time point of the requested date range
    end:
        end time point; one day is added before it is sent to the service

    Returns:
        The value returned by wait_for_data for the initial response.
    """
    # The requested range is padded by one day beyond ``end``.
    padded_end = end + timedelta(1)
    query = {
        "daterange": f"{start.isoformat()},{padded_end.isoformat()}",
        "variable_id": 5,
        "statistics": STATISTIC,
        "sampling": SAMPLING,
        "min_data_capture": 0,
        "limit": "None",  # get all timeseries
        "format": "by_statistic",
        "metadata_scheme": "basic",
    }
    initial_response = requests.get(STATISTICS_ENDPOINT, params=query)
    return wait_for_data(initial_response)
def wait_for_data(response):
    """Poll the status endpoint of a manual request until the data are ready.

    The result counts as ready once a response arrives whose Content-Type is
    ``application/zip``. Until then, the status URL is read from the JSON
    body of the current response and queried again after 60 seconds.

    Parameters:
    ----------
    response:
        response of the initial request or of a previous status query

    Returns:
        Number of status polls that were needed (0 if the first response
        already contained the zip file).
    """
    tries = 0
    while True:
        print(f"n tries: {tries}")
        if response.headers["Content-Type"] == "application/zip":
            break
        status_endpoint = response.json()["status"]
        time.sleep(60)
        try:
            response = requests.get(status_endpoint)
        except (requests.exceptions.ConnectionError, ConnectionError):
            # requests raises its own ConnectionError, which is not a subclass
            # of the built-in one, so it has to be named explicitly. The
            # previous response is kept and its status URL is polled again.
            print("temporarly no connection")
        tries += 1
    return tries
def time_toar_response(start, end):
    """Benchmark a manually created request to the TOAR database.

    Prints the wall-clock duration of get_toar_response for the given time
    window together with the number of tries it reports.

    Parameters:
    ----------
    start:
        start time point
    end:
        end time point
    """
    print(f"starting request for {start}-{end}")
    began = datetime.now()
    tries = get_toar_response(start, end)
    response_time = datetime.now() - began
    print(f"response time for {start}-{end}: {response_time} ({tries} tries)")
def wait_for_client_response(client, sample, max_tries=20):
    """Call ``client.get_data(sample)`` until it succeeds or tries run out.

    get_data throws a RuntimeError after 30min if the results are not yet
    available; in that case the call is repeated. The loop stops as soon as
    one call returns without raising, so a finished request is not fetched
    again.

    Parameters:
    ----------
    client:
        object providing ``get_data(sample)``
    sample:
        description of the requested data, passed through to get_data
    max_tries:
        maximum number of get_data calls (default: 20)

    Returns:
        Number of get_data calls that were made. A value equal to max_tries
        does not tell whether the last call succeeded or timed out.
    """
    tries = 0
    for tries in range(1, max_tries + 1):
        try:
            client.get_data(sample)
        except RuntimeError:
            print(f"time out after 30min, try {tries}")
        else:
            # Data arrived: stop instead of issuing further requests.
            break
    return tries
def time_rest_client_response(start: datetime, end: datetime):
    """Benchmark a request issued through AnalysisServiceDownload.

    The client is created with ``use_downloaded=False``. Only the call to
    wait_for_client_response is timed; creating the directories, the client
    and the request metadata happens before the timer starts.

    Parameters:
    ----------
    start:
        start time point
    end:
        end time point
    Results:
        Prints the duration of the request. The number of tries is not
        available here; the literal text "(tries)" is printed instead.
    """
    print(f"starting request for {start}-{end}")

    # NOTE(review): both directories are created but not handed to the client
    # below -- confirm whether AnalysisServiceDownload should receive them.
    for directory_name in ("temp_data_cache", "temp_data"):
        (TEST_ROOT / directory_name).mkdir(parents=True, exist_ok=True)

    rest_client = AnalysisServiceDownload(STATISTICS_ENDPOINT, use_downloaded=False)
    sample = Metadata.construct(
        "mole_fraction_of_ozone_in_air",
        TimeSample(start, end, SAMPLING),
        STATISTIC,
    )

    began = datetime.now()
    wait_for_client_response(rest_client, sample)
    response_time = datetime.now() - began
    print(f"response time for {start}-{end}: {response_time} (tries)")
if __name__ == "__main__":
    # Every benchmark window starts on 2010-01-01; only the end date grows.
    # The one-day window (2010-01-01 to 2010-01-02) is left out because it
    # seems to cause crashes.
    window_start = datetime(2010, 1, 1)
    window_ends = (
        datetime(2010, 1, 8),
        datetime(2010, 1, 31),
        datetime(2010, 12, 31),
        datetime(2015, 12, 31),
    )
    for window_end in window_ends:
        print("rest client")
        time_rest_client_response(window_start, window_end)