def get_data(opts: Dict, headers: Dict, as_json: bool = True, max_retries: int = 5,
             timeout_base: int = 60) -> Union[Dict, List, str]:
    """
    Download join data using requests framework.

    The request is issued once and, if it does not succeed, repeated up to `max_retries` more times. The read timeout
    doubles with every attempt (`timeout_base * 2 ** retry`), the connect timeout is fixed to 5 seconds. An attempt
    counts as failed if the response status is not 200 or if any exception is raised while requesting or decoding.

    :param opts: keyword arguments handed to `create_url` to build the request url
    :param headers: headers sent along with the request
    :param as_json: return the decoded json body if True, the raw response text otherwise
    :param max_retries: number of retries after the first attempt (negative values are treated as 0)
    :param timeout_base: read timeout in seconds used for the first attempt

    :return: requested data (either as list or dictionary)

    :raises EmptyQueryResult: if none of the attempts returned with status 200
    """
    url = create_url(**opts)
    # one initial attempt plus `max_retries` retries; clamp so that at least one request is always sent
    attempts = max(max_retries, 0) + 1
    response_error = None
    last_exception = None
    for retry in range(attempts):
        # random delay (< 1s) in front of every request
        # NOTE(review): presumably meant to de-synchronise parallel downloads - confirm
        time.sleep(random.random())
        try:
            timeout = timeout_base * (2 ** retry)
            logging.info(f"connect (retry={retry}, timeout={timeout}) {url}")
            with TimeTracking(name=url):
                # retries are handled by this loop, therefore the session itself must not retry
                session = retries_session(max_retries=0)
                response = session.get(url, headers=headers, timeout=(5, timeout))  # timeout=(open, read)
            if response.status_code == 200:
                return response.json() if as_json is True else response.text
            logging.debug(f"There was an error (STATUS {response.status_code}) for request {url}")
            response_error = f"STATUS {response.status_code}"
            last_exception = None
        except Exception as e:  # retry boundary: any failure of this attempt triggers the next one
            logging.debug(f"There was an error for request {url}: {e}")
            response_error = e
            last_exception = e
            # back off before the next attempt, but do not wait if there is no attempt left
            if retry + 1 < attempts:
                time.sleep(retry)
    # all attempts are exhausted: report the last seen problem and keep the original exception as cause
    raise EmptyQueryResult(f"There was an RetryError for request {url}: {response_error}") from last_exception