diff --git a/mlair/helpers/data_sources/data_loader.py b/mlair/helpers/data_sources/data_loader.py index c30568b8579afcfb4c4b0a75d56bd9572a8136f6..4e69c006ee6c9593c2e323d2f4fdf73174c992ff 100644 --- a/mlair/helpers/data_sources/data_loader.py +++ b/mlair/helpers/data_sources/data_loader.py @@ -3,6 +3,8 @@ __date__ = '2023-06-01' import logging from typing import Dict, Union, List +import time +import random import requests from requests.adapters import HTTPAdapter, Retry @@ -93,7 +95,7 @@ class EmptyQueryResult(Exception): pass -def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, List, str]: +def get_data(opts: Dict, headers: Dict, as_json: bool = True, max_retries=5) -> Union[Dict, List, str]: """ Download join data using requests framework. @@ -106,15 +108,23 @@ def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, Lis :return: requested data (either as list or dictionary) """ url = create_url(**opts) - try: - with TimeTracking(name=url): - response = retries_session().get(url, headers=headers, timeout=(5, None)) # timeout=(open, read) - if response.status_code == 200: - return response.json() if as_json is True else response.text - else: - raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}") - except requests.exceptions.RetryError as e: - raise EmptyQueryResult(f"There was an RetryError for request {url}: {e}") + for retry in range(max_retries): + time.sleep(random.random()) + try: + timeout = 60 * (2 ** retry) + logging.info(f"connect (retry={retry}, timeout={timeout}) {url}") + with TimeTracking(name=url): + session = retries_session(max_retries=0) + response = session.get(url, headers=headers, timeout=(5, timeout)) # timeout=(open, read) + if response.status_code == 200: + return response.json() if as_json is True else response.text + else: + logging.debug(f"There was an error (STATUS {response.status_code}) for request {url}") + except Exception as e: + time.sleep(retry) + logging.debug(f"There was an error for request {url}: {e}") + if retry + 1 >= max_retries: + raise EmptyQueryResult(f"There was an RetryError for request {url}: {e}") def correct_stat_name(stat: str) -> str: