From 8b41958b0423d22d3b24f2b5be12263fc0b6f631 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 7 Jun 2023 09:27:42 +0200
Subject: [PATCH] change retry behaviour

---
 mlair/helpers/data_sources/data_loader.py | 30 +++++++++++++++--------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/mlair/helpers/data_sources/data_loader.py b/mlair/helpers/data_sources/data_loader.py
index 8027e46..7f61d1b 100644
--- a/mlair/helpers/data_sources/data_loader.py
+++ b/mlair/helpers/data_sources/data_loader.py
@@ -3,6 +3,8 @@ __date__ = '2023-06-01'
 
 import logging
 from typing import Dict, Union, List
+import time
+import random
 
 import requests
 from requests.adapters import HTTPAdapter, Retry
@@ -83,7 +85,7 @@ class EmptyQueryResult(Exception):
     pass
 
 
-def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, List, str]:
+def get_data(opts: Dict, headers: Dict, as_json: bool = True, max_retries=5) -> Union[Dict, List, str]:
     """
     Download join data using requests framework.
 
@@ -96,15 +98,23 @@ def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, Lis
     :return: requested data (either as list or dictionary)
     """
     url = create_url(**opts)
-    try:
-        with TimeTracking(name=url):
-            response = retries_session().get(url, headers=headers, timeout=(5, None))  # timeout=(open, read)
-        if response.status_code == 200:
-            return response.json() if as_json is True else response.text
-        else:
-            raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}")
-    except requests.exceptions.RetryError as e:
-        raise EmptyQueryResult(f"There was an RetryError for request {url}: {e}")
+    error = None  # last failure (exception or status text), reported if all attempts fail
+    for retry in range(max_retries):
+        time.sleep(random.random())  # random delay (< 1s) before every attempt
+        timeout = 60 * (2 ** retry)  # read timeout [s], doubles per attempt; passed as timeout=(open, read)
+        logging.info(f"connect (retry={retry}, timeout={timeout}) {url}")
+        try:
+            with TimeTracking(name=url):
+                response = retries_session(max_retries=0).get(url, headers=headers, timeout=(5, timeout))
+                if response.status_code == 200:
+                    return response.json() if as_json is True else response.text
+            error = f"STATUS {response.status_code}"  # non-200 answers count as a failed attempt, too
+        except Exception as e:
+            error = e
+        logging.debug(f"There was an error for request {url}: {error}")
+        if retry + 1 < max_retries:
+            time.sleep(retry)  # wait longer after each failure, but not after the last one
+    raise EmptyQueryResult(f"There was an error for request {url} after {max_retries} tries: {error}")
 
 
 def correct_stat_name(stat: str) -> str:
-- 
GitLab