From e88fd61b714c6f25cd4f2f20d9b564ff5dcb13e5 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Wed, 5 Feb 2020 08:50:40 +0100
Subject: [PATCH] add join settings to switch between daily and hourly data

---
 src/join.py          | 27 ++++++++++++++++++---------
 src/join_settings.py | 11 +++++++++++
 2 files changed, 29 insertions(+), 9 deletions(-)
 create mode 100644 src/join_settings.py

diff --git a/src/join.py b/src/join.py
index 43271a7b..066b0340 100644
--- a/src/join.py
+++ b/src/join.py
@@ -8,8 +8,9 @@ import pandas as pd
 import datetime as dt
 from typing import Iterator, Union, List, Dict
 from src import helpers
+from src.join_settings import join_settings
 
-join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/'
+# join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/'
 str_or_none = Union[str, None]
 
 
@@ -21,7 +22,7 @@ class EmptyQueryResult(Exception):
 
 
 def download_join(station_name: Union[str, List[str]], stat_var: dict, station_type: str = None,
-                  network_name: str = None) -> [pd.DataFrame, pd.DataFrame]:
+                  network_name: str = None, sampling: str = "daily") -> [pd.DataFrame, pd.DataFrame]:
 
     """
     read data from JOIN/TOAR
@@ -29,6 +30,7 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
     :param stat_var: key as variable like 'O3', values as statistics on keys like 'mean'
     :param station_type: set the station type like "traffic" or "background", can be none
     :param network_name: set the measurement network like "UBA" or "AIRBASE", can be none
+    :param sampling: sampling rate of the downloaded data, either set to daily or hourly (default daily)
     :returns:
         - df - data frame with all variables and statistics
         - meta - data frame with all meta information
@@ -36,8 +38,11 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
     # make sure station_name parameter is a list
     station_name = helpers.to_list(station_name)
 
+    # get data connection settings
+    join_url_base, headers = join_settings(sampling)
+
     # load series information
-    vars_dict = load_series_information(station_name, station_type, network_name)
+    vars_dict = load_series_information(station_name, station_type, network_name, join_url_base, headers)
 
     # download all variables with given statistic
     data = None
@@ -49,10 +54,10 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
 
             # create data link
             opts = {'base': join_url_base, 'service': 'stats', 'id': vars_dict[var], 'statistics': stat_var[var],
-                    'sampling': 'daily', 'capture': 0, 'min_data_length': 1460}
+                    'sampling': sampling, 'capture': 0, 'min_data_length': 1460}
 
             # load data
-            data = get_data(opts)
+            data = get_data(opts, headers)
 
             # correct namespace of statistics
             stat = _correct_stat_name(stat_var[var])
@@ -70,30 +75,34 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
         raise EmptyQueryResult("No data found in JOIN.")
 
 
-def get_data(opts: Dict) -> Union[Dict, List]:
+def get_data(opts: Dict, headers: Dict) -> Union[Dict, List]:
     """
     Download join data using requests framework. Data is returned as json like structure. Depending on the response
     structure, this can lead to a list or dictionary.
     :param opts: options to create the request url
+    :param headers: additional header information such as authorization, can be empty
     :return: requested data (either as list or dictionary)
     """
     url = create_url(**opts)
-    response = requests.get(url)
+    response = requests.get(url, headers=headers)
     return response.json()
 
 
-def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none) -> Dict:
+def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none,
+                            join_url_base: str, headers: Dict) -> Dict:
     """
     List all series ids that are available for given station id and network name.
     :param station_name: Station name e.g. DEBW107
     :param station_type: station type like "traffic" or "background"
     :param network_name: measurement network of the station like "UBA" or "AIRBASE"
+    :param join_url_base: base url to download data from
+    :param headers: additional header information such as authorization, can be empty
     :return: all available series for requested station stored in an dictionary with parameter name (variable) as key
         and the series id as value.
     """
     opts = {"base": join_url_base, "service": "series", "station_id": station_name[0], "station_type": station_type,
             "network_name": network_name}
-    station_vars = get_data(opts)
+    station_vars = get_data(opts, headers)
     vars_dict = {item[3].lower(): item[0] for item in station_vars}
     return vars_dict
 
diff --git a/src/join_settings.py b/src/join_settings.py
new file mode 100644
index 00000000..d6843630
--- /dev/null
+++ b/src/join_settings.py
@@ -0,0 +1,11 @@
+
+def join_settings(sampling="daily"):
+    """
+    Provide the JOIN connection settings (service url and request headers) for the given sampling rate.
+    :param sampling: sampling rate, either "daily" (default) or "hourly", any other value raises a NameError
+    :return: tuple of the service url and the headers dict (empty for daily, authorization token for hourly)
+    """
+    if sampling not in ("daily", "hourly"):
+        raise NameError(f"Given sampling {sampling} is not supported, choose from either daily or hourly sampling.")
+    request_headers = {"Authorization": "Token 12345"} if sampling == "hourly" else {}
+    return 'https://join.fz-juelich.de/services/rest/surfacedata/', request_headers
-- 
GitLab