From e88fd61b714c6f25cd4f2f20d9b564ff5dcb13e5 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Wed, 5 Feb 2020 08:50:40 +0100
Subject: [PATCH] add join settings to switch between daily and hourly data

---
 src/join.py          | 27 ++++++++++++++++++---------
 src/join_settings.py | 11 +++++++++++
 2 files changed, 29 insertions(+), 9 deletions(-)
 create mode 100644 src/join_settings.py

diff --git a/src/join.py b/src/join.py
index 43271a7b..066b0340 100644
--- a/src/join.py
+++ b/src/join.py
@@ -8,8 +8,9 @@ import pandas as pd
 import datetime as dt
 from typing import Iterator, Union, List, Dict
 from src import helpers
+from src.join_settings import join_settings
 
-join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/'
+# join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/'
 str_or_none = Union[str, None]
 
 
@@ -21,7 +22,7 @@ class EmptyQueryResult(Exception):
 
 
 def download_join(station_name: Union[str, List[str]], stat_var: dict, station_type: str = None,
-                  network_name: str = None) -> [pd.DataFrame, pd.DataFrame]:
+                  network_name: str = None, sampling: str = "daily") -> [pd.DataFrame, pd.DataFrame]:
     """
     read data from JOIN/TOAR
 
@@ -29,6 +30,7 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
     :param stat_var: key as variable like 'O3', values as statistics on keys like 'mean'
     :param station_type: set the station type like "traffic" or "background", can be none
     :param network_name: set the measurement network like "UBA" or "AIRBASE", can be none
+    :param sampling: sampling rate of the downloaded data, either set to daily or hourly (default daily)
     :returns:
         - df - data frame with all variables and statistics
         - meta - data frame with all meta information
@@ -36,8 +38,11 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
     # make sure station_name parameter is a list
     station_name = helpers.to_list(station_name)
 
+    # get data connection settings
+    join_url_base, headers = join_settings(sampling)
+
     # load series information
-    vars_dict = load_series_information(station_name, station_type, network_name)
+    vars_dict = load_series_information(station_name, station_type, network_name, join_url_base, headers)
 
     # download all variables with given statistic
     data = None
@@ -49,10 +54,10 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
 
             # create data link
             opts = {'base': join_url_base, 'service': 'stats', 'id': vars_dict[var], 'statistics': stat_var[var],
-                    'sampling': 'daily', 'capture': 0, 'min_data_length': 1460}
+                    'sampling': sampling, 'capture': 0, 'min_data_length': 1460}
 
             # load data
-            data = get_data(opts)
+            data = get_data(opts, headers)
 
             # correct namespace of statistics
             stat = _correct_stat_name(stat_var[var])
@@ -70,30 +75,34 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
         raise EmptyQueryResult("No data found in JOIN.")
 
 
-def get_data(opts: Dict) -> Union[Dict, List]:
+def get_data(opts: Dict, headers: Dict) -> Union[Dict, List]:
     """
     Download join data using requests framework. Data is returned as json like structure. Depending on the response
     structure, this can lead to a list or dictionary.
     :param opts: options to create the request url
+    :param headers: additional headers information like authorization, can be empty
     :return: requested data (either as list or dictionary)
     """
     url = create_url(**opts)
-    response = requests.get(url)
+    response = requests.get(url, headers=headers)
     return response.json()
 
 
-def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none) -> Dict:
+def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none,
+                            join_url_base: str, headers: Dict) -> Dict:
     """
     List all series ids that are available for given station id and network name.
     :param station_name: Station name e.g. DEBW107
     :param station_type: station type like "traffic" or "background"
     :param network_name: measurement network of the station like "UBA" or "AIRBASE"
+    :param join_url_base: base url name to download data from
+    :param headers: additional headers information like authorization, can be empty
     :return: all available series for requested station stored in an dictionary with parameter name (variable) as key
         and the series id as value.
     """
     opts = {"base": join_url_base, "service": "series", "station_id": station_name[0], "station_type": station_type,
             "network_name": network_name}
-    station_vars = get_data(opts)
+    station_vars = get_data(opts, headers)
     vars_dict = {item[3].lower(): item[0] for item in station_vars}
     return vars_dict
 
diff --git a/src/join_settings.py b/src/join_settings.py
new file mode 100644
index 00000000..d6843630
--- /dev/null
+++ b/src/join_settings.py
@@ -0,0 +1,11 @@
+
+def join_settings(sampling="daily"):
+    if sampling == "daily":
+        TOAR_SERVICE_URL = 'https://join.fz-juelich.de/services/rest/surfacedata/'
+        headers = {}
+    elif sampling == "hourly":
+        TOAR_SERVICE_URL = 'https://join.fz-juelich.de/services/rest/surfacedata/'
+        headers = {"Authorization": "Token 12345"}
+    else:
+        raise NameError(f"Given sampling {sampling} is not supported, choose from either daily or hourly sampling.")
+    return TOAR_SERVICE_URL, headers
-- 
GitLab