From ee34c882abfa37fd9f63345aa6f6d1808515141e Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 6 Jul 2022 10:45:30 +0200
Subject: [PATCH] restructured data loading modules

---
 .../data_handler_single_station.py            |  31 ++++--
 mlair/data_handler/default_data_handler.py    |   2 +-
 mlair/helpers/data_sources/__init__.py        |  10 ++
 mlair/helpers/{ => data_sources}/era5.py      |  29 +++--
 mlair/helpers/{ => data_sources}/join.py      | 103 +++++-------
 mlair/helpers/data_sources/toar_data.py       |  89 +++++++++
 .../{ => data_sources}/toar_data_v2.py        |   9 +-
 mlair/run_modules/pre_processing.py           |   2 +-
 .../{ => test_data_sources}/test_join.py      |  58 ++------
 .../test_data_sources/test_toar_data.py       |  40 +++++++
 10 files changed, 219 insertions(+), 154 deletions(-)
 create mode 100644 mlair/helpers/data_sources/__init__.py
 rename mlair/helpers/{ => data_sources}/era5.py (64%)
 rename mlair/helpers/{ => data_sources}/join.py (83%)
 create mode 100644 mlair/helpers/data_sources/toar_data.py
 rename mlair/helpers/{ => data_sources}/toar_data_v2.py (96%)
 rename test/test_helpers/{ => test_data_sources}/test_join.py (88%)
 create mode 100644 test/test_helpers/test_data_sources/test_toar_data.py

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 690a44ff..516fab7d 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -20,9 +20,9 @@ import xarray as xr
 
 from mlair.configuration import check_path_and_create
 from mlair import helpers
-from mlair.helpers import join, statistics, TimeTrackingWrapper, filter_dict_by_value, select_from_dict, era5
+from mlair.helpers import statistics, TimeTrackingWrapper, filter_dict_by_value, select_from_dict
 from mlair.data_handler.abstract_data_handler import AbstractDataHandler
-from mlair.helpers import toar_data_v2
+from mlair.helpers import data_sources
 
 # define a more general date type for type hinting
 date = Union[dt.date, dt.datetime]
@@ -382,8 +382,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
         :return: downloaded data and its meta data
         """
         df_all = {}
-        df_era5, df_toar = None, None
-        meta_era5, meta_toar = None, None
+        df_era5, df_toar, df_join = None, None, None
+        meta_era5, meta_toar, meta_join = None, None, None
         if data_origin is not None:
             era5_origin = filter_dict_by_value(data_origin, "era5", True)
             era5_stats = select_from_dict(statistics_per_var, era5_origin.keys())
@@ -398,13 +398,24 @@ class DataHandlerSingleStation(AbstractDataHandler):
         # load data
         if era5_origin is not None and len(era5_stats) > 0:
             # load era5 data
-            df_era5, meta_era5 = era5.load_era5(station_name=station, stat_var=era5_stats, sampling=sampling,
-                                                data_origin=era5_origin)
+            df_era5, meta_era5 = data_sources.era5.load_era5(station_name=station, stat_var=era5_stats,
+                                                             sampling=sampling, data_origin=era5_origin)
         if toar_origin is None or len(toar_stats) > 0:
-            # load join data
-            # df_toar, meta_toar = toar_data_v2.download_toar(station, toar_stats, sampling=sampling, data_origin=toar_origin)
-            df_join, meta_join = join.download_join(station_name=station, stat_var=toar_stats, sampling=sampling,
-                                                    station_type=station_type, data_origin=toar_origin)
+            # load combined data from toar-data (v2 & v1)
+            df_toar, meta_toar = data_sources.toar_data.download_toar(station=station, toar_stats=toar_stats,
+                                                                      sampling=sampling, data_origin=toar_origin,
+                                                                      station_type=station_type)
+
+            # # load data from toar-data (v2)
+            # df_toar, meta_toar = 
toar_data.download_toar(station, toar_stats, sampling=sampling, data_origin=toar_origin)
+            #
+            # # load join data (toar-data v1)
+            # df_join, meta_join = join.download_join(station_name=station, stat_var=toar_stats, sampling=sampling,
+            #                                         station_type=station_type, data_origin=toar_origin)
+            #
+            # # fill up toar-data with join data
+            # df_toar = toar_data.merge_toar_join(df_toar, df_join)
+
         df = pd.concat([df_era5, df_toar], axis=1, sort=True)
         meta = meta_era5 if meta_era5 is not None else meta_toar
         meta.loc["data_origin"] = str(data_origin)
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 300e0435..8ba78f19 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -22,7 +22,7 @@ import xarray as xr
 
 from mlair.data_handler.abstract_data_handler import AbstractDataHandler
 from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
-from mlair.helpers.join import EmptyQueryResult
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult
 
 number = Union[float, int]
diff --git a/mlair/helpers/data_sources/__init__.py b/mlair/helpers/data_sources/__init__.py
new file mode 100644
index 00000000..6b753bc3
--- /dev/null
+++ b/mlair/helpers/data_sources/__init__.py
@@ -0,0 +1,10 @@
+"""
+Data Sources.
+
+The module data_sources collects different data sources, namely ERA5, TOAR-Data v1 (JOIN), and TOAR-Data v2.
+"""
+
+__author__ = "Lukas Leufen"
+__date__ = "2022-07-05"
+
+from . import era5, join, toar_data, toar_data_v2
diff --git a/mlair/helpers/era5.py b/mlair/helpers/data_sources/era5.py
similarity index 64%
rename from mlair/helpers/era5.py
rename to mlair/helpers/data_sources/era5.py
index e0fb0746..a4f60afc 100644
--- a/mlair/helpers/era5.py
+++ b/mlair/helpers/data_sources/era5.py
@@ -5,14 +5,14 @@ __date__ = "2022-06-09"
 
 import logging
 import os
 
-import numpy as np
 import pandas as pd
 import xarray as xr
 
 from mlair import helpers
 from mlair.configuration.era5_settings import era5_settings
-from mlair.configuration.join_settings import join_settings
-from mlair.helpers.join import load_meta_data, EmptyQueryResult
+from mlair.configuration.toar_data_v2_settings import toar_data_v2_settings
+from mlair.helpers.data_sources.toar_data_v2 import load_station_information, combine_meta_data
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult
 from mlair.helpers.meteo import relative_humidity_from_dewpoint
 
@@ -30,14 +30,15 @@ def load_era5(station_name, stat_var, sampling, data_origin):
     else:
         raise ValueError(f"Given sampling {sampling} is not supported, only hourly sampling can be used.")
 
-    # get data connection settings
-    # load series information (lat/lon) from join database
-    join_url_base, headers = join_settings()
-    meta = load_meta_data(station_name, None, None, join_url_base, headers)
+    # load station meta using toar-data v2 API
+    meta_url_base, headers = toar_data_v2_settings("meta")
+    station_meta = load_station_information(station_name, meta_url_base, headers)
 
     # sel data for station using sel method nearest
+    logging.info(f"load data for {station_meta['codes'][0]} from ERA5")
     with xr.open_mfdataset(os.path.join(data_path, file_names)) as data:
-        station_dask = data.sel(lon=meta["station_lon"], lat=meta["station_lat"], method="nearest", drop=True)
+        lon, lat = station_meta["coordinates"]["lng"], station_meta["coordinates"]["lat"]
+        station_dask = data.sel(lon=lon, lat=lat, method="nearest", drop=True)
         station_data = station_dask.to_array().T.compute()
 
     # transform data and meta to pandas
@@ -55,10 
+56,20 @@ def load_era5(station_name, stat_var, sampling, data_origin):
     else:
         station_data = station_data[stat_var]
 
-    meta = pd.DataFrame.from_dict(meta, orient="index", columns=station_name)
+    variable_meta = _emulate_meta_data(station_data)
+    meta = combine_meta_data(station_meta, variable_meta)
+    meta = pd.DataFrame.from_dict(meta, orient="index")
+    meta.columns = station_name
     return station_data, meta
 
 
+def _emulate_meta_data(station_data):
+    general_meta = {"sampling_frequency": "hourly", "data_origin": "model", "data_origin_type": "model"}
+    roles_meta = {"roles": [{"contact": {"organisation": {"name": "ERA5", "longname": "ECMWF"}}}]}
+    variable_meta = {var: {"variable": {"name": var}, **roles_meta, **general_meta} for var in station_data.columns}
+    return variable_meta
+
+
 def _rename_era5_variables(era5_names):
     mapper = {"SP": "press", "U10M": "u", "V10M": "v", "T2M": "temp", "D2M": "dew", "BLH": "pblheight",
               "TCC": "cloudcover", "RHw": "relhum"}
diff --git a/mlair/helpers/join.py b/mlair/helpers/data_sources/join.py
similarity index 83%
rename from mlair/helpers/join.py
rename to mlair/helpers/data_sources/join.py
index 6d38887c..0ae1af1c 100644
--- a/mlair/helpers/join.py
+++ b/mlair/helpers/data_sources/join.py
@@ -7,23 +7,16 @@ import logging
 from typing import Iterator, Union, List, Dict, Tuple
 
 import pandas as pd
-import requests
-from requests.adapters import HTTPAdapter
-from requests.packages.urllib3.util.retry import Retry
 
 from mlair import helpers
 from mlair.configuration.join_settings import join_settings
+from mlair.helpers.data_sources import toar_data
+
 
 # join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/'
 str_or_none = Union[str, None]
 
 
-class EmptyQueryResult(Exception):
-    """Exception that get raised if a query to JOIN returns empty results."""
-
-    pass
-
-
 def download_join(station_name: Union[str, List[str]], stat_var: dict, station_type: str = None,
                   sampling: str = "daily", data_origin: Dict = None) -> [pd.DataFrame, pd.DataFrame]:
     """
@@ -49,14 +42,15 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
 
     # load series information
     vars_dict, data_origin = load_series_information(station_name, station_type, network_name, join_url_base, headers,
-                                                     data_origin)
+                                                     data_origin, stat_var)
 
     # check if all requested variables are available
     if set(stat_var).issubset(vars_dict) is False:
         missing_variables = set(stat_var).difference(vars_dict)
         origin = helpers.select_from_dict(data_origin, missing_variables)
         options = f"station={station_name}, type={station_type}, network={network_name}, origin={origin}"
-        raise EmptyQueryResult(f"No data found for variables {missing_variables} and options {options} in JOIN.")
+        raise toar_data.EmptyQueryResult(f"No data found for variables {missing_variables} and options {options} in "
+                                         f"JOIN.")
 
     # correct stat_var values if data is not aggregated (hourly)
     if sampling == "hourly":
@@ -76,7 +70,7 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
                 'sampling': sampling, 'capture': 0, 'format': 'json'}
 
         # load data
-        data = get_data(opts, headers)
+        data = toar_data.get_data(opts, headers)
 
         # adjust data format if given as list of list
         # no branch cover because this just happens when downloading hourly data using a secret token, not available
@@ -97,7 +91,7 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
         meta.columns = station_name
         return df, meta
     else:
-        raise EmptyQueryResult("No data found in JOIN.")
+        raise 
toar_data.EmptyQueryResult("No data found in JOIN.") def split_network_and_origin(origin_network_dict: dict) -> Tuple[Union[None, dict], Union[None, dict]]: @@ -163,38 +157,6 @@ def correct_data_format(data): return formatted -def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, List, str]: - """ - Download join data using requests framework. - - Data is returned as json like structure. Depending on the response structure, this can lead to a list or dictionary. - - :param opts: options to create the request url - :param headers: additional headers information like authorization, can be empty - :param as_json: extract response as json if true (default True) - - :return: requested data (either as list or dictionary) - """ - url = create_url(**opts) - response = retries_session().get(url, headers=headers, timeout=(5, None)) # timeout=(open, read) - if response.status_code == 200: - return response.json() if as_json is True else response.text - else: - raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}") - - -def retries_session(max_retries=3): - retry_strategy = Retry(total=max_retries, - backoff_factor=0.1, - status_forcelist=[429, 500, 502, 503, 504], - method_whitelist=["HEAD", "GET", "OPTIONS"]) - adapter = HTTPAdapter(max_retries=retry_strategy) - http = requests.Session() - http.mount("https://", adapter) - http.mount("http://", adapter) - return http - - def load_meta_data(station_name: List[str], station_type: str_or_none, network_name: str_or_none, join_url_base: str, headers: Dict) -> [Dict, Dict]: opts = {"base": join_url_base, "service": "search", "station_id": station_name[0], "station_type": station_type, @@ -210,11 +172,11 @@ def load_meta_data(station_name: List[str], station_type: str_or_none, network_n "google_resolution,station_comments,station_max_population_density_5km"} if network_name is None: opts["columns"] = opts["columns"].replace(",network_name", "") - return get_data(opts, headers)[-1] + return toar_data.get_data(opts, headers)[-1] def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none, - join_url_base: str, headers: Dict, data_origin: Dict = None) -> [Dict, Dict]: + join_url_base: str, headers: Dict, data_origin: Dict = None, stat_var: Dict = None) -> [Dict, Dict]: """ List all series ids that are available for given station id and network name. @@ -229,14 +191,23 @@ def load_series_information(station_name: List[str], station_type: str_or_none, and the series id as value. 
""" network_name_opts = _create_network_name_opts(network_name) + parameter_name_opts = _create_parameter_name_opts(stat_var) opts = {"base": join_url_base, "service": "search", "station_id": station_name[0], "station_type": station_type, - "network_name": network_name_opts, "as_dict": "true", + "network_name": network_name_opts, "as_dict": "true", "parameter_name": parameter_name_opts, "columns": "id,network_name,station_id,parameter_name,parameter_label,parameter_attribute"} - station_vars = get_data(opts, headers) + station_vars = toar_data.get_data(opts, headers) logging.debug(f"{station_name}: {station_vars}") return _select_distinct_series(station_vars, data_origin, network_name) +def _create_parameter_name_opts(stat_var): + if stat_var is None: + parameter_name_opts = None + else: + parameter_name_opts = ",".join(stat_var.keys()) + return parameter_name_opts + + def _create_network_name_opts(network_name): if network_name is None: network_name_opts = network_name @@ -253,8 +224,8 @@ def _create_network_name_opts(network_name): return network_name_opts -def _select_distinct_series(vars: List[Dict], data_origin: Dict = None, network_name: Union[str, List[str]] = None) -> \ - [Dict, Dict]: +def _select_distinct_series(vars: List[Dict], data_origin: Dict = None, network_name: Union[str, List[str]] = None) \ + -> [Dict, Dict]: """ Select distinct series ids for all variables. Also check if a parameter is from REA or not. """ @@ -295,10 +266,10 @@ def _select_distinct_network(vars: dict, network_name: Union[list, dict]) -> dic else: if len(network_list) == 0: # just print message which network is used if none is provided selected[var] = series[0] - logging.info(f"Could not find a valid match for variable {var} and networks {network_name}! " - f"Therefore, use first answer from JOIN: {series[0]}") + logging.info(f"Could not find a valid match for variable {var} and networks {network_name.get(var, [])}" + f"! Therefore, use first answer from JOIN: {series[0]}") else: # raise error if network name is provided but no match could be found - raise ValueError(f"Cannot find a valid match for requested networks {network_name} and " + raise ValueError(f"Cannot find a valid match for requested networks {network_name.get(var, [])} and " f"variable {var} as only following networks are available in JOIN: " f"{list(map(lambda x: x['network_name'], series))}") return selected @@ -380,30 +351,6 @@ def _lower_list(args: List[str]) -> Iterator[str]: yield string.lower() -def create_url(base: str, service: str, param_id: Union[str, int, None] = None, - **kwargs: Union[str, int, float, None]) -> str: - """ - Create a request url with given base url, service type and arbitrarily many additional keyword arguments. - - :param base: basic url of the rest service - :param service: service type, e.g. series, stats - :param param_id: id for a distinct service, is added between ending / of service and ? of kwargs - :param kwargs: keyword pairs for optional request specifications, e.g. 
'statistics=maximum'
-
-    :return: combined url as string
-    """
-    if not base.endswith("/"):
-        base += "/"
-    url = f"{base}{service}"
-    if not url.endswith("/"):
-        url += "/"
-    if param_id is not None:
-        url = f"{url}{param_id}"
-    if len(kwargs) > 0:
-        url = f"{url}?{'&'.join(f'{k}={v}' for k, v in kwargs.items() if v is not None)}"
-    return url
-
-
 if __name__ == "__main__":
     logging.basicConfig(level=logging.DEBUG)
     var_all_dic = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
diff --git a/mlair/helpers/data_sources/toar_data.py b/mlair/helpers/data_sources/toar_data.py
new file mode 100644
index 00000000..70d62238
--- /dev/null
+++ b/mlair/helpers/data_sources/toar_data.py
@@ -0,0 +1,89 @@
+__author__ = "Lukas Leufen"
+__date__ = "2022-07-05"
+
+
+from typing import Union, List, Dict
+
+from . import join, toar_data_v2
+
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+
+class EmptyQueryResult(Exception):
+    """Exception that gets raised if a query to a data source returns empty results."""
+
+    pass
+
+
+def create_url(base: str, service: str, param_id: Union[str, int, None] = None,
+               **kwargs: Union[str, int, float, None]) -> str:
+    """
+    Create a request url with given base url, service type and arbitrarily many additional keyword arguments.
+
+    :param base: basic url of the rest service
+    :param service: service type, e.g. series, stats
+    :param param_id: id for a distinct service, is added between ending / of service and ? of kwargs
+    :param kwargs: keyword pairs for optional request specifications, e.g. 'statistics=maximum'
+
+    :return: combined url as string
+    """
+    if not base.endswith("/"):
+        base += "/"
+    url = f"{base}{service}"
+    if not url.endswith("/"):
+        url += "/"
+    if param_id is not None:
+        url = f"{url}{param_id}"
+    if len(kwargs) > 0:
+        url = f"{url}?{'&'.join(f'{k}={v}' for k, v in kwargs.items() if v is not None)}"
+    return url
+
+
+def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, List, str]:
+    """
+    Download data from the TOAR database using the requests framework.
+
+    Data is returned as a JSON-like structure. Depending on the response structure, this can lead to a list or dictionary. 
+
+    :param opts: options to create the request url
+    :param headers: additional headers information like authorization, can be empty
+    :param as_json: extract response as json if true (default True)
+
+    :return: requested data (either as list or dictionary)
+    """
+    url = create_url(**opts)
+    response = retries_session().get(url, headers=headers, timeout=(5, None))  # timeout=(open, read)
+    if response.status_code == 200:
+        return response.json() if as_json is True else response.text
+    else:
+        raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}")
+
+
+def retries_session(max_retries=3):
+    retry_strategy = Retry(total=max_retries,
+                           backoff_factor=0.1,
+                           status_forcelist=[429, 500, 502, 503, 504],
+                           method_whitelist=["HEAD", "GET", "OPTIONS"])
+    adapter = HTTPAdapter(max_retries=retry_strategy)
+    http = requests.Session()
+    http.mount("https://", adapter)
+    http.mount("http://", adapter)
+    return http
+
+
+def download_toar(station, toar_stats, sampling, data_origin, station_type=None):
+
+    # load data from toar-data (v2)
+    df_toar, meta_toar = toar_data_v2.download_toar(station, toar_stats, sampling=sampling, data_origin=data_origin)
+
+    # load join data (toar-data v1), currently not yet merged into the v2 data
+    df_join, meta_join = join.download_join(station_name=station, stat_var=toar_stats, sampling=sampling,
+                                            station_type=station_type, data_origin=data_origin)
+
+    return df_toar, meta_toar
+
+
+def merge_toar_join(df_toar, df_join):
+    start_date = min([df_toar.index.min(), df_join.index.min()])  # TODO: fill up df_toar with df_join (incomplete)
\ No newline at end of file
diff --git a/mlair/helpers/toar_data_v2.py b/mlair/helpers/data_sources/toar_data_v2.py
similarity index 96%
rename from mlair/helpers/toar_data_v2.py
rename to mlair/helpers/data_sources/toar_data_v2.py
index 5cc67b6d..bf85dd9e 100644
--- a/mlair/helpers/toar_data_v2.py
+++ b/mlair/helpers/data_sources/toar_data_v2.py
@@ -4,14 +4,14 @@ __date__ = '2022-06-30'
 
 import logging
-from typing import Iterator, Union, List, Dict
+from typing import Union, List, Dict
 from io import StringIO
 
 import pandas as pd
 
 from mlair.configuration.toar_data_v2_settings import toar_data_v2_settings
 from mlair.helpers import to_list
-from mlair.helpers.join import EmptyQueryResult, get_data
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult, get_data
 
 str_or_none = Union[str, None]
@@ -90,12 +90,10 @@ def prepare_meta(meta, sampling, stat_var, var):
 def combine_meta_data(station_meta, timeseries_meta):
     meta = {}
     for k, v in station_meta.items():
-        print(k)
         if k == "codes":
             meta[k] = v[0]
         elif k in ["coordinates", "additional_metadata", "globalmeta"]:
             for _key, _val in v.items():
-                print(_key)
                 if _key == "lng":
                     meta["lon"] = _val
                 else:
@@ -105,9 +103,7 @@ def combine_meta_data(station_meta, timeseries_meta):
         else:
             meta[k] = v
     for var, var_meta in timeseries_meta.items():
-        print(var)
         for k, v in var_meta.items():
-            print(k)
             if k in ["additional_metadata", "station", "programme", "annotations", "changelog"]:
                 continue
             elif k == "roles":
@@ -192,7 +188,6 @@ def select_timeseries_by_origin(toar_meta, var_origin):
 def load_variables_information(var_dict, url_base, headers):
     var_meta_dict = {}
     for var in var_dict.keys():
-        # opts = {"base": url_base, "service": f"variables/{var}"}
         opts = {"base": url_base, "service": f"variables", "param_id": var}
         var_meta_dict[var] = get_data(opts, headers)
     return var_meta_dict
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 0e416acb..de700024 100644
--- a/mlair/run_modules/pre_processing.py
+++ 
b/mlair/run_modules/pre_processing.py @@ -18,7 +18,7 @@ import pandas as pd from mlair.data_handler import DataCollection, AbstractDataHandler from mlair.helpers import TimeTracking, to_list, tables from mlair.configuration import path_config -from mlair.helpers.join import EmptyQueryResult +from mlair.helpers.data_sources.toar_data import EmptyQueryResult from mlair.run_modules.run_environment import RunEnvironment diff --git a/test/test_helpers/test_join.py b/test/test_helpers/test_data_sources/test_join.py similarity index 88% rename from test/test_helpers/test_join.py rename to test/test_helpers/test_data_sources/test_join.py index 9a79d45e..0a9715f5 100644 --- a/test/test_helpers/test_join.py +++ b/test/test_helpers/test_data_sources/test_join.py @@ -2,11 +2,12 @@ from typing import Iterable import pytest -from mlair.helpers.join import * -from mlair.helpers.join import _save_to_pandas, _correct_stat_name, _lower_list, _select_distinct_series, \ +from mlair.helpers.data_sources.join import * +from mlair.helpers.data_sources.join import _save_to_pandas, _correct_stat_name, _lower_list, _select_distinct_series, \ _select_distinct_data_origin, _select_distinct_network from mlair.configuration.join_settings import join_settings from mlair.helpers.testing import check_nested_equality +from mlair.helpers.data_sources.toar_data import EmptyQueryResult class TestDownloadJoin: @@ -46,14 +47,6 @@ class TestCorrectDataFormat: "metadata": {"station": "test_station_001", "author": "ME", "success": True}} -class TestGetData: - - def test(self): - opts = {"base": join_settings()[0], "service": "series", "station_id": 'DEBW107', "network_name": "UBA", - "parameter_name": "o3,no2"} - assert get_data(opts, headers={}) == [[17057, 'UBA', 'DEBW107', 'O3'], [17058, 'UBA', 'DEBW107', 'NO2']] - - class TestLoadSeriesInformation: def test_standard_query(self): @@ -160,8 +153,7 @@ class TestSelectDistinctNetwork: 'parameter_label': 'PRESS-REA-MIUB', 'parameter_attribute': 'REA'}} assert check_nested_equality(res, expected) is True - message = "Could not find a valid match for variable %s and networks {'no2': [], 'o3': [], 'cloudcover': [], " \ - "'temp': [], 'press': []}! Therefore, use first answer from JOIN:" + message = "Could not find a valid match for variable %s and networks []! 
Therefore, use first answer from JOIN:" assert message % "no2" in caplog.messages[0] assert message % "o3" in caplog.messages[1] assert message % "cloudcover" in caplog.messages[2] @@ -186,16 +178,13 @@ class TestSelectDistinctNetwork: def test_single_network_given_no_match(self, vars): with pytest.raises(ValueError) as e: # AIRBASE not avail for all variables _select_distinct_network(vars, ["AIRBASE"]) - assert e.value.args[-1] == "Cannot find a valid match for requested networks {'no2': ['AIRBASE'], 'o3': " \ - "['AIRBASE'], 'cloudcover': ['AIRBASE'], 'temp': ['AIRBASE'], 'press': ['AIRBASE']" \ - "} and variable no2 as only following networks are available in JOIN: ['UBA']" + assert e.value.args[-1] == "Cannot find a valid match for requested networks ['AIRBASE'] and variable no2 as " \ + "only following networks are available in JOIN: ['UBA']" with pytest.raises(ValueError) as e: # both requested networks are not available for all variables _select_distinct_network(vars, ["LUBW", "EMEP"]) - assert e.value.args[-1] == "Cannot find a valid match for requested networks {'no2': ['LUBW', 'EMEP'], 'o3': " \ - "['LUBW', 'EMEP'], 'cloudcover': ['LUBW', 'EMEP'], 'temp': ['LUBW', 'EMEP'], " \ - "'press': ['LUBW', 'EMEP']} and variable no2 as only following networks are " \ - "available in JOIN: ['UBA']" + assert e.value.args[-1] == "Cannot find a valid match for requested networks ['LUBW', 'EMEP'] and variable " \ + "no2 as only following networks are available in JOIN: ['UBA']" def test_multiple_networks_given(self, vars): res = _select_distinct_network(vars, ["UBA", "AIRBASE"]) @@ -294,9 +283,8 @@ class TestSelectDistinctSeries: def test_network_not_available(self, vars): with pytest.raises(ValueError) as e: _select_distinct_series(vars, network_name="AIRBASE") - assert e.value.args[-1] == "Cannot find a valid match for requested networks {'no2': ['AIRBASE'], 'o3': " \ - "['AIRBASE'], 'cloudcover': ['AIRBASE'], 'temp': ['AIRBASE'], 'press': ['AIRBASE']" \ - "} and variable no2 as only following networks are available in JOIN: ['UBA']" + assert e.value.args[-1] == "Cannot find a valid match for requested networks ['AIRBASE'] and variable no2 as " \ + "only following networks are available in JOIN: ['UBA']" def test_different_network_and_origin(self, vars): origin = {"no2": "test", "temp": "", "cloudcover": "REA"} @@ -366,29 +354,3 @@ class TestLowerList: assert list(list_iterator) == ["capitalised", "already_small", "uppercase", "verystrange"] -class TestCreateUrl: - - def test_minimal_args_given(self): - url = create_url("www.base.edu", "testingservice") - assert url == "www.base.edu/testingservice/" - - def test_given_kwargs(self): - url = create_url("www.base2.edu/", "testingservice", mood="happy", confidence=0.98) - assert url == "www.base2.edu/testingservice/?mood=happy&confidence=0.98" - - def test_single_kwargs(self): - url = create_url("www.base2.edu/", "testingservice", mood="undefined") - assert url == "www.base2.edu/testingservice/?mood=undefined" - - def test_none_kwargs(self): - url = create_url("www.base2.edu/", "testingservice", mood="sad", happiness=None, stress_factor=100) - assert url == "www.base2.edu/testingservice/?mood=sad&stress_factor=100" - - def test_param_id(self): - url = create_url("www.base.edu", "testingservice", param_id="2001") - assert url == "www.base.edu/testingservice/2001" - - def test_param_id_kwargs(self): - url = create_url("www.base.edu", "testingservice", param_id=2001, mood="sad", happiness=None, stress_factor=100) - assert url == 
"www.base.edu/testingservice/?2001&mood=sad&stress_factor=100" - diff --git a/test/test_helpers/test_data_sources/test_toar_data.py b/test/test_helpers/test_data_sources/test_toar_data.py new file mode 100644 index 00000000..277a637b --- /dev/null +++ b/test/test_helpers/test_data_sources/test_toar_data.py @@ -0,0 +1,40 @@ +from mlair.configuration.join_settings import join_settings +from mlair.helpers.data_sources.toar_data import get_data, create_url + + +class TestGetData: + + def test(self): + opts = {"base": join_settings()[0], "service": "series", "station_id": 'DEBW107', "network_name": "UBA", + "parameter_name": "o3,no2"} + assert get_data(opts, headers={}) == [[17057, 'UBA', 'DEBW107', 'O3'], [17058, 'UBA', 'DEBW107', 'NO2']] + + +class TestCreateUrl: + + def test_minimal_args_given(self): + url = create_url("www.base.edu", "testingservice") + assert url == "www.base.edu/testingservice/" + + def test_given_kwargs(self): + url = create_url("www.base2.edu/", "testingservice", mood="happy", confidence=0.98) + assert url == "www.base2.edu/testingservice/?mood=happy&confidence=0.98" + + def test_single_kwargs(self): + url = create_url("www.base2.edu/", "testingservice", mood="undefined") + assert url == "www.base2.edu/testingservice/?mood=undefined" + + def test_none_kwargs(self): + url = create_url("www.base2.edu/", "testingservice", mood="sad", happiness=None, stress_factor=100) + assert url == "www.base2.edu/testingservice/?mood=sad&stress_factor=100" + + def test_param_id(self): + url = create_url("www.base.edu", "testingservice", param_id="2001") + assert url == "www.base.edu/testingservice/2001" + + def test_param_id_kwargs(self): + url = create_url("www.base.edu", "testingservice", param_id=2001, mood="sad", happiness=None, stress_factor=100) + assert url == "www.base.edu/testingservice/2001?mood=sad&stress_factor=100" + + url = create_url("www.base.edu", "testingservice", param_id=2001, mood="sad", series_id=222) + assert url == "www.base.edu/testingservice/2001?mood=sad&series_id=222" -- GitLab