From ee34c882abfa37fd9f63345aa6f6d1808515141e Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 6 Jul 2022 10:45:30 +0200
Subject: [PATCH] restructured data loading modules

---
 .../data_handler_single_station.py            |  31 ++++--
 mlair/data_handler/default_data_handler.py    |   2 +-
 mlair/helpers/data_sources/__init__.py        |  10 ++
 mlair/helpers/{ => data_sources}/era5.py      |  29 +++--
 mlair/helpers/{ => data_sources}/join.py      | 103 +++++-------------
 mlair/helpers/data_sources/toar_data.py       |  89 +++++++++++++++
 .../{ => data_sources}/toar_data_v2.py        |   9 +-
 mlair/run_modules/pre_processing.py           |   2 +-
 .../{ => test_data_sources}/test_join.py      |  58 ++--------
 .../test_data_sources/test_toar_data.py       |  40 +++++++
 10 files changed, 219 insertions(+), 154 deletions(-)
 create mode 100644 mlair/helpers/data_sources/__init__.py
 rename mlair/helpers/{ => data_sources}/era5.py (64%)
 rename mlair/helpers/{ => data_sources}/join.py (83%)
 create mode 100644 mlair/helpers/data_sources/toar_data.py
 rename mlair/helpers/{ => data_sources}/toar_data_v2.py (96%)
 rename test/test_helpers/{ => test_data_sources}/test_join.py (88%)
 create mode 100644 test/test_helpers/test_data_sources/test_toar_data.py

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 690a44ff..516fab7d 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -20,9 +20,9 @@ import xarray as xr
 
 from mlair.configuration import check_path_and_create
 from mlair import helpers
-from mlair.helpers import join, statistics, TimeTrackingWrapper, filter_dict_by_value, select_from_dict, era5
+from mlair.helpers import statistics, TimeTrackingWrapper, filter_dict_by_value, select_from_dict
 from mlair.data_handler.abstract_data_handler import AbstractDataHandler
-from mlair.helpers import toar_data_v2
+from mlair.helpers import data_sources
 
 # define a more general date type for type hinting
 date = Union[dt.date, dt.datetime]
@@ -382,8 +382,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
         :return: downloaded data and its meta data
         """
         df_all = {}
-        df_era5, df_toar = None, None
-        meta_era5, meta_toar = None, None
+        df_era5, df_toar, df_join = None, None, None
+        meta_era5, meta_toar, meta_join = None, None, None
         if data_origin is not None:
             era5_origin = filter_dict_by_value(data_origin, "era5", True)
             era5_stats = select_from_dict(statistics_per_var, era5_origin.keys())
@@ -398,13 +398,24 @@ class DataHandlerSingleStation(AbstractDataHandler):
         # load data
         if era5_origin is not None and len(era5_stats) > 0:
             # load era5 data
-            df_era5, meta_era5 = era5.load_era5(station_name=station, stat_var=era5_stats, sampling=sampling,
-                                      data_origin=era5_origin)
+            df_era5, meta_era5 = data_sources.era5.load_era5(station_name=station, stat_var=era5_stats,
+                                                             sampling=sampling, data_origin=era5_origin)
         if toar_origin is None or len(toar_stats) > 0:
-            # load join data
-            # df_toar, meta_toar = toar_data_v2.download_toar(station, toar_stats, sampling=sampling, data_origin=toar_origin)
-            df_join, meta_join = join.download_join(station_name=station, stat_var=toar_stats, sampling=sampling,
-                                                    station_type=station_type, data_origin=toar_origin)
+            # load combined data from toar-data (v2 & v1)
+            df_toar, meta_toar = data_sources.toar_data.download_toar(station=station, toar_stats=toar_stats,
+                                                                      sampling=sampling, data_origin=toar_origin,
+                                                                      station_type=station_type)
+
+            # # load data from toar-data (v2)
+            # df_toar, meta_toar = toar_data.download_toar(station, toar_stats, sampling=sampling, data_origin=toar_origin)
+            #
+            # # load join data (toar-data v1)
+            # df_join, meta_join = join.download_join(station_name=station, stat_var=toar_stats, sampling=sampling,
+            #                                         station_type=station_type, data_origin=toar_origin)
+            #
+            # # fill-up toar-data with join data
+            # a = 1
+
         df = pd.concat([df_era5, df_toar], axis=1, sort=True)
         meta = meta_era5 if meta_era5 is not None else meta_toar
         meta.loc["data_origin"] = str(data_origin)
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 300e0435..8ba78f19 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -22,7 +22,7 @@ import xarray as xr
 
 from mlair.data_handler.abstract_data_handler import AbstractDataHandler
 from mlair.helpers import remove_items, to_list, TimeTrackingWrapper
-from mlair.helpers.join import EmptyQueryResult
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult
 
 
 number = Union[float, int]
diff --git a/mlair/helpers/data_sources/__init__.py b/mlair/helpers/data_sources/__init__.py
new file mode 100644
index 00000000..6b753bc3
--- /dev/null
+++ b/mlair/helpers/data_sources/__init__.py
@@ -0,0 +1,10 @@
+"""
+Data Sources.
+
+The module data_sources collects different data sources, namely ERA5, TOAR-Data v1 (JOIN), and TOAR-Data v2
+"""
+
+__author__ = "Lukas Leufen"
+__date__ = "2022-07-05"
+
+from . import era5, join, toar_data, toar_data_v2
diff --git a/mlair/helpers/era5.py b/mlair/helpers/data_sources/era5.py
similarity index 64%
rename from mlair/helpers/era5.py
rename to mlair/helpers/data_sources/era5.py
index e0fb0746..a4f60afc 100644
--- a/mlair/helpers/era5.py
+++ b/mlair/helpers/data_sources/era5.py
@@ -5,14 +5,14 @@ __date__ = "2022-06-09"
 import logging
 import os
 
-import numpy as np
 import pandas as pd
 import xarray as xr
 
 from mlair import helpers
 from mlair.configuration.era5_settings import era5_settings
-from mlair.configuration.join_settings import join_settings
-from mlair.helpers.join import load_meta_data, EmptyQueryResult
+from mlair.configuration.toar_data_v2_settings import toar_data_v2_settings
+from mlair.helpers.data_sources.toar_data_v2 import load_station_information, combine_meta_data
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult
 from mlair.helpers.meteo import relative_humidity_from_dewpoint
 
 
@@ -30,14 +30,15 @@ def load_era5(station_name, stat_var, sampling, data_origin):
     else:
         raise ValueError(f"Given sampling {sampling} is not supported, only hourly sampling can be used.")
 
-    # get data connection settings
-    # load series information (lat/lon) from join database
-    join_url_base, headers = join_settings()
-    meta = load_meta_data(station_name, None, None, join_url_base, headers)
+    # load station meta using toar-data v2 API
+    meta_url_base, headers = toar_data_v2_settings("meta")
+    station_meta = load_station_information(station_name, meta_url_base, headers)
 
     # sel data for station using sel method nearest
+    logging.info(f"load data for {station_meta['codes'][0]} from ERA5")
     with xr.open_mfdataset(os.path.join(data_path, file_names)) as data:
-        station_dask = data.sel(lon=meta["station_lon"], lat=meta["station_lat"], method="nearest", drop=True)
+        lon, lat = station_meta["coordinates"]["lng"], station_meta["coordinates"]["lat"]
+        station_dask = data.sel(lon=lon, lat=lat, method="nearest", drop=True)
         station_data = station_dask.to_array().T.compute()
 
     # transform data and meta to pandas
@@ -55,10 +56,20 @@ def load_era5(station_name, stat_var, sampling, data_origin):
     else:
         station_data = station_data[stat_var]
 
-    meta = pd.DataFrame.from_dict(meta, orient="index", columns=station_name)
+    variable_meta = _emulate_meta_data(station_data)
+    meta = combine_meta_data(station_meta, variable_meta)
+    meta = pd.DataFrame.from_dict(meta, orient='index')
+    meta.columns = station_name
     return station_data, meta
 
 
+def _emulate_meta_data(station_data):
+    general_meta = {"sampling_frequency": "hourly", "data_origin": "model", "data_origin_type": "model"}
+    roles_meta = {"roles": [{"contact": {"organisation": {"name": "ERA5", "longname": "ECMWF"}}}]}
+    variable_meta = {var: {"variable": {"name": var}, **roles_meta, **general_meta} for var in station_data.columns}
+    return variable_meta
+
+
 def _rename_era5_variables(era5_names):
     mapper = {"SP": "press", "U10M": "u", "V10M": "v", "T2M": "temp", "D2M": "dew", "BLH": "pblheight",
               "TCC": "cloudcover", "RHw": "relhum"}
diff --git a/mlair/helpers/join.py b/mlair/helpers/data_sources/join.py
similarity index 83%
rename from mlair/helpers/join.py
rename to mlair/helpers/data_sources/join.py
index 6d38887c..0ae1af1c 100644
--- a/mlair/helpers/join.py
+++ b/mlair/helpers/data_sources/join.py
@@ -7,23 +7,16 @@ import logging
 from typing import Iterator, Union, List, Dict, Tuple
 
 import pandas as pd
-import requests
-from requests.adapters import HTTPAdapter
-from requests.packages.urllib3.util.retry import Retry
 
 from mlair import helpers
 from mlair.configuration.join_settings import join_settings
+from mlair.helpers.data_sources import toar_data
+
 
 # join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/'
 str_or_none = Union[str, None]
 
 
-class EmptyQueryResult(Exception):
-    """Exception that get raised if a query to JOIN returns empty results."""
-
-    pass
-
-
 def download_join(station_name: Union[str, List[str]], stat_var: dict, station_type: str = None,
                   sampling: str = "daily", data_origin: Dict = None) -> [pd.DataFrame, pd.DataFrame]:
     """
@@ -49,14 +42,15 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
 
     # load series information
     vars_dict, data_origin = load_series_information(station_name, station_type, network_name, join_url_base, headers,
-                                                     data_origin)
+                                                     data_origin, stat_var)
 
     # check if all requested variables are available
     if set(stat_var).issubset(vars_dict) is False:
         missing_variables = set(stat_var).difference(vars_dict)
         origin = helpers.select_from_dict(data_origin, missing_variables)
         options = f"station={station_name}, type={station_type}, network={network_name}, origin={origin}"
-        raise EmptyQueryResult(f"No data found for variables {missing_variables} and options {options} in JOIN.")
+        raise toar_data.EmptyQueryResult(f"No data found for variables {missing_variables} and options {options} in "
+                                         f"JOIN.")
 
     # correct stat_var values if data is not aggregated (hourly)
     if sampling == "hourly":
@@ -76,7 +70,7 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
                     'sampling': sampling, 'capture': 0, 'format': 'json'}
 
             # load data
-            data = get_data(opts, headers)
+            data = toar_data.get_data(opts, headers)
 
             # adjust data format if given as list of list
             # no branch cover because this just happens when downloading hourly data using a secret token, not available
@@ -97,7 +91,7 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t
         meta.columns = station_name
         return df, meta
     else:
-        raise EmptyQueryResult("No data found in JOIN.")
+        raise toar_data.EmptyQueryResult("No data found in JOIN.")
 
 
 def split_network_and_origin(origin_network_dict: dict) -> Tuple[Union[None, dict], Union[None, dict]]:
@@ -163,38 +157,6 @@ def correct_data_format(data):
     return formatted
 
 
-def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, List, str]:
-    """
-    Download join data using requests framework.
-
-    Data is returned as json like structure. Depending on the response structure, this can lead to a list or dictionary.
-
-    :param opts: options to create the request url
-    :param headers: additional headers information like authorization, can be empty
-    :param as_json: extract response as json if true (default True)
-
-    :return: requested data (either as list or dictionary)
-    """
-    url = create_url(**opts)
-    response = retries_session().get(url, headers=headers, timeout=(5, None))  # timeout=(open, read)
-    if response.status_code == 200:
-        return response.json() if as_json is True else response.text
-    else:
-        raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}")
-
-
-def retries_session(max_retries=3):
-    retry_strategy = Retry(total=max_retries,
-                           backoff_factor=0.1,
-                           status_forcelist=[429, 500, 502, 503, 504],
-                           method_whitelist=["HEAD", "GET", "OPTIONS"])
-    adapter = HTTPAdapter(max_retries=retry_strategy)
-    http = requests.Session()
-    http.mount("https://", adapter)
-    http.mount("http://", adapter)
-    return http
-
-
 def load_meta_data(station_name: List[str], station_type: str_or_none, network_name: str_or_none,
                             join_url_base: str, headers: Dict) -> [Dict, Dict]:
     opts = {"base": join_url_base, "service": "search", "station_id": station_name[0], "station_type": station_type,
@@ -210,11 +172,11 @@ def load_meta_data(station_name: List[str], station_type: str_or_none, network_n
                        "google_resolution,station_comments,station_max_population_density_5km"}
     if network_name is None:
         opts["columns"] = opts["columns"].replace(",network_name", "")
-    return get_data(opts, headers)[-1]
+    return toar_data.get_data(opts, headers)[-1]
 
 
 def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none,
-                            join_url_base: str, headers: Dict, data_origin: Dict = None) -> [Dict, Dict]:
+                            join_url_base: str, headers: Dict, data_origin: Dict = None, stat_var: Dict = None) -> [Dict, Dict]:
     """
     List all series ids that are available for given station id and network name.
 
@@ -229,14 +191,23 @@ def load_series_information(station_name: List[str], station_type: str_or_none,
         and the series id as value.
     """
     network_name_opts = _create_network_name_opts(network_name)
+    parameter_name_opts = _create_parameter_name_opts(stat_var)
     opts = {"base": join_url_base, "service": "search", "station_id": station_name[0], "station_type": station_type,
-            "network_name": network_name_opts, "as_dict": "true",
+            "network_name": network_name_opts, "as_dict": "true", "parameter_name": parameter_name_opts,
             "columns": "id,network_name,station_id,parameter_name,parameter_label,parameter_attribute"}
-    station_vars = get_data(opts, headers)
+    station_vars = toar_data.get_data(opts, headers)
     logging.debug(f"{station_name}: {station_vars}")
     return _select_distinct_series(station_vars, data_origin, network_name)
 
 
+def _create_parameter_name_opts(stat_var):
+    if stat_var is None:
+        parameter_name_opts = None
+    else:
+        parameter_name_opts = ",".join(stat_var.keys())
+    return parameter_name_opts
+
+
 def _create_network_name_opts(network_name):
     if network_name is None:
         network_name_opts = network_name
@@ -253,8 +224,8 @@ def _create_network_name_opts(network_name):
     return network_name_opts
 
 
-def _select_distinct_series(vars: List[Dict], data_origin: Dict = None, network_name: Union[str, List[str]] = None) -> \
-        [Dict, Dict]:
+def _select_distinct_series(vars: List[Dict], data_origin: Dict = None, network_name: Union[str, List[str]] = None) \
+        -> [Dict, Dict]:
     """
     Select distinct series ids for all variables. Also check if a parameter is from REA or not.
     """
@@ -295,10 +266,10 @@ def _select_distinct_network(vars: dict, network_name: Union[list, dict]) -> dic
         else:
             if len(network_list) == 0:  # just print message which network is used if none is provided
                 selected[var] = series[0]
-                logging.info(f"Could not find a valid match for variable {var} and networks {network_name}! "
-                             f"Therefore, use first answer from JOIN: {series[0]}")
+                logging.info(f"Could not find a valid match for variable {var} and networks {network_name.get(var, [])}"
+                             f"! Therefore, use first answer from JOIN: {series[0]}")
             else:  # raise error if network name is provided but no match could be found
-                raise ValueError(f"Cannot find a valid match for requested networks {network_name} and "
+                raise ValueError(f"Cannot find a valid match for requested networks {network_name.get(var, [])} and "
                                  f"variable {var} as only following networks are available in JOIN: "
                                  f"{list(map(lambda x: x['network_name'], series))}")
     return selected
@@ -380,30 +351,6 @@ def _lower_list(args: List[str]) -> Iterator[str]:
         yield string.lower()
 
 
-def create_url(base: str, service: str, param_id: Union[str, int, None] = None,
-               **kwargs: Union[str, int, float, None]) -> str:
-    """
-    Create a request url with given base url, service type and arbitrarily many additional keyword arguments.
-
-    :param base: basic url of the rest service
-    :param service: service type, e.g. series, stats
-    :param param_id: id for a distinct service, is added between ending / of service and ? of kwargs
-    :param kwargs: keyword pairs for optional request specifications, e.g. 'statistics=maximum'
-
-    :return: combined url as string
-    """
-    if not base.endswith("/"):
-        base += "/"
-    url = f"{base}{service}"
-    if not url.endswith("/"):
-        url += "/"
-    if param_id is not None:
-        url = f"{url}{param_id}"
-    if len(kwargs) > 0:
-        url = f"{url}?{'&'.join(f'{k}={v}' for k, v in kwargs.items() if v is not None)}"
-    return url
-
-
 if __name__ == "__main__":
     logging.basicConfig(level=logging.DEBUG)
     var_all_dic = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
diff --git a/mlair/helpers/data_sources/toar_data.py b/mlair/helpers/data_sources/toar_data.py
new file mode 100644
index 00000000..70d62238
--- /dev/null
+++ b/mlair/helpers/data_sources/toar_data.py
@@ -0,0 +1,89 @@
+__author__ = "Lukas Leufen"
+__date__ = "2022-07-05"
+
+
+from typing import Union, List, Dict
+
+from . import join, toar_data_v2
+
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+
+class EmptyQueryResult(Exception):
+    """Exception that get raised if a query to JOIN returns empty results."""
+
+    pass
+
+
+def create_url(base: str, service: str, param_id: Union[str, int, None] = None,
+               **kwargs: Union[str, int, float, None]) -> str:
+    """
+    Create a request url with given base url, service type and arbitrarily many additional keyword arguments.
+
+    :param base: basic url of the rest service
+    :param service: service type, e.g. series, stats
+    :param param_id: id for a distinct service, is added between ending / of service and ? of kwargs
+    :param kwargs: keyword pairs for optional request specifications, e.g. 'statistics=maximum'
+
+    :return: combined url as string
+    """
+    if not base.endswith("/"):
+        base += "/"
+    url = f"{base}{service}"
+    if not url.endswith("/"):
+        url += "/"
+    if param_id is not None:
+        url = f"{url}{param_id}"
+    if len(kwargs) > 0:
+        url = f"{url}?{'&'.join(f'{k}={v}' for k, v in kwargs.items() if v is not None)}"
+    return url
+
+
+def get_data(opts: Dict, headers: Dict, as_json: bool = True) -> Union[Dict, List, str]:
+    """
+    Download join data using requests framework.
+
+    Data is returned as json like structure. Depending on the response structure, this can lead to a list or dictionary.
+
+    :param opts: options to create the request url
+    :param headers: additional headers information like authorization, can be empty
+    :param as_json: extract response as json if true (default True)
+
+    :return: requested data (either as list or dictionary)
+    """
+    url = create_url(**opts)
+    response = retries_session().get(url, headers=headers, timeout=(5, None))  # timeout=(open, read)
+    if response.status_code == 200:
+        return response.json() if as_json is True else response.text
+    else:
+        raise EmptyQueryResult(f"There was an error (STATUS {response.status_code}) for request {url}")
+
+
+def retries_session(max_retries=3):
+    retry_strategy = Retry(total=max_retries,
+                           backoff_factor=0.1,
+                           status_forcelist=[429, 500, 502, 503, 504],
+                           method_whitelist=["HEAD", "GET", "OPTIONS"])
+    adapter = HTTPAdapter(max_retries=retry_strategy)
+    http = requests.Session()
+    http.mount("https://", adapter)
+    http.mount("http://", adapter)
+    return http
+
+
+def download_toar(station, toar_stats, sampling, data_origin, station_type=None):
+
+    # load data from toar-data (v2)
+    df_toar, meta_toar = toar_data_v2.download_toar(station, toar_stats, sampling=sampling, data_origin=data_origin)
+
+    # load join data (toar-data v1)
+    df_join, meta_join = join.download_join(station_name=station, stat_var=toar_stats, sampling=sampling,
+                                            station_type=station_type, data_origin=data_origin)
+
+    return df_toar
+
+
+def merge_toar_join(df_toar, df_join):
+    start_date = min([df_toar.index.min(), df_join.index.min()])
\ No newline at end of file
diff --git a/mlair/helpers/toar_data_v2.py b/mlair/helpers/data_sources/toar_data_v2.py
similarity index 96%
rename from mlair/helpers/toar_data_v2.py
rename to mlair/helpers/data_sources/toar_data_v2.py
index 5cc67b6d..bf85dd9e 100644
--- a/mlair/helpers/toar_data_v2.py
+++ b/mlair/helpers/data_sources/toar_data_v2.py
@@ -4,14 +4,14 @@ __date__ = '2022-06-30'
 
 
 import logging
-from typing import Iterator, Union, List, Dict
+from typing import Union, List, Dict
 from io import StringIO
 
 import pandas as pd
 
 from mlair.configuration.toar_data_v2_settings import toar_data_v2_settings
 from mlair.helpers import to_list
-from mlair.helpers.join import EmptyQueryResult, get_data
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult, get_data
 
 
 str_or_none = Union[str, None]
@@ -90,12 +90,10 @@ def prepare_meta(meta, sampling, stat_var, var):
 def combine_meta_data(station_meta, timeseries_meta):
     meta = {}
     for k, v in station_meta.items():
-        print(k)
         if k == "codes":
             meta[k] = v[0]
         elif k in ["coordinates", "additional_metadata", "globalmeta"]:
             for _key, _val in v.items():
-                print(_key)
                 if _key == "lng":
                     meta["lon"] = _val
                 else:
@@ -105,9 +103,7 @@ def combine_meta_data(station_meta, timeseries_meta):
         else:
             meta[k] = v
     for var, var_meta in timeseries_meta.items():
-        print(var)
         for k, v in var_meta.items():
-            print(k)
             if k in ["additional_metadata", "station", "programme", "annotations", "changelog"]:
                 continue
             elif k == "roles":
@@ -192,7 +188,6 @@ def select_timeseries_by_origin(toar_meta, var_origin):
 def load_variables_information(var_dict, url_base, headers):
     var_meta_dict = {}
     for var in var_dict.keys():
-        # opts = {"base": url_base, "service": f"variables/{var}"}
         opts = {"base": url_base, "service": f"variables", "param_id": var}
         var_meta_dict[var] = get_data(opts, headers)
     return var_meta_dict
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 0e416acb..de700024 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -18,7 +18,7 @@ import pandas as pd
 from mlair.data_handler import DataCollection, AbstractDataHandler
 from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
-from mlair.helpers.join import EmptyQueryResult
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
 
 
diff --git a/test/test_helpers/test_join.py b/test/test_helpers/test_data_sources/test_join.py
similarity index 88%
rename from test/test_helpers/test_join.py
rename to test/test_helpers/test_data_sources/test_join.py
index 9a79d45e..0a9715f5 100644
--- a/test/test_helpers/test_join.py
+++ b/test/test_helpers/test_data_sources/test_join.py
@@ -2,11 +2,12 @@ from typing import Iterable
 
 import pytest
 
-from mlair.helpers.join import *
-from mlair.helpers.join import _save_to_pandas, _correct_stat_name, _lower_list, _select_distinct_series, \
+from mlair.helpers.data_sources.join import *
+from mlair.helpers.data_sources.join import _save_to_pandas, _correct_stat_name, _lower_list, _select_distinct_series, \
     _select_distinct_data_origin, _select_distinct_network
 from mlair.configuration.join_settings import join_settings
 from mlair.helpers.testing import check_nested_equality
+from mlair.helpers.data_sources.toar_data import EmptyQueryResult
 
 
 class TestDownloadJoin:
@@ -46,14 +47,6 @@ class TestCorrectDataFormat:
                              "metadata": {"station": "test_station_001", "author": "ME", "success": True}}
 
 
-class TestGetData:
-
-    def test(self):
-        opts = {"base": join_settings()[0], "service": "series", "station_id": 'DEBW107', "network_name": "UBA",
-                "parameter_name": "o3,no2"}
-        assert get_data(opts, headers={}) == [[17057, 'UBA', 'DEBW107', 'O3'], [17058, 'UBA', 'DEBW107', 'NO2']]
-
-
 class TestLoadSeriesInformation:
 
     def test_standard_query(self):
@@ -160,8 +153,7 @@ class TestSelectDistinctNetwork:
                       'parameter_label': 'PRESS-REA-MIUB', 'parameter_attribute': 'REA'}}
         assert check_nested_equality(res, expected) is True
 
-        message = "Could not find a valid match for variable %s and networks {'no2': [], 'o3': [], 'cloudcover': [], " \
-                  "'temp': [], 'press': []}! Therefore, use first answer from JOIN:"
+        message = "Could not find a valid match for variable %s and networks []! Therefore, use first answer from JOIN:"
         assert message % "no2" in caplog.messages[0]
         assert message % "o3" in caplog.messages[1]
         assert message % "cloudcover" in caplog.messages[2]
@@ -186,16 +178,13 @@ class TestSelectDistinctNetwork:
     def test_single_network_given_no_match(self, vars):
         with pytest.raises(ValueError) as e:  # AIRBASE not avail for all variables
             _select_distinct_network(vars, ["AIRBASE"])
-        assert e.value.args[-1] == "Cannot find a valid match for requested networks {'no2': ['AIRBASE'], 'o3': " \
-                                   "['AIRBASE'], 'cloudcover': ['AIRBASE'], 'temp': ['AIRBASE'], 'press': ['AIRBASE']" \
-                                   "} and variable no2 as only following networks are available in JOIN: ['UBA']"
+        assert e.value.args[-1] == "Cannot find a valid match for requested networks ['AIRBASE'] and variable no2 as " \
+                                   "only following networks are available in JOIN: ['UBA']"
 
         with pytest.raises(ValueError) as e:  # both requested networks are not available for all variables
             _select_distinct_network(vars, ["LUBW", "EMEP"])
-        assert e.value.args[-1] == "Cannot find a valid match for requested networks {'no2': ['LUBW', 'EMEP'], 'o3': " \
-                                   "['LUBW', 'EMEP'], 'cloudcover': ['LUBW', 'EMEP'], 'temp': ['LUBW', 'EMEP'], " \
-                                   "'press': ['LUBW', 'EMEP']} and variable no2 as only following networks are " \
-                                   "available in JOIN: ['UBA']"
+        assert e.value.args[-1] == "Cannot find a valid match for requested networks ['LUBW', 'EMEP'] and variable " \
+                                   "no2 as only following networks are available in JOIN: ['UBA']"
 
     def test_multiple_networks_given(self, vars):
         res = _select_distinct_network(vars, ["UBA", "AIRBASE"])
@@ -294,9 +283,8 @@ class TestSelectDistinctSeries:
     def test_network_not_available(self, vars):
         with pytest.raises(ValueError) as e:
             _select_distinct_series(vars, network_name="AIRBASE")
-        assert e.value.args[-1] == "Cannot find a valid match for requested networks {'no2': ['AIRBASE'], 'o3': " \
-                                   "['AIRBASE'], 'cloudcover': ['AIRBASE'], 'temp': ['AIRBASE'], 'press': ['AIRBASE']" \
-                                   "} and variable no2 as only following networks are available in JOIN: ['UBA']"
+        assert e.value.args[-1] == "Cannot find a valid match for requested networks ['AIRBASE'] and variable no2 as " \
+                                   "only following networks are available in JOIN: ['UBA']"
 
     def test_different_network_and_origin(self, vars):
         origin = {"no2": "test", "temp": "", "cloudcover": "REA"}
@@ -366,29 +354,3 @@ class TestLowerList:
         assert list(list_iterator) == ["capitalised", "already_small", "uppercase", "verystrange"]
 
 
-class TestCreateUrl:
-
-    def test_minimal_args_given(self):
-        url = create_url("www.base.edu", "testingservice")
-        assert url == "www.base.edu/testingservice/"
-
-    def test_given_kwargs(self):
-        url = create_url("www.base2.edu/", "testingservice", mood="happy", confidence=0.98)
-        assert url == "www.base2.edu/testingservice/?mood=happy&confidence=0.98"
-
-    def test_single_kwargs(self):
-        url = create_url("www.base2.edu/", "testingservice", mood="undefined")
-        assert url == "www.base2.edu/testingservice/?mood=undefined"
-
-    def test_none_kwargs(self):
-        url = create_url("www.base2.edu/", "testingservice", mood="sad", happiness=None, stress_factor=100)
-        assert url == "www.base2.edu/testingservice/?mood=sad&stress_factor=100"
-
-    def test_param_id(self):
-        url = create_url("www.base.edu", "testingservice", param_id="2001")
-        assert url == "www.base.edu/testingservice/2001"
-
-    def test_param_id_kwargs(self):
-        url = create_url("www.base.edu", "testingservice", param_id=2001, mood="sad", happiness=None, stress_factor=100)
-        assert url == "www.base.edu/testingservice/?2001&mood=sad&stress_factor=100"
-
diff --git a/test/test_helpers/test_data_sources/test_toar_data.py b/test/test_helpers/test_data_sources/test_toar_data.py
new file mode 100644
index 00000000..277a637b
--- /dev/null
+++ b/test/test_helpers/test_data_sources/test_toar_data.py
@@ -0,0 +1,40 @@
+from mlair.configuration.join_settings import join_settings
+from mlair.helpers.data_sources.toar_data import get_data, create_url
+
+
+class TestGetData:
+
+    def test(self):
+        opts = {"base": join_settings()[0], "service": "series", "station_id": 'DEBW107', "network_name": "UBA",
+                "parameter_name": "o3,no2"}
+        assert get_data(opts, headers={}) == [[17057, 'UBA', 'DEBW107', 'O3'], [17058, 'UBA', 'DEBW107', 'NO2']]
+
+
+class TestCreateUrl:
+
+    def test_minimal_args_given(self):
+        url = create_url("www.base.edu", "testingservice")
+        assert url == "www.base.edu/testingservice/"
+
+    def test_given_kwargs(self):
+        url = create_url("www.base2.edu/", "testingservice", mood="happy", confidence=0.98)
+        assert url == "www.base2.edu/testingservice/?mood=happy&confidence=0.98"
+
+    def test_single_kwargs(self):
+        url = create_url("www.base2.edu/", "testingservice", mood="undefined")
+        assert url == "www.base2.edu/testingservice/?mood=undefined"
+
+    def test_none_kwargs(self):
+        url = create_url("www.base2.edu/", "testingservice", mood="sad", happiness=None, stress_factor=100)
+        assert url == "www.base2.edu/testingservice/?mood=sad&stress_factor=100"
+
+    def test_param_id(self):
+        url = create_url("www.base.edu", "testingservice", param_id="2001")
+        assert url == "www.base.edu/testingservice/2001"
+
+    def test_param_id_kwargs(self):
+        url = create_url("www.base.edu", "testingservice", param_id=2001, mood="sad", happiness=None, stress_factor=100)
+        assert url == "www.base.edu/testingservice/2001?mood=sad&stress_factor=100"
+
+        url = create_url("www.base.edu", "testingservice", param_id=2001, mood="sad", series_id=222)
+        assert url == "www.base.edu/testingservice/2001?mood=sad&series_id=222"
-- 
GitLab