diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py index 36470ebc1c3a008c0f6ecca11478d83d6fa57cec..d07d8cf1ca70ebdbd864cf58fb3b4a61ff183868 100644 --- a/mlair/helpers/helpers.py +++ b/mlair/helpers/helpers.py @@ -12,13 +12,15 @@ from typing import Dict, Callable, Union, List, Any def to_list(obj: Any) -> List: """ - Transform given object to list if obj is not already a list. + Transform given object to list if obj is not already a list. Sets are also transformed to a list. :param obj: object to transform to list :return: list containing obj, or obj itself (if obj was already a list) """ - if not isinstance(obj, list): + if isinstance(obj, set): + obj = list(obj) + elif not isinstance(obj, list): obj = [obj] return obj @@ -99,7 +101,7 @@ def remove_items(obj: Union[List, Dict], items: Any): raise TypeError(f"{inspect.stack()[0][3]} does not support type {type(obj)}.") -def select_from_dict(dict_obj: dict, sel_list: str): +def select_from_dict(dict_obj: dict, sel_list: Any): """ Extract all key values pairs whose key is contained in the sel_list. diff --git a/mlair/helpers/join.py b/mlair/helpers/join.py index 43a0176811b54fba2983c1dba108f4c7977f1431..b772a053a7131e5538cbb9da6b03f6dc1c775a4d 100644 --- a/mlair/helpers/join.py +++ b/mlair/helpers/join.py @@ -45,7 +45,15 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t join_url_base, headers = join_settings(sampling) # load series information - vars_dict = load_series_information(station_name, station_type, network_name, join_url_base, headers, data_origin) + vars_dict, data_origin = load_series_information(station_name, station_type, network_name, join_url_base, headers, + data_origin) + + # check if all requested variables are available + if set(stat_var).issubset(vars_dict) is False: + missing_variables = set(stat_var).difference(vars_dict) + origin = helpers.select_from_dict(data_origin, missing_variables) + options = f"station={station_name}, type={station_type}, network={network_name}, origin={origin}" + raise EmptyQueryResult(f"No data found for variables {missing_variables} and options {options} in JOIN.") # correct stat_var values if data is not aggregated (hourly) if sampling == "hourly": @@ -58,7 +66,7 @@ def download_join(station_name: Union[str, List[str]], stat_var: dict, station_t for var in _lower_list(sorted(vars_dict.keys())): if var in stat_var.keys(): - logging.debug('load: {}'.format(var)) # ToDo start here for #206 + logging.debug('load: {}'.format(var)) # create data link opts = {'base': join_url_base, 'service': 'stats', 'id': vars_dict[var], 'statistics': stat_var[var], @@ -126,7 +134,7 @@ def get_data(opts: Dict, headers: Dict) -> Union[Dict, List]: def load_series_information(station_name: List[str], station_type: str_or_none, network_name: str_or_none, - join_url_base: str, headers: Dict, data_origin: Dict = None) -> Dict: + join_url_base: str, headers: Dict, data_origin: Dict = None) -> [Dict, Dict]: """ List all series ids that are available for given station id and network name. @@ -144,27 +152,30 @@ def load_series_information(station_name: List[str], station_type: str_or_none, "network_name": network_name, "as_dict": "true", "columns": "id,network_name,station_id,parameter_name,parameter_label,parameter_attribute"} station_vars = get_data(opts, headers) - logging.debug(f"{station_name}: {station_vars}") # ToDo start here for #206 + logging.debug(f"{station_name}: {station_vars}") return _select_distinct_series(station_vars, data_origin) -def _select_distinct_series(vars: List[Dict], data_origin: Dict = None): +def _select_distinct_series(vars: List[Dict], data_origin: Dict = None) -> [Dict, Dict]: """ Select distinct series ids for all variables. Also check if a parameter is from REA or not. """ + data_origin_default = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA", + "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", + "no": "", "no2": "", "o3": "", "pm10": "", "so2": ""} if data_origin is None: - data_origin = {"cloudcover": "REA", "humidity": "REA", "pblheight": "REA", "press": "REA", "relhum": "REA", - "temp": "REA", "totprecip": "REA", "u": "REA", "v": "REA", - "no": "", "no2": "", "o3": "", "pm10": "", "so2": ""} + data_origin = {} # ToDo: maybe press, wdir, wspeed from obs? or also temp, ... ? selected = {} for var in vars: name = var["parameter_name"].lower() var_attr = var["parameter_attribute"].lower() + if name not in data_origin.keys(): + data_origin.update({name: data_origin_default.get(name, "")}) attr = data_origin.get(name, "").lower() if var_attr == attr: selected[name] = var["id"] - return selected + return selected, data_origin def _save_to_pandas(df: Union[pd.DataFrame, None], data: dict, stat: str, var: str) -> pd.DataFrame: diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index 21fbb3edfa2c9665db12f7ea6bb0f9310ca324a1..d125474e2224d4311137702c2796bd89b9f198ee 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -257,7 +257,7 @@ class PostProcessing(RunEnvironment): .. note:: Bootstrap plots are only created if bootstraps are evaluated. """ - logging.debug("Run plotting routines...") + logging.info("Run plotting routines...") path = self.data_store.get("forecast_path") plot_list = self.data_store.get("plot_list", "postprocessing") diff --git a/test/test_join.py b/test/test_join.py index a9a4c381cbf58a272389b0b11283c8b0cce3ab42..850571e317fd864a837a8ae68d2bc519b0f07c72 100644 --- a/test/test_join.py +++ b/test/test_join.py @@ -25,7 +25,18 @@ class TestDownloadJoin: def test_download_empty(self): with pytest.raises(EmptyQueryResult) as e: download_join("DEBW107", {"o3": "dma8eu"}, "traffic") - assert e.value.args[-1] == "No data found in JOIN." + assert e.value.args[-1] == "No data found for variables {'o3'} and options station=['DEBW107'], type=traffic," \ + " network=None, origin={} in JOIN." + + def test_download_incomplete(self): + with pytest.raises(EmptyQueryResult) as e: + download_join("DEBW107", {"o3": "dma8eu", "o10": "maximum"}, "background") + assert e.value.args[-1] == "No data found for variables {'o10'} and options station=['DEBW107'], " \ + "type=background, network=None, origin={} in JOIN." + with pytest.raises(EmptyQueryResult) as e: + download_join("DEBW107", {"o3": "dma8eu", "o10": "maximum"}, "background", data_origin={"o10": ""}) + assert e.value.args[-1] == "No data found for variables {'o10'} and options station=['DEBW107'], " \ + "type=background, network=None, origin={'o10': ''} in JOIN." class TestCorrectDataFormat: @@ -53,11 +64,12 @@ class TestLoadSeriesInformation: def test_standard_query(self): expected_subset = {'o3': 23031, 'no2': 39002, 'temp': 85584, 'wspeed': 17060} - assert expected_subset.items() <= load_series_information(['DEBW107'], None, None, join_settings()[0], - {}).items() + res, orig = load_series_information(['DEBW107'], None, None, join_settings()[0], {}) + assert expected_subset.items() <= res.items() def test_empty_result(self): - assert load_series_information(['DEBW107'], "traffic", None, join_settings()[0], {}) == {} + res, orig = load_series_information(['DEBW107'], "traffic", None, join_settings()[0], {}) + assert res == {} class TestSelectDistinctSeries: @@ -81,15 +93,18 @@ class TestSelectDistinctSeries: 'parameter_label': 'PRESS-REA-MIUB', 'parameter_attribute': 'REA'}] def test_no_origin_given(self, vars): - res = _select_distinct_series(vars) + res, orig = _select_distinct_series(vars) assert res == {"no2": 16686, "o3": 16687, "cloudcover": 54036, "temp": 88491, "press": 102660} + assert orig == {"no2": "", "o3": "", "cloudcover": "REA", "temp": "REA", "press": "REA"} def test_different_origins(self, vars): origin = {"no2": "test", "temp": "", "cloudcover": "REA"} - res = _select_distinct_series(vars, data_origin=origin) - assert res == {"o3": 16687, "press": 16692, "temp": 16693, "cloudcover": 54036} - res = _select_distinct_series(vars, data_origin={}) - assert res == {"no2": 16686, "o3": 16687, "press": 16692, "temp": 16693} + res, orig = _select_distinct_series(vars, data_origin=origin) + assert res == {"o3": 16687, "press": 102660, "temp": 16693, "cloudcover": 54036} + assert orig == {"no2": "test", "o3": "", "cloudcover": "REA", "temp": "", "press": "REA"} + res, orig = _select_distinct_series(vars, data_origin={}) + assert res == {"cloudcover": 54036, "no2": 16686, "o3": 16687, "press": 102660, "temp": 88491} + assert orig == {"no2": "", "o3": "", "temp": "REA", "press": "REA", "cloudcover": "REA"} class TestSaveToPandas: