diff --git a/src/data_preparation.py b/src/data_preparation.py index 64fd7f52b2309a79217d6d24d65757b35904a2e9..3c50ba893563780dfd8ac92f36fffabc38ed16a9 100644 --- a/src/data_preparation.py +++ b/src/data_preparation.py @@ -77,12 +77,13 @@ class DataPrep(object): file_name = self._set_file_name() meta_file = self._set_meta_file_name() try: + logging.debug(f"try to load local data from: {file_name}") data = self._slice_prep(xr.open_dataarray(file_name)) self.data = self.check_for_negative_concentrations(data) self.meta = pd.read_csv(meta_file, index_col=0) if self.station_type is not None: - self.check_station_type() + self.check_station_meta() logging.debug("loading finished") except FileNotFoundError as e: logging.warning(e) @@ -91,13 +92,21 @@ class DataPrep(object): self.data = self.check_for_negative_concentrations(data) logging.debug("loaded new data from JOIN") - def check_station_type(self): + def check_station_meta(self): """ - Search for the `station_type` entry in meta data and compare the value with the requested station_type. Raise - an EmptyQueryResult error if the values mismatch. + Search for the entries in meta data and compare the value with the requested values. Raise a FileNotFoundError + if the values mismatch. """ - if self.meta.at["station_type", self.station[0]] != self.station_type: - raise join.EmptyQueryResult + check_dict = { + "station_type": self.station_type, + "network_name": self.network + } + for (k, v) in check_dict.items(): + if self.meta.at[k, self.station[0]] != v: + logging.debug(f"meta data does not agree which given request for {k}: {v} (requested) != " + f"{self.meta.at[k, self.station[0]]} (local). Raise FileNotFoundError to trigger new " + f"grapping from web.") + raise FileNotFoundError def download_data_from_join(self, file_name: str, meta_file: str) -> [xr.DataArray, pd.DataFrame]: """ diff --git a/test/test_data_generator.py b/test/test_data_generator.py index d3f07524f4495f6d6ddd818b01707061035f0a81..2fe8b8c0b5a7f4f8be9626b0061702acb53ecb6b 100644 --- a/test/test_data_generator.py +++ b/test/test_data_generator.py @@ -1,12 +1,6 @@ import pytest import os from src.data_generator import DataGenerator -import logging -import numpy as np -import xarray as xr -import datetime as dt -import pandas as pd -from operator import itemgetter class TestDataGenerator: diff --git a/test/test_data_preparation.py b/test/test_data_preparation.py index ba3a4cdffb862940ee00fa0f45680e0dc3f9f615..30f93e6d734885252d2c7a438d6065aa680f32f8 100644 --- a/test/test_data_preparation.py +++ b/test/test_data_preparation.py @@ -1,6 +1,7 @@ import pytest import os from src.data_preparation import DataPrep +from src.join import EmptyQueryResult import logging import numpy as np import xarray as xr @@ -13,14 +14,16 @@ class TestDataPrep: @pytest.fixture def data(self): - return DataPrep(os.path.join(os.path.dirname(__file__), 'data'), 'dummy', 'DEBW107', ['o3', 'temp'], - test='testKWARGS', statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) + return DataPrep(os.path.join(os.path.dirname(__file__), 'data'), 'AIRBASE', 'DEBW107', ['o3', 'temp'], + station_type='background', test='testKWARGS', + statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) def test_init(self, data): assert data.path == os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') - assert data.network == 'dummy' + assert data.network == 'AIRBASE' assert data.station == ['DEBW107'] assert data.variables == ['o3', 'temp'] + assert data.station_type == "background" assert data.statistics_per_var == {'o3': 'dma8eu', 'temp': 'maximum'} assert not all([data.mean, data.std, data.history, data.label, data.station_type]) assert {'test': 'testKWARGS'}.items() <= data.kwargs.items() @@ -230,3 +233,9 @@ class TestDataPrep: assert res.sel({'variables': 'o3'}).min() >= 0 res = data.check_for_negative_concentrations(data.data, minimum=2) assert res.sel({'variables': 'o3'}).min() >= 2 + + def test_check_station(self, data): + with pytest.raises(EmptyQueryResult): + data_new = DataPrep(os.path.join(os.path.dirname(__file__), 'data'), 'dummy', 'DEBW107', ['o3', 'temp'], + station_type='traffic', statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) +