diff --git a/src/data_handling/data_preparation.py b/src/data_handling/data_preparation.py index d0d89438c14c4f2cfbbc3e76504b82f310dc1a8a..c39625b1e02506696ee5b4c13ac86c7e73420acf 100644 --- a/src/data_handling/data_preparation.py +++ b/src/data_handling/data_preparation.py @@ -108,7 +108,7 @@ class DataPrep(object): check_dict = {"station_type": self.station_type, "network_name": self.network} for (k, v) in check_dict.items(): if self.meta.at[k, self.station[0]] != v: - logging.debug(f"meta data does not agree which given request for {k}: {v} (requested) != " + logging.debug(f"meta data does not agree with given request for {k}: {v} (requested) != " f"{self.meta.at[k, self.station[0]]} (local). Raise FileNotFoundError to trigger new " f"grapping from web.") raise FileNotFoundError diff --git a/test/test_data_handling/test_data_preparation.py b/test/test_data_handling/test_data_preparation.py index 12b619d9e31990f6cc24216ff84ad9d030265e36..d67b8addc8a2a4ab524c97f6ee6e25c80a7ba1ea 100644 --- a/test/test_data_handling/test_data_preparation.py +++ b/test/test_data_handling/test_data_preparation.py @@ -7,6 +7,8 @@ import xarray as xr import datetime as dt import pandas as pd from operator import itemgetter +import logging +from src.helpers import PyTestRegex class TestDataPrep: @@ -17,6 +19,17 @@ class TestDataPrep: station_type='background', test='testKWARGS', statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) + @pytest.fixture + def data_prep_no_init(self): + d = object.__new__(DataPrep) + d.path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') + d.network = 'UBA' + d.station = ['DEBW107'] + d.variables = ['o3', 'temp'] + d.station_type = "background" + d.kwargs = None + return d + def test_init(self, data): assert data.path == os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') assert data.network == 'AIRBASE' @@ -31,16 +44,79 @@ class TestDataPrep: with pytest.raises(NotImplementedError): DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp']) - def test_repr(self): - d = object.__new__(DataPrep) - d.path = 'data/test' - d.network = 'dummy' - d.station = ['DEBW107'] - d.variables = ['o3', 'temp'] - d.station_type = "traffic" - d.kwargs = None - assert d.__repr__().rstrip() == "Dataprep(path='data/test', network='dummy', station=['DEBW107'], "\ - "variables=['o3', 'temp'], station_type=traffic, **None)".rstrip() + def test_download_data(self, data_prep_no_init): + file_name = data_prep_no_init._set_file_name() + meta_file = data_prep_no_init._set_meta_file_name() + data_prep_no_init.kwargs = {"store_data_locally": False} + data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'} + data_prep_no_init.download_data(file_name, meta_file) + assert isinstance(data_prep_no_init.data, xr.DataArray) + + def test_download_data_from_join(self, data_prep_no_init): + file_name = data_prep_no_init._set_file_name() + meta_file = data_prep_no_init._set_meta_file_name() + data_prep_no_init.kwargs = {"store_data_locally": False} + data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'} + xarr, meta = data_prep_no_init.download_data_from_join(file_name, meta_file) + assert isinstance(xarr, xr.DataArray) + assert isinstance(meta, pd.DataFrame) + + def test_check_station_meta(self, caplog, data_prep_no_init): + caplog.set_level(logging.DEBUG) + file_name = data_prep_no_init._set_file_name() + meta_file = data_prep_no_init._set_meta_file_name() + data_prep_no_init.kwargs = {"store_data_locally": False} + data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'} + data_prep_no_init.download_data(file_name, meta_file) + assert data_prep_no_init.check_station_meta() is None + data_prep_no_init.station_type = "traffic" + with pytest.raises(FileNotFoundError) as e: + data_prep_no_init.check_station_meta() + msg = "meta data does not agree with given request for station_type: traffic (requested) != background (local)" + assert caplog.record_tuples[-1][:-1] == ('root', 10) + assert msg in caplog.record_tuples[-1][-1] + + def test_load_data_overwrite_local_data(self, data_prep_no_init): + data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'} + file_path = data_prep_no_init._set_file_name() + meta_file_path = data_prep_no_init._set_meta_file_name() + os.remove(file_path) + os.remove(meta_file_path) + assert not os.path.exists(file_path) + assert not os.path.exists(meta_file_path) + data_prep_no_init.kwargs = {"overwrite_local_data": True} + data_prep_no_init.load_data() + assert os.path.exists(file_path) + assert os.path.exists(meta_file_path) + t = os.stat(file_path).st_ctime + tm = os.stat(meta_file_path).st_ctime + data_prep_no_init.load_data() + assert os.path.exists(file_path) + assert os.path.exists(meta_file_path) + assert os.stat(file_path).st_ctime > t + assert os.stat(meta_file_path).st_ctime > tm + assert isinstance(data_prep_no_init.data, xr.DataArray) + assert isinstance(data_prep_no_init.meta, pd.DataFrame) + + def test_load_data_keep_local_data(self, data_prep_no_init): + data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'} + data_prep_no_init.station_type = None + data_prep_no_init.kwargs = {} + file_path = data_prep_no_init._set_file_name() + data_prep_no_init.load_data() + assert os.path.exists(file_path) + t = os.stat(file_path).st_ctime + data_prep_no_init.load_data() + assert os.path.exists(data_prep_no_init._set_file_name()) + assert os.stat(file_path).st_ctime == t + assert isinstance(data_prep_no_init.data, xr.DataArray) + assert isinstance(data_prep_no_init.meta, pd.DataFrame) + + def test_repr(self, data_prep_no_init): + path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') + assert data_prep_no_init.__repr__().rstrip() == f"Dataprep(path='{path}', network='UBA', " \ + f"station=['DEBW107'], variables=['o3', 'temp'], " \ + f"station_type=background, **None)".rstrip() def test_set_file_name_and_meta(self): d = object.__new__(DataPrep) @@ -133,6 +209,16 @@ class TestDataPrep: with pytest.raises(NotImplementedError): data.inverse_transform() + def test_get_transformation_information(self, data): + assert (None, None, None) == data.get_transformation_information("o3") + mean_test = data.data.mean("datetime").sel(variables='o3').values + std_test = data.data.std("datetime").sel(variables='o3').values + data.transform('datetime') + mean, std, info = data.get_transformation_information("o3") + assert np.testing.assert_almost_equal(mean, mean_test) is None + assert np.testing.assert_almost_equal(std, std_test) is None + assert info == "standardise" + def test_nan_remove_no_hist_or_label(self, data): assert data.history is None assert data.label is None