diff --git a/src/data_generator.py b/src/data_generator.py index dcd02bdd9530901026176df6de533ac0c3e84114..860791235f111a7ffb151f2b06424be76dc8eba7 100644 --- a/src/data_generator.py +++ b/src/data_generator.py @@ -42,7 +42,7 @@ class DataGenerator(keras.utils.Sequence): display all class attributes """ return f"DataGenerator(path='{self.data_path}', network='{self.network}', stations={self.stations}, " \ - f"variables={self.variables}, station_type='{self.station_type}', " \ + f"variables={self.variables}, station_type={self.station_type}, " \ f"interpolate_dim='{self.interpolate_dim}', target_dim='{self.target_dim}', " \ f"target_var='{self.target_var}', **{self.kwargs})" diff --git a/src/data_preparation.py b/src/data_preparation.py index 061eddc453fc6f2a8802f0b6cce0a1b0b7893e0d..3cadf590778e39174f76a139514fed5d8cbc505f 100644 --- a/src/data_preparation.py +++ b/src/data_preparation.py @@ -89,9 +89,11 @@ class DataPrep(object): self.data = self.check_for_negative_concentrations(data) def check_station_type(self): + """ + Search for the `station_type` entry in meta data and compare the value with the requested station_type. Raise + an EmptyQueryResult error if the values mismatch. + """ if self.meta.at["station_type", self.station[0]] != self.station_type: - self.data = None - self.meta = None raise join.EmptyQueryResult def download_data_from_join(self, file_name: str, meta_file: str) -> [xr.DataArray, pd.DataFrame]: diff --git a/test/test_data_generator.py b/test/test_data_generator.py index 7c745782bc057060dd439af1fb6e03c5b3ef5730..39a352327b7e3f5eb4f9b046a52aa763372df61c 100644 --- a/test/test_data_generator.py +++ b/test/test_data_generator.py @@ -21,6 +21,7 @@ class TestDataGenerator: assert gen.network == 'UBA' assert gen.stations == ['DEBW107'] assert gen.variables == ['o3', 'temp'] + assert gen.station_type is None assert gen.interpolate_dim == 'datetime' assert gen.target_dim == 'variables' assert gen.target_var == 'o3' @@ -34,7 +35,7 @@ class TestDataGenerator: def test_repr(self, gen): path = os.path.join(os.path.dirname(__file__), 'data') assert gen.__repr__().rstrip() == f"DataGenerator(path='{path}', network='UBA', stations=['DEBW107'], "\ - f"variables=['o3', 'temp'], interpolate_dim='datetime', " \ + f"variables=['o3', 'temp'], station_type=None, interpolate_dim='datetime', " \ f"target_dim='variables', target_var='o3', **{{}})".rstrip() def test_len(self, gen): diff --git a/test/test_data_preparation.py b/test/test_data_preparation.py index 5d45c041b6e669cced56172d41fc2f9653dd30e7..0a520eafc40e6bdd31ed2738488af1292fc00f59 100644 --- a/test/test_data_preparation.py +++ b/test/test_data_preparation.py @@ -22,7 +22,7 @@ class TestDataPrep: assert data.station == ['DEBW107'] assert data.variables == ['o3', 'temp'] assert data.statistics_per_var == {'o3': 'dma8eu', 'temp': 'maximum'} - assert not all([data.mean, data.std, data.history, data.label]) + assert not all([data.mean, data.std, data.history, data.label, data.station_type]) assert {'test': 'testKWARGS'}.items() <= data.kwargs.items() def test_init_no_stats(self): @@ -35,9 +35,10 @@ class TestDataPrep: d.network = 'dummy' d.station = ['DEBW107'] d.variables = ['o3', 'temp'] + d.station_type = "traffic" d.kwargs = None assert d.__repr__().rstrip() == "Dataprep(path='data/test', network='dummy', station=['DEBW107'], "\ - "variables=['o3', 'temp'], **None)".rstrip() + "variables=['o3', 'temp'], station_type='traffic', **None)".rstrip() def test_set_file_name_and_meta(self): d = object.__new__(DataPrep) diff --git a/test/test_modules/test_experiment_setup.py b/test/test_modules/test_experiment_setup.py index 832ff45a0a3b1384e1300c0fa38ed3d1ec2204b8..be3db59e2415f28ea63d42b7cc6ced6b2c095700 100644 --- a/test/test_modules/test_experiment_setup.py +++ b/test/test_modules/test_experiment_setup.py @@ -48,7 +48,7 @@ class TestExperimentSetup: # experiment setup assert data_store.get("data_path", "general") == prepare_host() assert data_store.get("trainable", "general") is False - assert data_store.get("fraction_of_train", "general") == 0.8 + assert data_store.get("fraction_of_training", "general") == 0.8 # set experiment name assert data_store.get("experiment_name", "general") == "TestExperiment" path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "TestExperiment")) @@ -67,6 +67,7 @@ class TestExperimentSetup: 'DEBW052', 'DEBW034', 'DEBY088', ] assert data_store.get("stations", "general") == default_stations assert data_store.get("network", "general") == "AIRBASE" + assert data_store.get("station_type", "general") is None assert data_store.get("variables", "general") == list(default_var_all_dict.keys()) assert data_store.get("statistics_per_var", "general") == default_var_all_dict assert data_store.get("start", "general") == "1997-01-01" @@ -97,7 +98,8 @@ class TestExperimentSetup: experiment_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "testExperimentFolder")) kwargs = dict(parser_args={"experiment_date": "TODAY"}, var_all_dict={'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum'}, - stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", variables=["o3", "temp"], + stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background", + variables=["o3", "temp"], statistics_per_var=None, start="1999-01-01", end="2001-01-01", window_history=4, target_var="temp", target_dim="target", window_lead_time=10, dimensions="dim1", interpolate_dim="int_dim", interpolate_method="cubic", limit_nan_fill=5, train_start="2000-01-01", @@ -109,7 +111,7 @@ class TestExperimentSetup: # experiment setup assert data_store.get("data_path", "general") == prepare_host() assert data_store.get("trainable", "general") is True - assert data_store.get("fraction_of_train", "general") == 0.5 + assert data_store.get("fraction_of_training", "general") == 0.5 # set experiment name assert data_store.get("experiment_name", "general") == "TODAY_network/" path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "testExperimentFolder")) @@ -119,6 +121,7 @@ class TestExperimentSetup: 'temp': 'maximum'} assert data_store.get("stations", "general") == ['DEBY053', 'DEBW059', 'DEBW027'] assert data_store.get("network", "general") == "INTERNET" + assert data_store.get("station_type", "general") == "background" assert data_store.get("variables", "general") == ["o3", "temp"] assert data_store.get("statistics_per_var", "general") == {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum'} diff --git a/test/test_modules/test_pre_processing.py b/test/test_modules/test_pre_processing.py index bc121885ddb8ee20b0f571e7f0250845c6e99e6a..eb7ba4e8f4d909711f92ca667b98ae403cb30756 100644 --- a/test/test_modules/test_pre_processing.py +++ b/test/test_modules/test_pre_processing.py @@ -1,7 +1,7 @@ import logging import pytest -from src.helpers import PyTestRegex, TimeTracking +from src.helpers import PyTestRegex from src.modules.experiment_setup import ExperimentSetup from src.modules.pre_processing import PreProcessing, DEFAULT_ARGS_LIST, DEFAULT_KWARGS_LIST from src.data_generator import DataGenerator @@ -29,8 +29,8 @@ class TestPreProcessing: @pytest.fixture def obj_with_exp_setup(self): - ExperimentSetup(parser_args={}, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'], - var_all_dict={'o3': 'dma8eu', 'temp': 'maximum'}) + ExperimentSetup(parser_args={}, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'], + var_all_dict={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background") pre = object.__new__(PreProcessing) super(PreProcessing, pre).__init__() yield pre @@ -73,8 +73,8 @@ class TestPreProcessing: def test_create_set_split_all_stations(self, caplog, obj_with_exp_setup): caplog.set_level(logging.DEBUG) obj_with_exp_setup.create_set_split(slice(0, 2), "awesome") - assert caplog.record_tuples[0] == ('root', 10, "Awesome stations (len=5): ['DEBW107', 'DEBY081', 'DEBW013', " - "'DEBW076', 'DEBW087']") + assert caplog.record_tuples[0] == ('root', 10, "Awesome stations (len=6): ['DEBW107', 'DEBY081', 'DEBW013', " + "'DEBW076', 'DEBW087', 'DEBW001']") data_store = obj_with_exp_setup.data_store assert isinstance(data_store.get("generator", "general.awesome"), DataGenerator) with pytest.raises(NameNotFoundInScope): @@ -88,9 +88,11 @@ class TestPreProcessing: kwargs = pre._create_args_dict(DEFAULT_KWARGS_LIST) stations = pre.data_store.get("stations", "general") valid_stations = pre.check_valid_stations(args, kwargs, stations) - assert valid_stations == stations + assert len(valid_stations) < len(stations) + assert valid_stations == stations[:-1] assert caplog.record_tuples[0] == ('root', 20, 'check valid stations started') - assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+\.\d+s to check 5 station\(s\)')) + assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+\.\d+s to check 6 station\(s\). Found ' + r'5/6 valid stations.')) def test_split_set_indices(self, obj_no_init): dummy_list = list(range(0, 15))