diff --git a/conftest.py b/conftest.py index 207606e6ec111459302360f5f2c4f917771bf80d..08641ff36543dbfba7109f84616ead8d2b472891 100644 --- a/conftest.py +++ b/conftest.py @@ -1,6 +1,8 @@ import os import re import shutil +import pytest +import mock def pytest_runtest_teardown(item, nextitem): @@ -48,3 +50,18 @@ def remove_files_from_regex(list_dir, path, regex): shutil.rmtree(os.path.join(path, e), ignore_errors=True) except: pass + + +@pytest.fixture(scope="session", autouse=True) +def default_session_fixture(request): + """ + :type request: _pytest.python.SubRequest + :return: + """ + patched = mock.patch("multiprocessing.cpu_count", return_value=1) + patched.__enter__() + + def unpatch(): + patched.__exit__() + + request.addfinalizer(unpatch) diff --git a/mlair/configuration/path_config.py b/mlair/configuration/path_config.py index bf40c361e121c409efec08b85fdf4e19848049ee..67c6bce4a3478443323b4ef49b5dc36258271ccd 100644 --- a/mlair/configuration/path_config.py +++ b/mlair/configuration/path_config.py @@ -29,11 +29,11 @@ def prepare_host(create_new=True, data_path=None, sampling="daily") -> str: user = getpass.getuser() runner_regex = re.compile(r"runner-.*-project-2411-concurrent-\d+") if hostname == "ZAM144": - data_path = f"/home/{user}/Data/toar_{sampling}/" + data_path = f"/home/{user}/Data/toar/" elif hostname == "zam347": - data_path = f"/home/{user}/Data/toar_{sampling}/" + data_path = f"/home/{user}/Data/toar/" elif (len(hostname) > 2) and (hostname[:2] == "jr"): - data_path = f"/p/project/cjjsc42/{user}/DATA/toar_{sampling}/" + data_path = f"/p/project/cjjsc42/{user}/DATA/toar/" elif (len(hostname) > 2) and (hostname[:2] in ['jw', 'ju'] or hostname[:5] in ['hdfml']): data_path = f"/p/project/deepacf/intelliaq/{user}/DATA/MLAIR/" elif runner_regex.match(hostname) is not None: diff --git a/mlair/data_handler/iterator.py b/mlair/data_handler/iterator.py index 49569405a587920da795820d48f8d968a8142cc7..39e20020f4f80a872428681d53e2ec9f1a3dd3f7 100644 --- a/mlair/data_handler/iterator.py +++ b/mlair/data_handler/iterator.py @@ -55,7 +55,7 @@ class DataCollection(Iterable): def add(self, element): self._collection.append(element) - self._mapping[str(element)] = len(self._collection) + self._mapping[str(element)] = len(self._collection) - 1 def _set_mapping(self): for i, e in enumerate(self._collection): diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index 4add6abbc98c2b0d97f3512a011c0ff5a3aefa70..c9e92e7deb22dcff12e9d4ab982f14289f764a97 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -137,7 +137,7 @@ class PreProcessing(RunEnvironment): if percentiles is None: percentiles = [.05, .1, .25, .5, .75, .9, .95] df_descr = df.iloc[:-ignore_last_lines].astype('float32').describe( - percentiles=percentiles).astype('int32') + percentiles=percentiles).astype("int32", errors="ignore") df_descr = pd.concat([df.loc[['# Samples']], df_descr]).T df_descr.rename(columns={"# Samples": "no. samples", "count": "no. stations"}, inplace=True) df_descr_colnames = list(df_descr.columns) diff --git a/test/test_configuration/test_path_config.py b/test/test_configuration/test_path_config.py index 2ba80a3bdf62b7fdf10b645da75769435cf7b6b9..846f252e1de8b082b681d2e1133b7f4894338212 100644 --- a/test/test_configuration/test_path_config.py +++ b/test/test_configuration/test_path_config.py @@ -16,9 +16,9 @@ class TestPrepareHost: @mock.patch("getpass.getuser", return_value="testUser") @mock.patch("os.path.exists", return_value=True) def test_prepare_host(self, mock_host, mock_user, mock_path): - assert prepare_host() == "/home/testUser/Data/toar_daily/" - assert prepare_host() == "/home/testUser/Data/toar_daily/" - assert prepare_host() == "/p/project/cjjsc42/testUser/DATA/toar_daily/" + assert prepare_host() == "/home/testUser/Data/toar/" + assert prepare_host() == "/home/testUser/Data/toar/" + assert prepare_host() == "/p/project/cjjsc42/testUser/DATA/toar/" assert prepare_host() == "/p/project/deepacf/intelliaq/testUser/DATA/MLAIR/" assert prepare_host() == '/home/testUser/mlair/data/' @@ -47,7 +47,7 @@ class TestPrepareHost: @mock.patch("os.makedirs", side_effect=None) def test_os_path_exists(self, mock_host, mock_user, mock_path, mock_check): path = prepare_host() - assert path == "/home/testUser/Data/toar_daily/" + assert path == "/home/testUser/Data/toar/" class TestSetExperimentName: diff --git a/test/test_data_handler/test_iterator.py b/test/test_data_handler/test_iterator.py index ec224c06e358297972097f2cc75cea86f768784f..678f3d369d4b6424f94557d7d739fc65a995aacc 100644 --- a/test/test_data_handler/test_iterator.py +++ b/test/test_data_handler/test_iterator.py @@ -52,6 +52,13 @@ class TestDataCollection: for e, i in enumerate(data_collection): assert i == e + def test_add(self): + data_collection = DataCollection() + data_collection.add("first_element") + assert len(data_collection) == 1 + assert data_collection["first_element"] == "first_element" + assert data_collection[0] == "first_element" + class DummyData: diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py index e7d05bdb51aeb069beda002f9b53439853edff91..11c46e99fb38489f5cbb26a8a87032049c96c7ca 100644 --- a/test/test_run_modules/test_pre_processing.py +++ b/test/test_run_modules/test_pre_processing.py @@ -11,6 +11,7 @@ from mlair.run_modules.pre_processing import PreProcessing from mlair.run_modules.run_environment import RunEnvironment import pandas as pd import numpy as np +import multiprocessing class TestPreProcessing: @@ -37,8 +38,7 @@ class TestPreProcessing: yield pre RunEnvironment().__del__() - @mock.patch("multiprocessing.cpu_count", return_value=1) - def test_init(self, mock_cpu, caplog): + def test_init(self, caplog): ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) caplog.clear() @@ -90,7 +90,7 @@ class TestPreProcessing: assert data_store.get("stations", "general.awesome") == ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'] @pytest.mark.parametrize("name", (None, "tester")) - def test_validate_station(self, caplog, obj_with_exp_setup, name): + def test_validate_station_serial(self, caplog, obj_with_exp_setup, name): pre = obj_with_exp_setup caplog.set_level(logging.INFO) stations = pre.data_store.get("stations", "general") @@ -101,6 +101,25 @@ class TestPreProcessing: assert valid_stations == stations[:-1] expected = "check valid stations started" + ' (%s)' % (name if name else 'all') assert caplog.record_tuples[0] == ('root', 20, expected) + assert caplog.record_tuples[1] == ('root', 20, "use serial validate station approach") + assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 6 ' + r'station\(s\). Found 5/6 valid stations.')) + + @mock.patch("multiprocessing.cpu_count", return_value=3) + @mock.patch("multiprocessing.Pool", return_value=multiprocessing.Pool(3)) + def test_validate_station_parallel(self, mock_pool, mock_cpu, caplog, obj_with_exp_setup): + pre = obj_with_exp_setup + caplog.clear() + caplog.set_level(logging.INFO) + stations = pre.data_store.get("stations", "general") + data_preparation = pre.data_store.get("data_handler") + collection, valid_stations = pre.validate_station(data_preparation, stations, set_name=None) + assert isinstance(collection, DataCollection) + assert len(valid_stations) < len(stations) + assert valid_stations == stations[:-1] + assert caplog.record_tuples[0] == ('root', 20, "check valid stations started (all)") + assert caplog.record_tuples[1] == ('root', 20, "use parallel validate station approach") + assert caplog.record_tuples[2] == ('root', 20, "running 3 processes in parallel") assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 6 ' r'station\(s\). Found 5/6 valid stations.'))