Skip to content
Snippets Groups Projects
Select Git revision
  • ad302db1fb194e46731af79dca057e881c73e965
  • master default protected
  • enxhi_issue460_remove_TOAR-I_access
  • michael_issue459_preprocess_german_stations
  • sh_pollutants
  • develop protected
  • release_v2.4.0
  • michael_issue450_feat_load-ifs-data
  • lukas_issue457_feat_set-config-paths-as-parameter
  • lukas_issue454_feat_use-toar-statistics-api-v2
  • lukas_issue453_refac_advanced-retry-strategy
  • lukas_issue452_bug_update-proj-version
  • lukas_issue449_refac_load-era5-data-from-toar-db
  • lukas_issue451_feat_robust-apriori-estimate-for-short-timeseries
  • lukas_issue448_feat_load-model-from-path
  • lukas_issue447_feat_store-and-load-local-clim-apriori-data
  • lukas_issue445_feat_data-insight-plot-monthly-distribution
  • lukas_issue442_feat_bias-free-evaluation
  • lukas_issue444_feat_choose-interp-method-cams
  • 414-include-crps-analysis-and-other-ens-verif-methods-or-plots
  • lukas_issue384_feat_aqw-data-handler
  • v2.4.0 protected
  • v2.3.0 protected
  • v2.2.0 protected
  • v2.1.0 protected
  • Kleinert_etal_2022_initial_submission
  • v2.0.0 protected
  • v1.5.0 protected
  • v1.4.0 protected
  • v1.3.0 protected
  • v1.2.1 protected
  • v1.2.0 protected
  • v1.1.0 protected
  • IntelliO3-ts-v1.0_R1-submit
  • v1.0.0 protected
  • v0.12.2 protected
  • v0.12.1 protected
  • v0.12.0 protected
  • v0.11.0 protected
  • v0.10.0 protected
  • IntelliO3-ts-v1.0_initial-submit
41 results

model_class.py

Blame
  • old_t_pre_processing.py 5.94 KiB
    import logging
    
    import pytest
    
    from src.data_handling import DataPrepJoin
    from src.data_handling.data_generator import DataGenerator
    from src.helpers.datastore import NameNotFoundInScope
    from src.helpers import PyTestRegex
    from src.run_modules.experiment_setup import ExperimentSetup
    from src.run_modules.pre_processing import PreProcessing, DEFAULT_ARGS_LIST, DEFAULT_KWARGS_LIST
    from src.run_modules.run_environment import RunEnvironment
    
    
    class TestPreProcessing:
        """
        Tests for the ``PreProcessing`` run module.

        The fixtures create ``PreProcessing`` objects with ``object.__new__`` and call only the initialiser of the
        parent class, so ``PreProcessing.__init__`` is not executed and single methods can be tested on their own.
        Both fixtures and ``test_init`` finish with ``RunEnvironment().__del__()``.

        NOTE(review): that call presumably resets the shared run environment / data store so that state does not leak
        between tests -- confirm against ``RunEnvironment``.
        """

        @pytest.fixture
        def obj_super_init(self):
            """Yield a ``PreProcessing`` object whose data store holds dummy entries in nested scopes."""
            obj = object.__new__(PreProcessing)
            # bypass PreProcessing.__init__ and run the parent initialiser only
            super(PreProcessing, obj).__init__()
            obj.data_store.set("NAME1", 1, "general")
            obj.data_store.set("NAME2", 2, "general")
            obj.data_store.set("NAME3", 3, "general")
            obj.data_store.set("NAME1", 10, "general.sub")
            obj.data_store.set("NAME4", 4, "general.sub.sub")
            yield obj
            # teardown: executed by pytest after the test that requested this fixture
            RunEnvironment().__del__()

        @pytest.fixture
        def obj_with_exp_setup(self):
            """Yield a ``PreProcessing`` object created after an ``ExperimentSetup`` with six stations was run."""
            ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
                            statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}, station_type="background",
                            data_preparation=DataPrepJoin)
            pre = object.__new__(PreProcessing)
            # bypass PreProcessing.__init__ and run the parent initialiser only
            super(PreProcessing, pre).__init__()
            yield pre
            # teardown: executed by pytest after the test that requested this fixture
            RunEnvironment().__del__()

        def test_init(self, caplog):
            """Check the first two and the last log record emitted while entering ``PreProcessing()``."""
            # try/finally guarantees the cleanup call even if the setup or one of the assertions raises; without it a
            # failing test would skip the cleanup at the end of the test body
            try:
                ExperimentSetup(stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087'],
                                statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
                caplog.clear()
                caplog.set_level(logging.INFO)
                with PreProcessing():
                    records = caplog.record_tuples
                    assert records[0] == ('root', 20, 'PreProcessing started')
                    assert records[1] == ('root', 20, 'check valid stations started (preprocessing)')
                    assert records[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 5 '
                                                                   r'station\(s\). Found 5/5 valid stations.'))
            finally:
                RunEnvironment().__del__()

        def test_run(self, obj_with_exp_setup):
            """Check that ``_run`` returns None and stores a generator in the four sub scopes of ``general``."""
            assert obj_with_exp_setup.data_store.search_name("generator") == []
            assert obj_with_exp_setup._run() is None
            assert obj_with_exp_setup.data_store.search_name("generator") == sorted(["general.train", "general.val",
                                                                                     "general.train_val", "general.test"])

        def test_split_train_val_test(self, obj_with_exp_setup):
            """Check which names ``split_train_val_test`` stores in ``general.train`` and where generators exist."""
            assert obj_with_exp_setup.data_store.search_name("generator") == []
            obj_with_exp_setup.split_train_val_test()
            data_store = obj_with_exp_setup.data_store
            expected_params = ["generator", "start", "end", "stations", "permute_data", "min_length", "extreme_values",
                               "extremes_on_right_tail_only", "upsampling"]
            assert data_store.search_scope("general.train") == sorted(expected_params)
            assert data_store.search_name("generator") == sorted(["general.train", "general.val", "general.test",
                                                                  "general.train_val"])

        def test_create_set_split_not_all_stations(self, caplog, obj_with_exp_setup):
            """Check ``create_set_split`` with ``use_all_stations_on_all_data_sets`` set to False."""
            caplog.set_level(logging.DEBUG)
            obj_with_exp_setup.data_store.set("use_all_stations_on_all_data_sets", False, "general")
            obj_with_exp_setup.create_set_split(slice(0, 2), "awesome")
            # only the two stations selected by slice(0, 2) are expected in the log message and in the data store
            assert ('root', 10, "Awesome stations (len=2): ['DEBW107', 'DEBY081']") in caplog.record_tuples
            data_store = obj_with_exp_setup.data_store
            assert isinstance(data_store.get("generator", "general.awesome"), DataGenerator)
            # the generator must exist in the sub scope only, not in the parent scope
            with pytest.raises(NameNotFoundInScope):
                data_store.get("generator", "general")
            assert data_store.get("stations", "general.awesome") == ["DEBW107", "DEBY081"]

        def test_create_set_split_all_stations(self, caplog, obj_with_exp_setup):
            """Check ``create_set_split`` without touching ``use_all_stations_on_all_data_sets``."""
            caplog.set_level(logging.DEBUG)
            obj_with_exp_setup.create_set_split(slice(0, 2), "awesome")
            # here the slice is expected to be ignored: the log message lists all six stations of the fixture
            message = "Awesome stations (len=6): ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001']"
            assert ('root', 10, message) in caplog.record_tuples
            data_store = obj_with_exp_setup.data_store
            assert isinstance(data_store.get("generator", "general.awesome"), DataGenerator)
            # the generator must exist in the sub scope only, not in the parent scope
            with pytest.raises(NameNotFoundInScope):
                data_store.get("generator", "general")
            # only five stations are expected to be stored although six are logged
            # NOTE(review): presumably DEBW001 is removed by the validity check (compare test_check_valid_stations)
            # -- confirm against create_set_split
            assert data_store.get("stations", "general.awesome") == ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']

        @pytest.mark.parametrize("name", (None, "tester"))
        def test_check_valid_stations(self, caplog, obj_with_exp_setup, name):
            """Check that ``check_valid_stations`` drops the last station and logs start and summary messages."""
            pre = obj_with_exp_setup
            caplog.set_level(logging.INFO)
            args = pre.data_store.create_args_dict(DEFAULT_ARGS_LIST)
            kwargs = pre.data_store.create_args_dict(DEFAULT_KWARGS_LIST)
            stations = pre.data_store.get("stations", "general")
            valid_stations = pre.check_valid_stations(args, kwargs, stations, name=name)
            assert len(valid_stations) < len(stations)
            assert valid_stations == stations[:-1]
            # build the expected start message from the parametrized name instead of repeating the literal value
            expected = 'check valid stations started'
            if name:
                expected = '{} ({})'.format(expected, name)
            assert caplog.record_tuples[0] == ('root', 20, expected)
            assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 6 '
                                                                        r'station\(s\). Found 5/6 valid stations.'))

        def test_split_set_indices(self, obj_super_init):
            """Check the four index objects returned by ``split_set_indices`` for 15 elements and a 0.9 split."""
            dummy_list = list(range(0, 15))
            train, val, test, train_val = obj_super_init.split_set_indices(len(dummy_list), 0.9)
            # the returned objects are used directly as list indices (slices)
            assert dummy_list[train] == list(range(0, 10))
            assert dummy_list[val] == list(range(10, 13))
            assert dummy_list[test] == list(range(13, 15))
            # train_val is expected to cover train and val together
            assert dummy_list[train_val] == list(range(0, 13))