diff --git a/test/test_helpers/test_data_sources/test_join.py b/test/test_helpers/test_data_sources/test_join.py
index f9b12f5a7ff20e898695de0a0f035bed023674f2..eba99a42115e263a54d313fe399bf6046e8f0fe6 100644
--- a/test/test_helpers/test_data_sources/test_join.py
+++ b/test/test_helpers/test_data_sources/test_join.py
@@ -14,7 +14,7 @@ class TestDownloadJoin:
 
     def test_download_single_var(self):
         data, meta = download_join("DEBW107", {"o3": "dma8eu"})
-        assert data.columns == "o3"
+        assert data.columns == "o3_dma8eu"
         assert meta.columns == "DEBW107"
 
     def test_download_empty(self):
@@ -316,22 +316,46 @@ class TestSaveToPandas:
         return [20.0, 25.2, 25.1, 23.6]
 
     @pytest.fixture
-    def create_df(self, date, values):
-        return pd.DataFrame(values, index=self.convert_date(date), columns=['cloudcover'])
+    def var(self):
+        return "cloudcover"
 
-    def test_empty_df(self, date, values, create_df):
-        data = {'datetime': date, 'mean': values, 'metadata': None}
-        assert pd.testing.assert_frame_equal(create_df, _save_to_pandas(None, data, 'mean', 'cloudcover')) is None
+    @pytest.fixture
+    def alternative_var(self):
+        return "temperature"
+
+    @pytest.fixture
+    def stat(self):
+        return "mean"
 
-    def test_not_empty_df(self, date, alternative_values, create_df):
-        data = {'datetime': date, 'max': alternative_values, 'metadata': None}
-        next_df = pd.DataFrame(data["max"], index=self.convert_date(date), columns=['temperature'])
+    @pytest.fixture
+    def alternative_stat(self):
+        return "max"
+
+
+    @pytest.fixture
+    def create_df(self, date, values, var, stat):
+        return pd.DataFrame(values, index=self.convert_date(date), columns=[f'{var}_{stat}'])
+
+    def test_empty_df(self, date, values, var, stat, create_df):
+        data = {'datetime': date, stat: values, 'metadata': None}
+        # assert pd.testing.assert_frame_equal(create_df, _save_to_pandas(None, data, 'mean', 'cloudcover')) is None
+        assert pd.testing.assert_frame_equal(create_df, _save_to_pandas(None, data, stat, var)) is None
+
+    def test_not_empty_df(self, date, alternative_values, alternative_var, alternative_stat, create_df):
+        data = {'datetime': date, alternative_stat: alternative_values, 'metadata': None}
+        # next_df = pd.DataFrame(data["max"], index=self.convert_date(date), columns=['temperature'])
+        next_df = pd.DataFrame(data[alternative_stat], index=self.convert_date(date),
+                               columns=[f'{alternative_var}_{alternative_stat}']
+                               )
         df_concat = pd.concat([create_df, next_df], axis=1)
-        assert pd.testing.assert_frame_equal(df_concat, _save_to_pandas(create_df, data, 'max', 'temperature')) is None
+        # assert pd.testing.assert_frame_equal(df_concat, _save_to_pandas(create_df, data, 'max', 'temperature')) is None
+        assert pd.testing.assert_frame_equal(
+            df_concat, _save_to_pandas(create_df, data, alternative_stat, alternative_var)
+        ) is None
 
-    def test_alternative_date_format(self, date_len19, values, create_df):
+    def test_alternative_date_format(self, date_len19, values, var, stat, create_df):
         data = {'datetime': date_len19, 'mean': values, 'metadata': None}
-        assert pd.testing.assert_frame_equal(create_df, _save_to_pandas(None, data, 'mean', 'cloudcover')) is None
+        assert pd.testing.assert_frame_equal(create_df, _save_to_pandas(None, data, stat, var)) is None
 
 
 class TestLowerList:
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index 6646e1a4795756edd1792ef91f535132e8cde61d..38972460f5497efdb78bf6d26ca985e2392af931 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -37,9 +37,30 @@ class TestPreProcessing:
         RunEnvironment().__del__()
 
     def test_init(self, caplog):
-        ExperimentSetup(stations=['DEBW107', 'DEBW013', 'DEBW087'],
+        ExperimentSetup(stations=['DEBW087', 'DEBW107', 'DEBW013'],
                         statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'},
-                        data_origin={'o3': 'UBA', 'temp': 'UBA'})
+                        data_origin={'o3': 'UBA', 'temp': 'UBA'}
+                        )
+        caplog.clear()
+        caplog.set_level(logging.INFO)
+        with PreProcessing():
+            assert caplog.record_tuples[0] == ('root', 20, 'PreProcessing started')
+            assert caplog.record_tuples[1] == ('root', 20, 'check valid stations started (preprocessing)')
+            assert caplog.record_tuples[-6] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 3 '
+                                                                        r'station\(s\). Found 3/3 valid stations.'))
+            assert caplog.record_tuples[-5] == ('root', 20, "use serial create_info_df (train)")
+            assert caplog.record_tuples[-4] == ('root', 20, "use serial create_info_df (val)")
+            assert caplog.record_tuples[-3] == ('root', 20, "use serial create_info_df (test)")
+            assert caplog.record_tuples[-2] == ('root', 20, "Searching for competitors to be prepared for use.")
+            assert caplog.record_tuples[-1] == ('root', 20, "No preparation required for competitor ols as no specific "
+                                                            "instruction is provided.")
+        RunEnvironment().__del__()
+
+    def test_init_multiple_stat_mix(self, caplog):
+        ExperimentSetup(stations=['DEBW087', 'DEBW107', 'DEBW013'],
+                        statistics_per_var={'o3': ['dma8eu', 'perc95'], 'temp': 'maximum'},
+                        data_origin={'o3': 'UBA', 'temp': 'UBA'}
+                        )
         caplog.clear()
         caplog.set_level(logging.INFO)
         with PreProcessing():