diff --git a/src/data_handling/data_preparation.py b/src/data_handling/data_preparation.py index d0d89438c14c4f2cfbbc3e76504b82f310dc1a8a..e3506724eb5f78a6cd7af6217c768b183d671dba 100644 --- a/src/data_handling/data_preparation.py +++ b/src/data_handling/data_preparation.py @@ -58,12 +58,11 @@ class DataPrep(object): self.meta = None self._transform_method = None self.statistics_per_var = kwargs.get("statistics_per_var", None) - if self.statistics_per_var is not None: + self.sampling = kwargs.get("sampling", "daily") + if self.statistics_per_var is not None or self.sampling == "hourly": self.load_data() else: - raise NotImplementedError # hourly data usage is not implemented yet - # self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.station, - # self.variables, **kwargs) + raise NotImplementedError("Either select hourly data or provide statistics_per_var.") def load_data(self): """ @@ -122,7 +121,7 @@ class DataPrep(object): """ df_all = {} df, meta = join.download_join(station_name=self.station, stat_var=self.statistics_per_var, - station_type=self.station_type, network_name=self.network) + station_type=self.station_type, network_name=self.network, sampling=self.sampling) df_all[self.station[0]] = df # convert df_all to xarray xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()} diff --git a/test/test_join.py b/test/test_join.py index 865ae80dfaaa0244eb7592e65ef134a23b36634c..029d7ec87efba1003b62b5c92361f574711355de 100644 --- a/test/test_join.py +++ b/test/test_join.py @@ -4,12 +4,15 @@ import pytest from src.join import * from src.join import _save_to_pandas, _correct_stat_name, _lower_list +from src.join_settings import join_settings class TestJoinUrlBase: def test_url(self): - assert join_url_base == 'https://join.fz-juelich.de/services/rest/surfacedata/' + url, headers = join_settings() + assert url == 'https://join.fz-juelich.de/services/rest/surfacedata/' + assert headers == {} class TestDownloadJoin: @@ -25,22 +28,35 @@ class TestDownloadJoin: assert e.value.args[-1] == "No data found in JOIN." +class TestCorrectDataFormat: + + def test_correct_data_format(self): + list_data = [["2020-01-01 06:00:01", 23.], ["2020-01-01 06:00:11", 24.], ["2020-01-01 06:00:21", 25.], + ["2020-01-01 06:00:31", 26.], ["2020-01-01 06:00:41", 27.], ["2020-01-01 06:00:51", 23.], + {"station": "test_station_001", "author": "ME", "success": True}] + dict_data = correct_data_format(list_data) + assert dict_data == {"datetime": ["2020-01-01 06:00:01", "2020-01-01 06:00:11", "2020-01-01 06:00:21", + "2020-01-01 06:00:31", "2020-01-01 06:00:41", "2020-01-01 06:00:51"], + "values": [23., 24., 25., 26., 27., 23.], + "metadata": {"station": "test_station_001", "author": "ME", "success": True}} + + class TestGetData: def test(self): - opts = {"base": join_url_base, "service": "series", "station_id": 'DEBW107', "network_name": "UBA", + opts = {"base": join_settings()[0], "service": "series", "station_id": 'DEBW107', "network_name": "UBA", "parameter_name": "o3,no2"} - assert get_data(opts) == [[17057, 'UBA', 'DEBW107', 'O3'], [17058, 'UBA', 'DEBW107', 'NO2']] + assert get_data(opts, headers={}) == [[17057, 'UBA', 'DEBW107', 'O3'], [17058, 'UBA', 'DEBW107', 'NO2']] class TestLoadSeriesInformation: def test_standard_query(self): expected_subset = {'o3': 23031, 'no2': 39002, 'temp--lubw': 17059, 'wspeed': 17060} - assert expected_subset.items() <= load_series_information(['DEBW107'], None, None).items() + assert expected_subset.items() <= load_series_information(['DEBW107'], None, None, join_settings()[0], {}).items() def test_empty_result(self): - assert load_series_information(['DEBW107'], "traffic", None) == {} + assert load_series_information(['DEBW107'], "traffic", None, join_settings()[0], {}) == {} class TestSaveToPandas: @@ -53,6 +69,10 @@ class TestSaveToPandas: def date(self): return ['1997-01-01 00:00', '1997-01-02 00:00', '1997-01-03 00:00', '1997-01-04 00:00'] + @pytest.fixture + def date_len19(self): + return ['1997-01-01 00:00:00', '1997-01-02 00:00:00', '1997-01-03 00:00:00', '1997-01-04 00:00:00'] + @pytest.fixture def values(self): return [86.21, 94.76, 76.96, 99.89] @@ -75,6 +95,10 @@ class TestSaveToPandas: df_concat = pd.concat([create_df, next_df], axis=1) assert pd.testing.assert_frame_equal(df_concat, _save_to_pandas(create_df, data, 'max', 'temperature')) is None + def test_alternative_date_format(self, date_len19, values, create_df): + data = {'datetime': date_len19, 'mean': values, 'metadata': None} + assert pd.testing.assert_frame_equal(create_df, _save_to_pandas(None, data, 'mean', 'cloudcover')) is None + class TestCorrectStatName: