Skip to content
Snippets Groups Projects
Commit 582b8641 authored by lukas leufen
Browse files

data prep can handle hourly data now

parent ab95345e
Branches
Tags
2 merge requests: !37 "include new development", !36 "include using of hourly data"
......@@ -58,12 +58,11 @@ class DataPrep(object):
self.meta = None
self._transform_method = None
self.statistics_per_var = kwargs.get("statistics_per_var", None)
if self.statistics_per_var is not None:
self.sampling = kwargs.get("sampling", "daily")
if self.statistics_per_var is not None or self.sampling == "hourly":
self.load_data()
else:
raise NotImplementedError # hourly data usage is not implemented yet
# self.data, self.meta = Fkf.read_hourly_data_from_csv_to_xarray(self.path, self.network, self.station,
# self.variables, **kwargs)
raise NotImplementedError("Either select hourly data or provide statistics_per_var.")
def load_data(self):
"""
......@@ -122,7 +121,7 @@ class DataPrep(object):
"""
df_all = {}
df, meta = join.download_join(station_name=self.station, stat_var=self.statistics_per_var,
station_type=self.station_type, network_name=self.network)
station_type=self.station_type, network_name=self.network, sampling=self.sampling)
df_all[self.station[0]] = df
# convert df_all to xarray
xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()}
......
......@@ -4,12 +4,15 @@ import pytest
from src.join import *
from src.join import _save_to_pandas, _correct_stat_name, _lower_list
from src.join_settings import join_settings
class TestJoinUrlBase:
    """Check the connection settings returned by ``join_settings``."""

    def test_url(self):
        """Calling ``join_settings()`` without arguments yields the JOIN REST url and empty headers."""
        # This replaces the earlier assertion on the module constant ``join_url_base``; only the
        # ``join_settings`` based check is kept.
        url, headers = join_settings()
        assert url == 'https://join.fz-juelich.de/services/rest/surfacedata/'
        assert headers == {}
class TestDownloadJoin:
......@@ -25,22 +28,35 @@ class TestDownloadJoin:
assert e.value.args[-1] == "No data found in JOIN."
class TestCorrectDataFormat:
    """Check the restructuring performed by ``correct_data_format``."""

    def test_correct_data_format(self):
        """A list of [timestamp, value] pairs ending with a metadata dict becomes a single dict."""
        timestamps = ["2020-01-01 06:00:01", "2020-01-01 06:00:11", "2020-01-01 06:00:21",
                      "2020-01-01 06:00:31", "2020-01-01 06:00:41", "2020-01-01 06:00:51"]
        readings = [23., 24., 25., 26., 27., 23.]
        metadata = {"station": "test_station_001", "author": "ME", "success": True}
        # Hand over fresh objects so the expectation below never shares state with the input.
        raw_series = [[stamp, reading] for stamp, reading in zip(timestamps, readings)]
        raw_series.append(dict(metadata))
        converted = correct_data_format(raw_series)
        assert converted == {"datetime": timestamps,
                             "values": readings,
                             "metadata": metadata}
class TestGetData:
    """Query the JOIN ``series`` service through ``get_data``."""

    def test(self):
        """Requesting o3 and no2 for station DEBW107 returns the two expected series entries."""
        # The base url is taken from ``join_settings()`` (first element of its return value)
        # instead of the former ``join_url_base`` constant.
        opts = {"base": join_settings()[0], "service": "series", "station_id": 'DEBW107', "network_name": "UBA",
                "parameter_name": "o3,no2"}
        # ``headers`` is passed explicitly; an empty dict is used here.
        assert get_data(opts, headers={}) == [[17057, 'UBA', 'DEBW107', 'O3'], [17058, 'UBA', 'DEBW107', 'NO2']]
class TestLoadSeriesInformation:
    """Exercise ``load_series_information`` with the url and headers passed explicitly."""

    def test_standard_query(self):
        """The result for DEBW107 contains at least the expected variable -> id pairs."""
        expected_subset = {'o3': 23031, 'no2': 39002, 'temp--lubw': 17059, 'wspeed': 17060}
        # ``<=`` on dict item views is a subset check: extra entries in the result are allowed.
        assert expected_subset.items() <= load_series_information(['DEBW107'], None, None, join_settings()[0], {}).items()

    def test_empty_result(self):
        """Passing "traffic" as second argument for DEBW107 yields an empty dict."""
        assert load_series_information(['DEBW107'], "traffic", None, join_settings()[0], {}) == {}
class TestSaveToPandas:
......@@ -53,6 +69,10 @@ class TestSaveToPandas:
def date(self):
    """Return four consecutive daily timestamps written without seconds."""
    return [
        '1997-01-01 00:00',
        '1997-01-02 00:00',
        '1997-01-03 00:00',
        '1997-01-04 00:00',
    ]
@pytest.fixture
def date_len19(self):
    """Return four consecutive daily timestamps written with seconds (19 characters each)."""
    return [
        '1997-01-01 00:00:00',
        '1997-01-02 00:00:00',
        '1997-01-03 00:00:00',
        '1997-01-04 00:00:00',
    ]
@pytest.fixture
def values(self):
    """Return four float sample values, one per timestamp of the date fixtures."""
    return [
        86.21,
        94.76,
        76.96,
        99.89,
    ]
......@@ -75,6 +95,10 @@ class TestSaveToPandas:
df_concat = pd.concat([create_df, next_df], axis=1)
assert pd.testing.assert_frame_equal(df_concat, _save_to_pandas(create_df, data, 'max', 'temperature')) is None
def test_alternative_date_format(self, date_len19, values, create_df):
    """Timestamps that include seconds must produce a frame equal to ``create_df``."""
    payload = {'datetime': date_len19, 'mean': values, 'metadata': None}
    result = _save_to_pandas(None, payload, 'mean', 'cloudcover')
    # assert_frame_equal raises on mismatch and returns None on success.
    assert pd.testing.assert_frame_equal(create_df, result) is None
class TestCorrectStatName:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment