Skip to content
Snippets Groups Projects
Commit 1754830c authored by lukas leufen's avatar lukas leufen
Browse files

update on data prep tests

parent af1ecb8a
Branches
Tags
2 merge requests: !37 "include new development", !33 "Lukas issue036 feat local temp data storage"
Pipeline #29107 passed
...@@ -108,7 +108,7 @@ class DataPrep(object): ...@@ -108,7 +108,7 @@ class DataPrep(object):
check_dict = {"station_type": self.station_type, "network_name": self.network} check_dict = {"station_type": self.station_type, "network_name": self.network}
for (k, v) in check_dict.items(): for (k, v) in check_dict.items():
if self.meta.at[k, self.station[0]] != v: if self.meta.at[k, self.station[0]] != v:
logging.debug(f"meta data does not agree which given request for {k}: {v} (requested) != " logging.debug(f"meta data does not agree with given request for {k}: {v} (requested) != "
f"{self.meta.at[k, self.station[0]]} (local). Raise FileNotFoundError to trigger new " f"{self.meta.at[k, self.station[0]]} (local). Raise FileNotFoundError to trigger new "
f"grapping from web.") f"grapping from web.")
raise FileNotFoundError raise FileNotFoundError
......
...@@ -7,6 +7,8 @@ import xarray as xr ...@@ -7,6 +7,8 @@ import xarray as xr
import datetime as dt import datetime as dt
import pandas as pd import pandas as pd
from operator import itemgetter from operator import itemgetter
import logging
from src.helpers import PyTestRegex
class TestDataPrep: class TestDataPrep:
...@@ -17,6 +19,17 @@ class TestDataPrep: ...@@ -17,6 +19,17 @@ class TestDataPrep:
station_type='background', test='testKWARGS', station_type='background', test='testKWARGS',
statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
@pytest.fixture
def data_prep_no_init(self):
    """Provide a ``DataPrep`` instance that bypasses ``DataPrep.__init__``.

    The object is allocated with ``object.__new__`` and only the attributes
    listed below are assigned by hand, so tests can exercise single methods
    without running the constructor.

    :return: the partially initialised ``DataPrep`` object
    """
    test_dir = os.path.abspath(os.path.dirname(__file__))
    prep = object.__new__(DataPrep)
    attributes = {
        "path": os.path.join(test_dir, 'data'),
        "network": 'UBA',
        "station": ['DEBW107'],
        "variables": ['o3', 'temp'],
        "station_type": "background",
        "kwargs": None,
    }
    for attr_name, attr_value in attributes.items():
        setattr(prep, attr_name, attr_value)
    return prep
def test_init(self, data): def test_init(self, data):
assert data.path == os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') assert data.path == os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
assert data.network == 'AIRBASE' assert data.network == 'AIRBASE'
...@@ -31,16 +44,79 @@ class TestDataPrep: ...@@ -31,16 +44,79 @@ class TestDataPrep:
with pytest.raises(NotImplementedError): with pytest.raises(NotImplementedError):
DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp']) DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'])
def test_repr(self): def test_download_data(self, data_prep_no_init):
d = object.__new__(DataPrep) file_name = data_prep_no_init._set_file_name()
d.path = 'data/test' meta_file = data_prep_no_init._set_meta_file_name()
d.network = 'dummy' data_prep_no_init.kwargs = {"store_data_locally": False}
d.station = ['DEBW107'] data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'}
d.variables = ['o3', 'temp'] data_prep_no_init.download_data(file_name, meta_file)
d.station_type = "traffic" assert isinstance(data_prep_no_init.data, xr.DataArray)
d.kwargs = None
assert d.__repr__().rstrip() == "Dataprep(path='data/test', network='dummy', station=['DEBW107'], "\ def test_download_data_from_join(self, data_prep_no_init):
"variables=['o3', 'temp'], station_type=traffic, **None)".rstrip() file_name = data_prep_no_init._set_file_name()
meta_file = data_prep_no_init._set_meta_file_name()
data_prep_no_init.kwargs = {"store_data_locally": False}
data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'}
xarr, meta = data_prep_no_init.download_data_from_join(file_name, meta_file)
assert isinstance(xarr, xr.DataArray)
assert isinstance(meta, pd.DataFrame)
def test_check_station_meta(self, caplog, data_prep_no_init):
    """Verify ``check_station_meta`` for matching and mismatching station types.

    After downloading data for the fixture's station, the check must return
    ``None`` while ``station_type`` is "background". Once ``station_type`` is
    switched to "traffic", the check must raise ``FileNotFoundError`` and the
    most recent log record must be a DEBUG message on the root logger that
    names the mismatch.

    :param caplog: pytest log capturing fixture
    :param data_prep_no_init: ``DataPrep`` object created without ``__init__``
    """
    caplog.set_level(logging.DEBUG)
    file_name = data_prep_no_init._set_file_name()
    meta_file = data_prep_no_init._set_meta_file_name()
    data_prep_no_init.kwargs = {"store_data_locally": False}
    data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'}
    data_prep_no_init.download_data(file_name, meta_file)
    assert data_prep_no_init.check_station_meta() is None

    data_prep_no_init.station_type = "traffic"
    # The exception info object is not inspected, so no ``as`` binding is needed.
    with pytest.raises(FileNotFoundError):
        data_prep_no_init.check_station_meta()

    # These checks must live outside the ``with`` block: statements placed after
    # the raising call inside the block would never be executed.
    msg = "meta data does not agree with given request for station_type: traffic (requested) != background (local)"
    logger_name, level, text = caplog.record_tuples[-1]
    assert (logger_name, level) == ('root', logging.DEBUG)
    assert msg in text
def test_load_data_overwrite_local_data(self, data_prep_no_init):
    """Verify that ``load_data`` rewrites local files when ``overwrite_local_data`` is set.

    The data and meta files are removed first (if present), then ``load_data``
    is called twice with ``kwargs = {"overwrite_local_data": True}``. Both
    files must exist after each call, and their modification time must be
    strictly newer after the second call than after the first.

    :param data_prep_no_init: ``DataPrep`` object created without ``__init__``
    """
    data_prep_no_init.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'}
    file_path = data_prep_no_init._set_file_name()
    meta_file_path = data_prep_no_init._set_meta_file_name()

    # Start from a clean state. Guard the removal so the test does not depend on
    # an earlier test having created the files (a bare os.remove raises
    # FileNotFoundError on a missing path).
    for path in (file_path, meta_file_path):
        if os.path.exists(path):
            os.remove(path)
        assert not os.path.exists(path)

    data_prep_no_init.kwargs = {"overwrite_local_data": True}
    data_prep_no_init.load_data()
    assert os.path.exists(file_path)
    assert os.path.exists(meta_file_path)
    # Use the modification time: st_ctime means "creation time" on Windows and
    # "metadata change time" on Unix, so it is not a portable rewrite indicator.
    t = os.stat(file_path).st_mtime
    tm = os.stat(meta_file_path).st_mtime

    data_prep_no_init.load_data()
    assert os.path.exists(file_path)
    assert os.path.exists(meta_file_path)
    assert os.stat(file_path).st_mtime > t
    assert os.stat(meta_file_path).st_mtime > tm
    assert isinstance(data_prep_no_init.data, xr.DataArray)
    assert isinstance(data_prep_no_init.meta, pd.DataFrame)
def test_load_data_keep_local_data(self, data_prep_no_init):
    """Verify that a repeated ``load_data`` call leaves the local data file untouched.

    With ``station_type`` set to ``None`` and empty ``kwargs``, ``load_data``
    is called twice. The data file must exist after each call and its
    ``st_ctime`` must be identical before and after the second call.

    :param data_prep_no_init: ``DataPrep`` object created without ``__init__``
    """
    prep = data_prep_no_init
    prep.statistics_per_var = {'o3': 'dma8eu', 'temp': 'maximum'}
    prep.station_type = None
    prep.kwargs = {}
    file_path = prep._set_file_name()

    # first load: afterwards the local file has to be present
    prep.load_data()
    assert os.path.exists(file_path)
    ctime_after_first_load = os.stat(file_path).st_ctime

    # second load: the file must still be there with an unchanged timestamp
    prep.load_data()
    assert os.path.exists(prep._set_file_name())
    assert os.stat(file_path).st_ctime == ctime_after_first_load

    assert isinstance(prep.data, xr.DataArray)
    assert isinstance(prep.meta, pd.DataFrame)
def test_repr(self, data_prep_no_init):
    """Compare the textual representation of the fixture object with the expected string.

    Trailing whitespace is stripped on both sides before the comparison.

    :param data_prep_no_init: ``DataPrep`` object created without ``__init__``
    """
    path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
    expected = (
        f"Dataprep(path='{path}', network='UBA', "
        "station=['DEBW107'], variables=['o3', 'temp'], "
        "station_type=background, **None)"
    )
    assert repr(data_prep_no_init).rstrip() == expected.rstrip()
def test_set_file_name_and_meta(self): def test_set_file_name_and_meta(self):
d = object.__new__(DataPrep) d = object.__new__(DataPrep)
...@@ -133,6 +209,16 @@ class TestDataPrep: ...@@ -133,6 +209,16 @@ class TestDataPrep:
with pytest.raises(NotImplementedError): with pytest.raises(NotImplementedError):
data.inverse_transform() data.inverse_transform()
def test_get_transformation_information(self, data):
    """Check ``get_transformation_information`` before and after a transformation.

    Before ``transform`` is called, the method must return ``(None, None, None)``
    for "o3". After ``data.transform('datetime')``, it must return the mean and
    standard deviation of the "o3" variable along "datetime" (computed here on
    the data before the transformation) together with the string "standardise".

    :param data: ``DataPrep`` fixture
    """
    assert (None, None, None) == data.get_transformation_information("o3")

    # reference statistics have to be taken before the data is transformed
    expected_mean = data.data.mean("datetime").sel(variables='o3').values
    expected_std = data.data.std("datetime").sel(variables='o3').values

    data.transform('datetime')
    mean, std, info = data.get_transformation_information("o3")

    # numpy's assertion helpers raise AssertionError on mismatch and return None otherwise
    np.testing.assert_almost_equal(mean, expected_mean)
    np.testing.assert_almost_equal(std, expected_std)
    assert info == "standardise"
def test_nan_remove_no_hist_or_label(self, data): def test_nan_remove_no_hist_or_label(self, data):
assert data.history is None assert data.history is None
assert data.label is None assert data.label is None
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment