Skip to content
Snippets Groups Projects
Commit 0ec39ab7 authored by lukas leufen's avatar lukas leufen
Browse files

added inverse transformation for dataprep class, setup pytest coverage

parent a2299e50
Branches
Tags
2 merge requests: !6 updated inception model and data prep class, !4 data prep class
# .coveragerc to control coverage.py
[run]
branch = True
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
    # Have to re-enable the standard pragma
    pragma: no cover
    # Don't complain about missing debug-only code:
    def __repr__
    if self\.debug
    # Don't complain if tests don't hit defensive assertion code:
    raise AssertionError
    raise NotImplementedError
    # Don't complain if non-runnable code isn't run:
    if 0:
    if __name__ == .__main__.:
    # Don't complain about import statements
    import
...@@ -39,6 +39,7 @@ ehthumbs.db ...@@ -39,6 +39,7 @@ ehthumbs.db
Thumbs.db Thumbs.db
.idea/ .idea/
/venv/ /venv/
.coverage
# don't check data and plot folder # # don't check data and plot folder #
#################################### ####################################
......
...@@ -6,3 +6,4 @@ pandas==0.25.1 ...@@ -6,3 +6,4 @@ pandas==0.25.1
requests==2.22.0 requests==2.22.0
pytest==5.2.1 pytest==5.2.1
pytest-lazy-fixture==0.6.1 pytest-lazy-fixture==0.6.1
pytest-cov
\ No newline at end of file
...@@ -113,33 +113,51 @@ class DataPrep(object): ...@@ -113,33 +113,51 @@ class DataPrep(object):
self.data = self.data.interpolate_na(dim=dim, method=method, limit=limit, use_coordinate=use_coordinate, self.data = self.data.interpolate_na(dim=dim, method=method, limit=limit, use_coordinate=use_coordinate,
**kwargs) **kwargs)
def restandardise(self, data, dim='variables', **kwargs): @staticmethod
def check_inverse_transform_params(mean, std, method) -> None:
    """
    Check that the statistics required to invert the given transformation are present.

    'standardise' requires both mean and std, 'centre' requires only mean. Any other method name passes this
    check without inspection.

    :param mean: stored mean of the transformation, or None if not available
    :param std: stored standard deviation of the transformation, or None if not available
    :param method: name of the transformation that should be inverted
    :return: None
    :raises AttributeError: if a required statistic is None; the message lists every missing name
    """
    missing = []
    if method in ('standardise', 'centre') and mean is None:
        missing.append("mean")
    if method == 'standardise' and std is None:
        missing.append("std")
    if missing:
        # Each name keeps its trailing ", " so the message text stays exactly as callers and tests expect it.
        msg = "".join(f"{name}, " for name in missing)
        raise AttributeError(f"Inverse transform {method} can not be executed because following is None: {msg}")
def inverse_transform(self) -> None:
""" """
Perform inverse transformation
:param data:
:param dim:
:param kwargs:
:return: :return:
""" """
variables = kwargs.get('variables', None) def f_inverse(data, mean, std, method_inverse):
if variables is None: if method_inverse == 'standardise':
return FKf.restandardize(data, mean=self.mean, std=self.std, stand=True) return statistics.standardise_inverse(data, mean, std), None, None
elif method_inverse == 'centre':
return statistics.centre_inverse(data, mean), None, None
elif method_inverse == 'normalise':
raise NotImplementedError
else: else:
return FKf.restandardize(data, raise NotImplementedError
mean=self.mean.sel({dim: variables}).values,
std=self.std.sel({dim: variables}).values,
stand=True)
def transform(self, dim: Union[str, int] = 0, method: str = 'standardise') -> None: if self._transform_method is None:
raise AssertionError("Inverse transformation method is not set. Data cannot be inverse transformed.")
self.check_inverse_transform_params(self.mean, self.std, self._transform_method)
self.data, self.mean, self.std = f_inverse(self.data, self.mean, self.std, self._transform_method)
self._transform_method = None
def transform(self, dim: Union[str, int] = 0, method: str = 'standardise', inverse: bool = False) -> None:
""" """
This function transforms a xarray.dataarray (along dim) or pandas.DataFrame (along axis) either with mean=0 This function transforms a xarray.dataarray (along dim) or pandas.DataFrame (along axis) either with mean=0
and std=1 (`method=standardise`) or centers the data with mean=0 and no change in data scale and std=1 (`method=standardise`) or centers the data with mean=0 and no change in data scale
(`method=centre`). Furthermore, this sets an internal instance attribute for later inverse transformation (`method=centre`). Furthermore, this sets an internal instance attribute for later inverse transformation. This
method will raise an AssertionError if an internal transform method was already set ('inverse=False') or if the
internal transform method, internal mean and internal standard deviation weren't set ('inverse=True').
:param string/int dim: :param string/int dim: This param is not used for inverse transformation.
| for xarray.DataArray as string: name of dimension which should be standardised | for xarray.DataArray as string: name of dimension which should be standardised
| for pandas.DataFrame as int: axis of dimension which should be standardised | for pandas.DataFrame as int: axis of dimension which should be standardised
:param method: :param method: Choose the transformation method from 'standardise' and 'centre'. 'normalise' is not implemented
yet. This param is not used for inverse transformation.
:param inverse: Switch between transformation and inverse transformation.
:return: xarray.DataArrays or pandas.DataFrames: :return: xarray.DataArrays or pandas.DataFrames:
#. mean: Mean of data #. mean: Mean of data
#. std: Standard deviation of data #. std: Standard deviation of data
...@@ -157,11 +175,14 @@ class DataPrep(object): ...@@ -157,11 +175,14 @@ class DataPrep(object):
else: else:
raise NotImplementedError raise NotImplementedError
if not inverse:
if self._transform_method is not None: if self._transform_method is not None:
raise AssertionError(f"Transform method is already set. Therefore, data was already transformed with " raise AssertionError(f"Transform method is already set. Therefore, data was already transformed with "
f"{self._transform_method}. Please perform inverse transformation of data first.") f"{self._transform_method}. Please perform inverse transformation of data first.")
self._transform_method = method
self.mean, self.std, self.data = f(self.data) self.mean, self.std, self.data = f(self.data)
self._transform_method = method
else:
self.inverse_transform()
def make_history_window(self, dim, window): def make_history_window(self, dim, window):
raise NotImplementedError raise NotImplementedError
......
...@@ -10,7 +10,7 @@ class TestDataPrep: ...@@ -10,7 +10,7 @@ class TestDataPrep:
@pytest.fixture @pytest.fixture
def data(self): def data(self):
return DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], test='testKWARGS', return DataPrep('test/data/', 'dummy', 'DEBW107', ['o3', 'temp'], test='testKWARGS',
statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
def test_init(self, data): def test_init(self, data):
...@@ -50,7 +50,7 @@ class TestDataPrep: ...@@ -50,7 +50,7 @@ class TestDataPrep:
def test_set_file_name_and_meta(self): def test_set_file_name_and_meta(self):
d = object.__new__(DataPrep) d = object.__new__(DataPrep)
d.path = os.path.abspath('data/test') d.path = os.path.abspath('test/data/test')
d.station = 'TESTSTATION' d.station = 'TESTSTATION'
d.variables = ['a', 'bc'] d.variables = ['a', 'bc']
assert d._set_file_name() == os.path.join(os.path.abspath(os.path.dirname(__file__)), assert d._set_file_name() == os.path.join(os.path.abspath(os.path.dirname(__file__)),
...@@ -72,6 +72,18 @@ class TestDataPrep: ...@@ -72,6 +72,18 @@ class TestDataPrep:
assert isinstance(data.mean, xr.DataArray) assert isinstance(data.mean, xr.DataArray)
assert isinstance(data.std, xr.DataArray) assert isinstance(data.std, xr.DataArray)
@pytest.mark.parametrize('mean, std, method, msg', [
    (10, 3, 'standardise', ''),
    (6, None, 'standardise', 'std, '),
    (None, 3, 'standardise', 'mean, '),
    (19, None, 'centre', ''),
    (None, 2, 'centre', 'mean, '),
    (8, 2, 'centre', ''),
    (None, None, 'standardise', 'mean, std, '),
])
def test_check_inverse_transform_params(self, data, mean, std, method, msg):
    """
    An empty `msg` marks a parameter combination that must pass the check; a non-empty `msg` is the fragment
    expected inside the raised AttributeError.
    """
    if not msg:
        assert data.check_inverse_transform_params(mean, std, method) is None
        return
    with pytest.raises(AttributeError) as excinfo:
        data.check_inverse_transform_params(mean, std, method)
    assert msg in excinfo.value.args[0]
def test_transform_centre(self, data): def test_transform_centre(self, data):
assert data._transform_method is None assert data._transform_method is None
assert data.mean is None assert data.mean is None
...@@ -83,3 +95,38 @@ class TestDataPrep: ...@@ -83,3 +95,38 @@ class TestDataPrep:
assert np.testing.assert_almost_equal(data.data.std('datetime').variable.values, data_std_org) is None assert np.testing.assert_almost_equal(data.data.std('datetime').variable.values, data_std_org) is None
assert data.std is None assert data.std is None
@pytest.mark.parametrize('method', ['standardise', 'centre'])
def test_transform_inverse(self, data, method):
    """
    Transform followed by inverse transform must reset the stored state and give back the original data, both
    via `inverse_transform()` and via `transform(..., inverse=True)`.
    """
    original = data.data
    undo_variants = (
        lambda: data.inverse_transform(),
        lambda: data.transform('datetime', inverse=True),
    )
    # Same order as before: direct inverse call first, then the `inverse` switch of transform.
    for undo in undo_variants:
        data.transform('datetime', method)
        undo()
        assert data._transform_method is None
        assert data.mean is None
        assert data.std is None
        assert np.testing.assert_array_almost_equal(original, data.data) is None
@pytest.mark.parametrize('method', ['normalise', 'unknownmethod'])
def test_transform_errors(self, data, method):
    """
    Unsupported methods must raise NotImplementedError; once a transform method is recorded on the instance,
    another transform call must raise AssertionError.
    """
    with pytest.raises(NotImplementedError):
        data.transform('datetime', method)

    # Pretend a transformation already happened.
    data._transform_method = method
    with pytest.raises(AssertionError) as excinfo:
        data.transform('datetime', method)
    raised_text = excinfo.value.args[0]
    assert "Transform method is already set." in raised_text
@pytest.mark.parametrize('method', ['normalise', 'unknownmethod'])
def test_transform_inverse_errors(self, data, method):
    """
    Inverse transformation without a recorded method must raise AssertionError; with an unsupported method
    recorded (and mean/std set) it must raise NotImplementedError.
    """
    with pytest.raises(AssertionError) as excinfo:
        data.inverse_transform()
    raised_text = excinfo.value.args[0]
    assert "Inverse transformation method is not set." in raised_text

    # Provide statistics so the parameter check passes and the method dispatch is reached.
    data.mean = 1
    data.std = 1
    data._transform_method = method
    with pytest.raises(NotImplementedError):
        data.inverse_transform()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment