Skip to content
Snippets Groups Projects
Commit 0ec39ab7 authored by lukas leufen's avatar lukas leufen
Browse files

added inverse transformation for dataprep class, setup pytest coverage

parent a2299e50
No related branches found
No related tags found
2 merge requests: !6 "updated inception model and data prep class", !4 "data prep class"
# .coveragerc to control coverage.py
[run]
branch = True
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
# Don't complain about missing debug-only code:
def __repr__
if self\.debug
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:
# Don't complain about import statements
import
......@@ -39,6 +39,7 @@ ehthumbs.db
Thumbs.db
.idea/
/venv/
.coverage
# don't check data and plot folder #
####################################
......
......@@ -6,3 +6,4 @@ pandas==0.25.1
requests==2.22.0
pytest==5.2.1
pytest-lazy-fixture==0.6.1
pytest-cov
\ No newline at end of file
......@@ -113,33 +113,51 @@ class DataPrep(object):
self.data = self.data.interpolate_na(dim=dim, method=method, limit=limit, use_coordinate=use_coordinate,
**kwargs)
def restandardise(self, data, dim='variables', **kwargs):
@staticmethod
def check_inverse_transform_params(mean, std, method) -> None:
    """
    Verify that every statistic needed to invert `method` is available.

    'standardise' and 'centre' both need a mean, and 'standardise' additionally needs a standard deviation. Any other
    method name passes this check without requirements.

    :param mean: stored mean of the transformed data (or None if not set)
    :param std: stored standard deviation of the transformed data (or None if not set)
    :param method: name of the transformation that is going to be inverted
    :raises AttributeError: if at least one required statistic is None; the message lists all missing ones
    :return: None
    """
    mean_required = method in ('standardise', 'centre')
    std_required = method == 'standardise'

    # collect the names of all missing statistics so a single error reports them together
    missing = []
    if mean_required and mean is None:
        missing.append("mean, ")
    if std_required and std is None:
        missing.append("std, ")

    if missing:
        msg = "".join(missing)
        raise AttributeError(f"Inverse transform {method} can not be executed because following is None: {msg}")
def inverse_transform(self) -> None:
"""
:param data:
:param dim:
:param kwargs:
Perform inverse transformation
:return:
"""
variables = kwargs.get('variables', None)
if variables is None:
return FKf.restandardize(data, mean=self.mean, std=self.std, stand=True)
def f_inverse(data, mean, std, method_inverse):
if method_inverse == 'standardise':
return statistics.standardise_inverse(data, mean, std), None, None
elif method_inverse == 'centre':
return statistics.centre_inverse(data, mean), None, None
elif method_inverse == 'normalise':
raise NotImplementedError
else:
return FKf.restandardize(data,
mean=self.mean.sel({dim: variables}).values,
std=self.std.sel({dim: variables}).values,
stand=True)
raise NotImplementedError
def transform(self, dim: Union[str, int] = 0, method: str = 'standardise') -> None:
if self._transform_method is None:
raise AssertionError("Inverse transformation method is not set. Data cannot be inverse transformed.")
self.check_inverse_transform_params(self.mean, self.std, self._transform_method)
self.data, self.mean, self.std = f_inverse(self.data, self.mean, self.std, self._transform_method)
self._transform_method = None
def transform(self, dim: Union[str, int] = 0, method: str = 'standardise', inverse: bool = False) -> None:
"""
This function transforms a xarray.dataarray (along dim) or pandas.DataFrame (along axis) either with mean=0
and std=1 (`method=standardise`) or centers the data with mean=0 and no change in data scale
(`method=centre`). Furthermore, this sets an internal instance attribute for later inverse transformation
(`method=centre`). Furthermore, this sets an internal instance attribute for later inverse transformation. This
method will raise an AssertionError if an internal transform method was already set ('inverse=False') or if the
internal transform method, internal mean and internal standard deviation weren't set ('inverse=True').
:param string/int dim:
:param string/int dim: This param is not used for inverse transformation.
| for xarray.DataArray as string: name of dimension which should be standardised
| for pandas.DataFrame as int: axis of dimension which should be standardised
:param method:
:param method: Choose the transformation method from 'standardise' and 'centre'. 'normalise' is not implemented
yet. This param is not used for inverse transformation.
:param inverse: Switch between transformation and inverse transformation.
:return: xarray.DataArrays or pandas.DataFrames:
#. mean: Mean of data
#. std: Standard deviation of data
......@@ -157,11 +175,14 @@ class DataPrep(object):
else:
raise NotImplementedError
if not inverse:
if self._transform_method is not None:
raise AssertionError(f"Transform method is already set. Therefore, data was already transformed with "
f"{self._transform_method}. Please perform inverse transformation of data first.")
self._transform_method = method
self.mean, self.std, self.data = f(self.data)
self._transform_method = method
else:
self.inverse_transform()
def make_history_window(self, dim, window):
raise NotImplementedError
......
......@@ -10,7 +10,7 @@ class TestDataPrep:
@pytest.fixture
def data(self):
return DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], test='testKWARGS',
return DataPrep('test/data/', 'dummy', 'DEBW107', ['o3', 'temp'], test='testKWARGS',
statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
def test_init(self, data):
......@@ -50,7 +50,7 @@ class TestDataPrep:
def test_set_file_name_and_meta(self):
d = object.__new__(DataPrep)
d.path = os.path.abspath('data/test')
d.path = os.path.abspath('test/data/test')
d.station = 'TESTSTATION'
d.variables = ['a', 'bc']
assert d._set_file_name() == os.path.join(os.path.abspath(os.path.dirname(__file__)),
......@@ -72,6 +72,18 @@ class TestDataPrep:
assert isinstance(data.mean, xr.DataArray)
assert isinstance(data.std, xr.DataArray)
@pytest.mark.parametrize('mean, std, method, msg', [
    (10, 3, 'standardise', ''),
    (6, None, 'standardise', 'std, '),
    (None, 3, 'standardise', 'mean, '),
    (19, None, 'centre', ''),
    (None, 2, 'centre', 'mean, '),
    (8, 2, 'centre', ''),
    (None, None, 'standardise', 'mean, std, '),
])
def test_check_inverse_transform_params(self, data, mean, std, method, msg):
    """
    Check the parameter validation for the inverse transformation.

    An empty `msg` marks a valid parameter combination (the check returns None). Otherwise `msg` holds the part of
    the error message that names the missing statistics.
    """
    if not msg:
        # valid combination: the check passes silently
        assert data.check_inverse_transform_params(mean, std, method) is None
        return
    with pytest.raises(AttributeError) as e:
        data.check_inverse_transform_params(mean, std, method)
    error_text = e.value.args[0]
    assert msg in error_text
def test_transform_centre(self, data):
assert data._transform_method is None
assert data.mean is None
......@@ -83,3 +95,38 @@ class TestDataPrep:
assert np.testing.assert_almost_equal(data.data.std('datetime').variable.values, data_std_org) is None
assert data.std is None
@pytest.mark.parametrize('method', ['standardise', 'centre'])
def test_transform_inverse(self, data, method):
    """
    Transform the data and undo it again, once through `inverse_transform()` directly and once through
    `transform(..., inverse=True)`. After each round trip the internal state must be reset and the data must match
    the original values.
    """
    data_org = data.data
    undo_variants = (
        lambda: data.inverse_transform(),
        lambda: data.transform('datetime', inverse=True),
    )
    for undo in undo_variants:
        data.transform('datetime', method)
        undo()
        # internal bookkeeping is cleared after the inverse transformation
        assert data._transform_method is None
        assert data.mean is None
        assert data.std is None
        # values are restored (up to numerical precision)
        assert np.testing.assert_array_almost_equal(data_org, data.data) is None
@pytest.mark.parametrize('method', ['normalise', 'unknownmethod'])
def test_transform_errors(self, data, method):
    """
    Unsupported methods must raise NotImplementedError, and transforming while a transform method is already set
    must raise an AssertionError.
    """
    # method is not implemented / unknown
    with pytest.raises(NotImplementedError):
        data.transform('datetime', method)

    # pretend the data was transformed before, a second transformation has to be refused
    data._transform_method = method
    with pytest.raises(AssertionError) as e:
        data.transform('datetime', method)
    error_text = e.value.args[0]
    assert "Transform method is already set." in error_text
@pytest.mark.parametrize('method', ['normalise', 'unknownmethod'])
def test_transform_inverse_errors(self, data, method):
    """
    The inverse transformation must raise an AssertionError if no transform method is set, and a
    NotImplementedError if the stored method has no inverse implemented.
    """
    # nothing was transformed yet, so there is nothing to invert
    with pytest.raises(AssertionError) as e:
        data.inverse_transform()
    error_text = e.value.args[0]
    assert "Inverse transformation method is not set." in error_text

    # set statistics by hand so that only the unsupported method can trigger the error
    data.mean, data.std = 1, 1
    data._transform_method = method
    with pytest.raises(NotImplementedError):
        data.inverse_transform()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment