diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000000000000000000000000000000000..baa10de454893675aeedc6275e2c6725b0b84966 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,25 @@ +# .coveragerc to control coverage.py +[run] +branch = True + + +[report] +# Regexes for lines to exclude from consideration +exclude_lines = + # Have to re-enable the standard pragma + pragma: no cover + + # Don't complain about missing debug-only code: + def __repr__ + if self\.debug + + # Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + + # Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + + # Don't complain about import statements + import diff --git a/.gitignore b/.gitignore index 22113e4edc78c3bd956b15cd31b80ecc26754c46..d115e562c57ec8f353f32944bdcb38e68e89edc7 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ ehthumbs.db Thumbs.db .idea/ /venv/ +.coverage # don't check data and plot folder # #################################### diff --git a/requirements.txt b/requirements.txt index e07c28b5540aed318aa2a0fbfe73fc9085bdefc0..d04862751c0a737c89e991c6fef365a11db840d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ xarray==0.14.0 pandas==0.25.1 requests==2.22.0 pytest==5.2.1 -pytest-lazy-fixture==0.6.1 \ No newline at end of file +pytest-lazy-fixture==0.6.1 +pytest-cov \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/data_preparation.py b/src/data_preparation.py index 34ff8dfb00b89fdee6494273503e47132a48a61f..23f4450e07feb9f67806987540713e125989ff38 100644 --- a/src/data_preparation.py +++ b/src/data_preparation.py @@ -113,33 +113,51 @@ class DataPrep(object): self.data = self.data.interpolate_na(dim=dim, method=method, limit=limit, use_coordinate=use_coordinate, **kwargs) - def restandardise(self, data, dim='variables', **kwargs): + @staticmethod + def check_inverse_transform_params(mean, std, method) -> None: + msg = "" + if method in ['standardise', 'centre'] and mean is None: + msg += "mean, " + if method == 'standardise' and std is None: + msg += "std, " + if len(msg) > 0: + raise AttributeError(f"Inverse transform {method} can not be executed because following is None: {msg}") + + def inverse_transform(self) -> None: """ - - :param data: - :param dim: - :param kwargs: + Perform inverse transformation :return: """ - variables = kwargs.get('variables', None) - if variables is None: - return FKf.restandardize(data, mean=self.mean, std=self.std, stand=True) - else: - return FKf.restandardize(data, - mean=self.mean.sel({dim: variables}).values, - std=self.std.sel({dim: variables}).values, - stand=True) + def f_inverse(data, mean, std, method_inverse): + if method_inverse == 'standardise': + return statistics.standardise_inverse(data, mean, std), None, None + elif method_inverse == 'centre': + return statistics.centre_inverse(data, mean), None, None + elif method_inverse == 'normalise': + raise NotImplementedError + else: + raise NotImplementedError + + if self._transform_method is None: + raise AssertionError("Inverse transformation method is not set. Data cannot be inverse transformed.") + self.check_inverse_transform_params(self.mean, self.std, self._transform_method) + self.data, self.mean, self.std = f_inverse(self.data, self.mean, self.std, self._transform_method) + self._transform_method = None - def transform(self, dim: Union[str, int] = 0, method: str = 'standardise') -> None: + def transform(self, dim: Union[str, int] = 0, method: str = 'standardise', inverse: bool = False) -> None: """ This function transforms a xarray.dataarray (along dim) or pandas.DataFrame (along axis) either with mean=0 and std=1 (`method=standardise`) or centers the data with mean=0 and no change in data scale - (`method=centre`). Furthermore, this sets an internal instance attribute for later inverse transformation + (`method=centre`). Furthermore, this sets an internal instance attribute for later inverse transformation. This + method will raise an AssertionError if an internal transform method was already set ('inverse=False') or if the + internal transform method, internal mean and internal standard deviation weren't set ('inverse=True'). - :param string/int dim: + :param string/int dim: This param is not used for inverse transformation. | for xarray.DataArray as string: name of dimension which should be standardised | for pandas.DataFrame as int: axis of dimension which should be standardised - :param method: + :param method: Choose the transformation method from 'standardise' and 'centre'. 'normalise' is not implemented + yet. This param is not used for inverse transformation. + :param inverse: Switch between transformation and inverse transformation. :return: xarray.DataArrays or pandas.DataFrames: #. mean: Mean of data #. std: Standard deviation of data @@ -157,11 +175,14 @@ class DataPrep(object): else: raise NotImplementedError - if self._transform_method is not None: - raise AssertionError(f"Transform method is already set. Therefore, data was already transformed with " - f"{self._transform_method}. Please perform inverse transformation of data first.") - self._transform_method = method - self.mean, self.std, self.data = f(self.data) + if not inverse: + if self._transform_method is not None: + raise AssertionError(f"Transform method is already set. Therefore, data was already transformed with " + f"{self._transform_method}. Please perform inverse transformation of data first.") + self.mean, self.std, self.data = f(self.data) + self._transform_method = method + else: + self.inverse_transform() def make_history_window(self, dim, window): raise NotImplementedError diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/test/test_data_preparation.py b/test/test_data_preparation.py index 7283196fe479f1aecee84c8491d956ac57a097fe..25df36a6509405ce7deaef8f7ed3a403bb12d3b6 100644 --- a/test/test_data_preparation.py +++ b/test/test_data_preparation.py @@ -10,7 +10,7 @@ class TestDataPrep: @pytest.fixture def data(self): - return DataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], test='testKWARGS', + return DataPrep('test/data/', 'dummy', 'DEBW107', ['o3', 'temp'], test='testKWARGS', statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'}) def test_init(self, data): @@ -50,7 +50,7 @@ class TestDataPrep: def test_set_file_name_and_meta(self): d = object.__new__(DataPrep) - d.path = os.path.abspath('data/test') + d.path = os.path.abspath('test/data/test') d.station = 'TESTSTATION' d.variables = ['a', 'bc'] assert d._set_file_name() == os.path.join(os.path.abspath(os.path.dirname(__file__)), @@ -72,6 +72,18 @@ class TestDataPrep: assert isinstance(data.mean, xr.DataArray) assert isinstance(data.std, xr.DataArray) + @pytest.mark.parametrize('mean, std, method, msg', [(10, 3, 'standardise', ''), (6, None, 'standardise', 'std, '), + (None, 3, 'standardise', 'mean, '), (19, None, 'centre', ''), + (None, 2, 'centre', 'mean, '), (8, 2, 'centre', ''), + (None, None, 'standardise', 'mean, std, ')]) + def test_check_inverse_transform_params(self, data, mean, std, method, msg): + if len(msg) > 0: + with pytest.raises(AttributeError) as e: + data.check_inverse_transform_params(mean, std, method) + assert msg in e.value.args[0] + else: + assert data.check_inverse_transform_params(mean, std, method) is None + def test_transform_centre(self, data): assert data._transform_method is None assert data.mean is None @@ -83,3 +95,38 @@ class TestDataPrep: assert np.testing.assert_almost_equal(data.data.std('datetime').variable.values, data_std_org) is None assert data.std is None + @pytest.mark.parametrize('method', ['standardise', 'centre']) + def test_transform_inverse(self, data, method): + data_org = data.data + data.transform('datetime', method) + data.inverse_transform() + assert data._transform_method is None + assert data.mean is None + assert data.std is None + assert np.testing.assert_array_almost_equal(data_org, data.data) is None + data.transform('datetime', method) + data.transform('datetime', inverse=True) + assert data._transform_method is None + assert data.mean is None + assert data.std is None + assert np.testing.assert_array_almost_equal(data_org, data.data) is None + + @pytest.mark.parametrize('method', ['normalise', 'unknownmethod']) + def test_transform_errors(self, data, method): + with pytest.raises(NotImplementedError): + data.transform('datetime', method) + data._transform_method = method + with pytest.raises(AssertionError) as e: + data.transform('datetime', method) + assert "Transform method is already set." in e.value.args[0] + + @pytest.mark.parametrize('method', ['normalise', 'unknownmethod']) + def test_transform_inverse_errors(self, data, method): + with pytest.raises(AssertionError) as e: + data.inverse_transform() + assert "Inverse transformation method is not set." in e.value.args[0] + data.mean = 1 + data.std = 1 + data._transform_method = method + with pytest.raises(NotImplementedError): + data.inverse_transform()