From c91b77fe330f591c364a53cea6546cd998c7aba7 Mon Sep 17 00:00:00 2001 From: lukas leufen <l.leufen@fz.juelich.de> Date: Wed, 23 Oct 2019 14:30:23 +0200 Subject: [PATCH] introduced new module statistics with the methods standardise and centre --- requirements.txt | 8 +++++--- src/statistics.py | 41 +++++++++++++++++++++++++++++++++++++++ test/test_statistics.py | 43 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 src/statistics.py create mode 100644 test/test_statistics.py diff --git a/requirements.txt b/requirements.txt index 3bf05cf6..e07c28b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ Keras==2.2.4 numpy==1.15.4 tensorflow==1.12.0 -xarray -pandas -requests \ No newline at end of file +xarray==0.14.0 +pandas==0.25.1 +requests==2.22.0 +pytest==5.2.1 +pytest-lazy-fixture==0.6.1 \ No newline at end of file diff --git a/src/statistics.py b/src/statistics.py new file mode 100644 index 00000000..5a3c4a65 --- /dev/null +++ b/src/statistics.py @@ -0,0 +1,41 @@ +__author__ = 'Lukas Leufen' +__date__ = '2019-10-23' + +import xarray as xr +import pandas as pd +from typing import Union, Tuple + + +Data = Union[xr.DataArray, pd.DataFrame] + + +def standardise(data: Data, dim: Union[str, int]) -> Tuple[Data, Data, Data]: + """ + This function standardises an xarray.DataArray (along dim) or pandas.DataFrame (along axis) to mean=0 and std=1 + :param data: data to be standardised + :param string/int dim: + | for xarray.DataArray as string: name of dimension which should be standardised + | for pandas.DataFrame as int: axis of dimension which should be standardised + :return: xarray.DataArrays or pandas.DataFrames: + #. mean: Mean of data + #. std: Standard deviation of data + #. 
data: Standardised data + """ + + return data.mean(dim), data.std(dim), (data - data.mean(dim)) / data.std(dim) + + +def centre(data: Data, dim: Union[str, int]) -> Tuple[Data, None, Data]: + """ + This function centres an xarray.DataArray (along dim) or pandas.DataFrame (along axis) to mean=0 + :param data: data to be centred + :param string/int dim: + | for xarray.DataArray as string: name of dimension which should be centred + | for pandas.DataFrame as int: axis of dimension which should be centred + :return: xarray.DataArrays or pandas.DataFrames: + #. mean: Mean of data + #. std: None (centring does not rescale the data) + #. data: Centred data + """ + + return data.mean(dim), None, data - data.mean(dim) diff --git a/test/test_statistics.py b/test/test_statistics.py new file mode 100644 index 00000000..518d817f --- /dev/null +++ b/test/test_statistics.py @@ -0,0 +1,43 @@ +import pytest +import xarray as xr +import pandas as pd +import numpy as np +from src.statistics import standardise, centre + + +@pytest.fixture(scope='module') +def input_data(): + return np.array([np.random.normal(2, 2, 2000), + np.random.normal(-5, 3, 2000), + np.random.normal(10, 1, 2000)]).T + + +@pytest.fixture(scope='module') +def pandas(input_data): + return pd.DataFrame(input_data) + + +@pytest.fixture(scope='module') +def xarray(input_data): + return xr.DataArray(input_data, dims=['index', 'value']) + + +class TestStandardise: + + @pytest.mark.parametrize('data_org, dim', [(pytest.lazy_fixture('pandas'), 0), (pytest.lazy_fixture('xarray'), 'index')]) + def test_standardise(self, data_org, dim): + mean, std, data = standardise(data_org, dim) + assert np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1) is None + assert np.testing.assert_almost_equal(std, [2, 3, 1], decimal=1) is None + assert np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0]) is None + assert np.testing.assert_almost_equal(data.std(dim), [1, 1, 1]) is None + + +class TestCentre: + + @pytest.mark.parametrize('data_org, 
dim', [(pytest.lazy_fixture('pandas'), 0), (pytest.lazy_fixture('xarray'), 'index')]) + def test_centre(self, data_org, dim): + mean, std, data = centre(data_org, dim) + assert np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1) is None + assert std is None + assert np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0]) is None -- GitLab