From c91b77fe330f591c364a53cea6546cd998c7aba7 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz.juelich.de>
Date: Wed, 23 Oct 2019 14:30:23 +0200
Subject: [PATCH] introduced new module statistics with the methods standardise
 and centre

---
 requirements.txt        |  8 +++++---
 src/statistics.py       | 41 +++++++++++++++++++++++++++++++++++++++
 test/test_statistics.py | 43 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 src/statistics.py
 create mode 100644 test/test_statistics.py

diff --git a/requirements.txt b/requirements.txt
index 3bf05cf6..e07c28b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,8 @@
 Keras==2.2.4
 numpy==1.15.4
 tensorflow==1.12.0
-xarray
-pandas
-requests
\ No newline at end of file
+xarray==0.14.0
+pandas==0.25.1
+requests==2.22.0
+pytest==5.2.1
+pytest-lazy-fixture==0.6.1
\ No newline at end of file
diff --git a/src/statistics.py b/src/statistics.py
new file mode 100644
index 00000000..5a3c4a65
--- /dev/null
+++ b/src/statistics.py
@@ -0,0 +1,41 @@
+__author__ = 'Lukas Leufen'
+__date__ = '2019-10-23'
+
+import xarray as xr
+import pandas as pd
+from typing import Union, Tuple
+
+
+Data = Union[xr.DataArray, pd.DataFrame]  # container type accepted and returned by standardise and centre
+
+
+def standardise(data: Data, dim: Union[str, int]) -> Tuple[Data, Data, Data]:
+    """
+    Standardise a xarray.DataArray (along dim) or pandas.DataFrame (along axis) to mean=0 and std=1.
+    :param data: xarray.DataArray or pandas.DataFrame to standardise
+    :param string/int dim:
+            | for xarray.DataArray as string: name of dimension which should be standardised
+            | for pandas.DataFrame as int: axis of dimension which should be standardised
+    :return: tuple (mean, std, standardised data); std is data.std(dim) with the library's default settings
+    """
+    mean, std = data.mean(dim), data.std(dim)  # reduce once and reuse for the returned statistics
+    if isinstance(data, pd.DataFrame):  # plain `frame - series` aligns on columns, which is wrong for axis=1
+        align = 0 if dim in (1, 'columns') else 1
+        return mean, std, data.sub(mean, axis=align).div(std, axis=align)
+    return mean, std, (data - mean) / std
+
+
+def centre(data: Data, dim: Union[str, int]) -> Tuple[Data, None, Data]:
+    """
+    Centre a xarray.DataArray (along dim) or pandas.DataFrame (along axis) to mean=0.
+    :param data: xarray.DataArray or pandas.DataFrame to centre
+    :param string/int dim:
+            | for xarray.DataArray as string: name of dimension which should be centred
+            | for pandas.DataFrame as int: axis of dimension which should be centred
+    :return: tuple (mean, None, centred data); the second entry is always None, so the tuple has the
+            same three-element layout as the return value of standardise
+    """
+    mean = data.mean(dim)  # reduce once and reuse for the returned statistic
+    if isinstance(data, pd.DataFrame):  # plain `frame - series` aligns on columns, which is wrong for axis=1
+        return mean, None, data.sub(mean, axis=0 if dim in (1, 'columns') else 1)
+    return mean, None, data - mean
diff --git a/test/test_statistics.py b/test/test_statistics.py
new file mode 100644
index 00000000..518d817f
--- /dev/null
+++ b/test/test_statistics.py
@@ -0,0 +1,43 @@
+import pytest
+import xarray as xr
+import pandas as pd
+import numpy as np
+from src.statistics import standardise, centre
+
+
+@pytest.fixture(scope='module')
+def input_data():
+    # seeded and large enough that the sample mean/std stay well inside the decimal=1 tolerance of the tests
+    rng, size = np.random.RandomState(42), 20000
+    return np.array([rng.normal(loc, scale, size) for loc, scale in ((2, 2), (-5, 3), (10, 1))]).T
+
+
+@pytest.fixture(scope='module')
+def pandas(input_data):
+    return pd.DataFrame(data=input_data)  # one column per generated distribution
+
+
+@pytest.fixture(scope='module')
+def xarray(input_data):
+    return xr.DataArray(data=input_data, dims=('index', 'value'))  # rows -> 'index', columns -> 'value'
+
+
+class TestStandardise:
+    # standardise must recover the generating mean/std and return data with mean=0 and std=1
+    @pytest.mark.parametrize('data_org, dim', [(pytest.lazy_fixture('pandas'), 0), (pytest.lazy_fixture('xarray'), 'index')])
+    def test_standardise(self, data_org, dim):
+        mean, std, data = standardise(data_org, dim)
+        np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)  # raises AssertionError on mismatch
+        np.testing.assert_almost_equal(std, [2, 3, 1], decimal=1)
+        np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])
+        np.testing.assert_almost_equal(data.std(dim), [1, 1, 1])
+
+
+class TestCentre:
+    # centre must recover the generating mean, return None in place of std and data with mean=0
+    @pytest.mark.parametrize('data_org, dim', [(pytest.lazy_fixture('pandas'), 0), (pytest.lazy_fixture('xarray'), 'index')])
+    def test_centre(self, data_org, dim):
+        mean, std, data = centre(data_org, dim)
+        np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)  # raises AssertionError on mismatch
+        assert std is None
+        np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])
-- 
GitLab