Skip to content
Snippets Groups Projects
Select Git revision
  • cdda7dfa6e30858b2f4ff8b8476690573f9e2cc1
  • master default protected
  • 2020
  • 2021
4 results

assign-1.csv

Blame
  • test_statistics.py 6.66 KiB
    import numpy as np
    import pandas as pd
    import pytest
    import xarray as xr
    
    from mlair.helpers.statistics import DataClass, TransformationClass
    from mlair.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, \
        centre_apply, \
        apply_inverse_transformation
    
    # Short alias so the parametrize lists below can refer to fixtures by name, e.g. lazy('pandas').
    # NOTE(review): `pytest.lazy_fixture` is not part of core pytest; presumably it is provided by the
    # pytest-lazy-fixture plugin -- confirm the plugin is installed in the test environment.
    lazy = pytest.lazy_fixture
    
    
    @pytest.fixture(scope='module')
    def input_data():
        """Return a reproducible (3000, 3) array of normally distributed samples.

        The columns are drawn with (mean, std) of (2, 2), (-5, 3) and (10, 1). The tests in this module
        compare sample statistics against these nominal values with a finite tolerance (decimal=1), so
        the draw is seeded: with an unseeded generator the sample mean of the std=3 column occasionally
        lands outside the tolerance and the suite fails at random.
        """
        # A private RandomState keeps the global numpy random state untouched and yields the same
        # stream on every run; the seed value itself is arbitrary.
        rng = np.random.RandomState(20200101)
        n_samples = 3000
        nominal = ((2, 2), (-5, 3), (10, 1))
        columns = [rng.normal(loc, scale, n_samples) for loc, scale in nominal]
        return np.array(columns).T
    
    
    @pytest.fixture(scope='module')
    def pandas(input_data):
        """Provide the shared sample array wrapped in a pandas DataFrame."""
        frame = pd.DataFrame(data=input_data)
        return frame
    
    
    @pytest.fixture(scope='module')
    def pd_mean():
        """Provide the per-column mean values passed to the *_apply tests in the pandas case."""
        column_means = [2, 10, 3]
        return column_means
    
    
    @pytest.fixture(scope='module')
    def pd_std():
        """Provide the per-column std values passed to standardise_apply in the pandas case."""
        column_stds = [3, 2, 3]
        return column_stds
    
    
    @pytest.fixture(scope='module')
    def xarray(input_data):
        """Provide the shared sample array wrapped in a DataArray with dims 'index' and 'value'."""
        n_rows, n_cols = input_data.shape[0], input_data.shape[1]
        # Integer ranges label both axes; 'index' runs along the samples, 'value' along the columns.
        axes = {'index': range(n_rows), 'value': range(n_cols)}
        return xr.DataArray(input_data, coords=axes, dims=axes.keys())
    
    
    @pytest.fixture(scope='module')
    def xr_mean(input_data):
        """Provide the mean values for the xarray *_apply tests as a DataArray along 'value'.

        The input_data fixture is requested, but its content is not used here.
        """
        value_axis = {'value': range(3)}
        return xr.DataArray([2, 10, 3], coords=value_axis, dims=['value'])
    
    
    @pytest.fixture(scope='module')
    def xr_std(input_data):
        """Provide the std values for the xarray standardise_apply test as a DataArray along 'value'.

        The input_data fixture is requested, but its content is not used here.
        """
        value_axis = {'value': range(3)}
        return xr.DataArray([3, 2, 3], coords=value_axis, dims=['value'])
    
    
    class TestStandardise:
        """Tests for standardise, standardise_inverse, standardise_apply and apply_inverse_transformation.

        Every test runs once on the pandas fixture (dim 0) and once on the xarray fixture (dim 'index').
        The numpy.testing helpers raise AssertionError on a mismatch and return None, so they are called
        directly rather than wrapped in `assert ... is None` (which is redundant and would be stripped
        entirely when Python runs with -O).
        """

        @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                    (lazy('xarray'), 'index')])
        def test_standardise(self, data_orig, dim):
            """Returned mean/std match the generating parameters; the output has mean 0 and std 1."""
            mean, std, data = standardise(data_orig, dim)
            np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)
            np.testing.assert_almost_equal(std, [2, 3, 1], decimal=1)
            np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])
            np.testing.assert_almost_equal(data.std(dim), [1, 1, 1])

        @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                    (lazy('xarray'), 'index')])
        def test_standardise_inverse(self, data_orig, dim):
            """standardise followed by standardise_inverse recovers the original data."""
            mean, std, data = standardise(data_orig, dim)
            data_recovered = standardise_inverse(data, mean, std)
            np.testing.assert_array_almost_equal(data_orig, data_recovered)

        @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                    (lazy('xarray'), 'index')])
        def test_apply_standardise_inverse(self, data_orig, dim):
            """apply_inverse_transformation called with mean and std recovers the original data."""
            mean, std, data = standardise(data_orig, dim)
            data_recovered = apply_inverse_transformation(data, mean, std)
            np.testing.assert_array_almost_equal(data_orig, data_recovered)

        @pytest.mark.parametrize('data_orig, mean, std, dim', [(lazy('pandas'), lazy('pd_mean'), lazy('pd_std'), 0),
                                                               (lazy('xarray'), lazy('xr_mean'), lazy('xr_std'), 'index')])
        def test_standardise_apply(self, data_orig, mean, std, dim):
            """Applying an externally given mean/std shifts and scales the sample statistics accordingly."""
            data = standardise_apply(data_orig, mean, std)
            # Expected statistics: (generating mean - applied mean) / applied std, generating std / applied std.
            mean_expected = (np.array([2, -5, 10]) - np.array([2, 10, 3])) / np.array([3, 2, 3])
            std_expected = np.array([2, 3, 1]) / np.array([3, 2, 3])
            np.testing.assert_almost_equal(data.mean(dim), mean_expected, decimal=1)
            np.testing.assert_almost_equal(data.std(dim), std_expected, decimal=1)
    
    
    class TestCentre:
        """Tests for centre, centre_inverse, centre_apply and apply_inverse_transformation(method="centre").

        Every test runs once on the pandas fixture (dim 0) and once on the xarray fixture (dim 'index').
        The numpy.testing helpers raise AssertionError on a mismatch and return None, so they are called
        directly rather than wrapped in `assert ... is None` (which is redundant and would be stripped
        entirely when Python runs with -O).
        """

        @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                    (lazy('xarray'), 'index')])
        def test_centre(self, data_orig, dim):
            """Returned mean matches the generating means, std slot is None, output has mean 0."""
            mean, std, data = centre(data_orig, dim)
            np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)
            assert std is None
            np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])

        @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                    (lazy('xarray'), 'index')])
        def test_centre_inverse(self, data_orig, dim):
            """centre followed by centre_inverse recovers the original data."""
            mean, _, data = centre(data_orig, dim)
            data_recovered = centre_inverse(data, mean)
            np.testing.assert_array_almost_equal(data_orig, data_recovered)

        @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                    (lazy('xarray'), 'index')])
        def test_apply_centre_inverse(self, data_orig, dim):
            """apply_inverse_transformation with method="centre" recovers the original data."""
            mean, _, data = centre(data_orig, dim)
            data_recovered = apply_inverse_transformation(data, mean, method="centre")
            np.testing.assert_array_almost_equal(data_orig, data_recovered)

        @pytest.mark.parametrize('data_orig, mean, dim', [(lazy('pandas'), lazy('pd_mean'), 0),
                                                          (lazy('xarray'), lazy('xr_mean'), 'index')])
        def test_centre_apply(self, data_orig, mean, dim):
            """Applying an externally given mean shifts the sample mean by exactly that amount."""
            data = centre_apply(data_orig, mean)
            # Expected mean: generating mean - applied mean.
            mean_expected = np.array([2, -5, 10]) - np.array([2, 10, 3])
            np.testing.assert_almost_equal(data.mean(dim), mean_expected, decimal=1)
    
    
    class TestDataClass:
        """Tests for construction and dict export of DataClass."""

        def test_init(self):
            """Without arguments every attribute, including the private _method, is None."""
            dc = DataClass()
            for attr in ("data", "mean", "std", "max", "min", "transform_method", "_method"):
                # One assert per attribute (with its name as message) pinpoints the offender on failure,
                # which a single `all([...])` over a temporary list cannot do.
                assert getattr(dc, attr) is None, attr

        def test_init_values(self):
            """Constructor arguments are stored unchanged; _method stays None."""
            dc = DataClass(data=12, mean=2, std="test", max=23.4, min=np.array([3]), transform_method="f")
            assert dc.data == 12
            assert dc.mean == 2
            assert dc.std == "test"
            assert dc.max == 23.4
            # assert_array_equal raises on mismatch by itself, no surrounding assert needed.
            np.testing.assert_array_equal(dc.min, np.array([3]))
            assert dc.transform_method == "f"
            assert dc._method is None

        def test_as_dict(self):
            """as_dict exports the public attributes only; a set _method does not show up."""
            dc = DataClass(std=23)
            dc._method = "f(x)"
            assert dc.as_dict() == {"data": None, "mean": None, "std": 23, "max": None, "min": None,
                                    "transform_method": None}
    
    
    class TestTransformationClass:
        """Tests for construction of TransformationClass and its inputs/targets containers."""

        def test_init(self):
            """Without arguments, inputs and targets are DataClass instances with unset statistics."""
            tc = TransformationClass()
            assert hasattr(tc, "inputs")
            assert isinstance(tc.inputs, DataClass)
            assert hasattr(tc, "targets")
            assert isinstance(tc.targets, DataClass)
            assert tc.inputs.mean is None
            assert tc.targets.std is None

        def test_init_values(self):
            """Constructor arguments are routed to the matching container; max/min stay unset on both."""
            tc = TransformationClass(inputs_mean=1, inputs_std=2, inputs_method="f", targets_mean=3, targets_std=4,
                                     targets_method="g")
            assert tc.inputs.mean == 1
            assert tc.inputs.std == 2
            assert tc.inputs.transform_method == "f"
            assert tc.inputs.max is None
            assert tc.inputs.min is None
            assert tc.targets.mean == 3
            assert tc.targets.std == 4
            assert tc.targets.transform_method == "g"
            # The targets container was previously never checked for max/min (only inputs was, twice);
            # verify both containers symmetrically.
            assert tc.targets.max is None
            assert tc.targets.min is None