Select Git revision
assign-1.csv
test_statistics.py 6.66 KiB
import numpy as np
import pandas as pd
import pytest
import xarray as xr
from mlair.helpers.statistics import DataClass, TransformationClass
from mlair.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, \
centre_apply, \
apply_inverse_transformation
# Short alias used in the parametrize lists below to pass a fixture (by name) as an argument value.
# NOTE(review): `pytest.lazy_fixture` is presumably provided by the pytest-lazy-fixture plugin -- confirm it is
# installed in the test environment.
lazy = pytest.lazy_fixture
@pytest.fixture(scope='module')
def input_data():
    """Return a reproducible sample of shape (3000, 3) with normally distributed columns.

    The columns are drawn with (mean, standard deviation) of (2, 2), (-5, 3) and (10, 1). The tests in this
    module compare sample moments against these nominal values with ``decimal=1``, so the sample has to be
    the same on every run; otherwise an unlucky draw makes the suite fail at random.

    :return: numpy array with one row per draw and one column per distribution
    """
    # Use a private, seeded generator instead of the global ``np.random`` state: the data no longer depends
    # on test execution order or on other code that touches the global seed. ``RandomState`` is available on
    # every NumPy version and its output stream for a given seed is frozen.
    rng = np.random.RandomState(42)
    columns = [rng.normal(2, 2, 3000),
               rng.normal(-5, 3, 3000),
               rng.normal(10, 1, 3000)]
    return np.array(columns).T
@pytest.fixture(scope='module')
def pandas(input_data):
    """Wrap the shared ``input_data`` sample in a pandas DataFrame."""
    frame = pd.DataFrame(input_data)
    return frame
@pytest.fixture(scope='module')
def pd_mean():
    """Per-column mean values handed to the ``*_apply`` tests for the pandas case."""
    column_means = [2, 10, 3]
    return column_means
@pytest.fixture(scope='module')
def pd_std():
    """Per-column standard deviation values handed to ``test_standardise_apply`` for the pandas case."""
    column_stds = [3, 2, 3]
    return column_stds
@pytest.fixture(scope='module')
def xarray(input_data):
    """Wrap the shared ``input_data`` sample in an xarray DataArray with dimensions ``index`` and ``value``."""
    n_index = input_data.shape[0]
    n_value = input_data.shape[1]
    # Integer labels along both axes: rows are labelled by ``index``, columns by ``value``.
    axis_labels = {'index': range(n_index), 'value': range(n_value)}
    return xr.DataArray(input_data, coords=axis_labels, dims=('index', 'value'))
@pytest.fixture(scope='module')
def xr_mean(input_data):
    """Per-``value`` mean values handed to the ``*_apply`` tests for the xarray case.

    ``input_data`` is requested as a fixture but is not read here.
    """
    value_axis = {'value': range(3)}
    return xr.DataArray([2, 10, 3], coords=value_axis, dims=['value'])
@pytest.fixture(scope='module')
def xr_std(input_data):
    """Per-``value`` standard deviation values handed to ``test_standardise_apply`` for the xarray case.

    ``input_data`` is requested as a fixture but is not read here.
    """
    value_axis = {'value': range(3)}
    return xr.DataArray([3, 2, 3], coords=value_axis, dims=['value'])
class TestStandardise:
    """Tests for ``standardise``, ``standardise_inverse``, ``standardise_apply`` and the generic inverse helper.

    Every test runs once on the pandas fixture (reducing along axis ``0``) and once on the xarray fixture
    (reducing along dimension ``'index'``).

    The ``np.testing`` helpers raise ``AssertionError`` on mismatch and return ``None`` otherwise, so they are
    called directly. Wrapping them in ``assert ... is None`` adds nothing and would drop the whole check when
    Python runs with ``-O`` (assert statements are stripped).
    """

    @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                (lazy('xarray'), 'index')])
    def test_standardise(self, data_orig, dim):
        """Returned moments match the nominal ones; transformed data has mean 0 and std 1 along ``dim``."""
        mean, std, data = standardise(data_orig, dim)
        # Sample moments only approximate the nominal parameters, hence the coarse tolerance.
        np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)
        np.testing.assert_almost_equal(std, [2, 3, 1], decimal=1)
        np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])
        np.testing.assert_almost_equal(data.std(dim), [1, 1, 1])

    @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                (lazy('xarray'), 'index')])
    def test_standardise_inverse(self, data_orig, dim):
        """``standardise_inverse`` with the returned mean and std recovers the original data."""
        mean, std, data = standardise(data_orig, dim)
        data_recovered = standardise_inverse(data, mean, std)
        np.testing.assert_array_almost_equal(data_orig, data_recovered)

    @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                (lazy('xarray'), 'index')])
    def test_apply_standardise_inverse(self, data_orig, dim):
        """``apply_inverse_transformation`` called with mean and std recovers the original data."""
        mean, std, data = standardise(data_orig, dim)
        data_recovered = apply_inverse_transformation(data, mean, std)
        np.testing.assert_array_almost_equal(data_orig, data_recovered)

    @pytest.mark.parametrize('data_orig, mean, std, dim', [(lazy('pandas'), lazy('pd_mean'), lazy('pd_std'), 0),
                                                           (lazy('xarray'), lazy('xr_mean'), lazy('xr_std'), 'index')])
    def test_standardise_apply(self, data_orig, mean, std, dim):
        """Applying externally supplied mean/std shifts and scales the sample moments accordingly."""
        data = standardise_apply(data_orig, mean, std)
        # Expected moments: (nominal mean - applied mean) / applied std, and nominal std / applied std.
        mean_expected = (np.array([2, -5, 10]) - np.array([2, 10, 3])) / np.array([3, 2, 3])
        std_expected = np.array([2, 3, 1]) / np.array([3, 2, 3])
        np.testing.assert_almost_equal(data.mean(dim), mean_expected, decimal=1)
        np.testing.assert_almost_equal(data.std(dim), std_expected, decimal=1)
class TestCentre:
    """Tests for ``centre``, ``centre_inverse``, ``centre_apply`` and the generic inverse helper.

    Every test runs once on the pandas fixture (reducing along axis ``0``) and once on the xarray fixture
    (reducing along dimension ``'index'``).

    The ``np.testing`` helpers raise ``AssertionError`` on mismatch and return ``None`` otherwise, so they are
    called directly. Wrapping them in ``assert ... is None`` adds nothing and would drop the whole check when
    Python runs with ``-O`` (assert statements are stripped).
    """

    @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                (lazy('xarray'), 'index')])
    def test_centre(self, data_orig, dim):
        """Returned mean matches the nominal one, std slot is ``None``, centred data has mean 0 along ``dim``."""
        mean, std, data = centre(data_orig, dim)
        # Sample means only approximate the nominal parameters, hence the coarse tolerance.
        np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)
        assert std is None
        np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])

    @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                (lazy('xarray'), 'index')])
    def test_centre_inverse(self, data_orig, dim):
        """``centre_inverse`` with the returned mean recovers the original data."""
        mean, _, data = centre(data_orig, dim)
        data_recovered = centre_inverse(data, mean)
        np.testing.assert_array_almost_equal(data_orig, data_recovered)

    @pytest.mark.parametrize('data_orig, dim', [(lazy('pandas'), 0),
                                                (lazy('xarray'), 'index')])
    def test_apply_centre_inverse(self, data_orig, dim):
        """``apply_inverse_transformation`` with ``method="centre"`` recovers the original data."""
        mean, _, data = centre(data_orig, dim)
        data_recovered = apply_inverse_transformation(data, mean, method="centre")
        np.testing.assert_array_almost_equal(data_orig, data_recovered)

    @pytest.mark.parametrize('data_orig, mean, dim', [(lazy('pandas'), lazy('pd_mean'), 0),
                                                      (lazy('xarray'), lazy('xr_mean'), 'index')])
    def test_centre_apply(self, data_orig, mean, dim):
        """Applying an externally supplied mean shifts the sample mean by exactly that amount."""
        data = centre_apply(data_orig, mean)
        # Expected mean: nominal mean - applied mean.
        mean_expected = np.array([2, -5, 10]) - np.array([2, 10, 3])
        np.testing.assert_almost_equal(data.mean(dim), mean_expected, decimal=1)
class TestDataClass:
    """Tests for construction and dictionary export of ``DataClass``."""

    def test_init(self):
        """A ``DataClass`` built without arguments has every checked attribute set to ``None``."""
        dc = DataClass()
        # One assertion per attribute so that a failure names the offending attribute.
        for attr in ("data", "mean", "std", "max", "min", "transform_method", "_method"):
            assert getattr(dc, attr) is None, attr

    def test_init_values(self):
        """Constructor keyword arguments are stored unchanged; ``_method`` stays ``None``."""
        dc = DataClass(data=12, mean=2, std="test", max=23.4, min=np.array([3]), transform_method="f")
        assert dc.data == 12
        assert dc.mean == 2
        assert dc.std == "test"
        assert dc.max == 23.4
        # assert_array_equal raises on mismatch by itself; no surrounding ``assert ... is None`` needed
        # (that wrapper would be stripped, together with the check, under ``python -O``).
        np.testing.assert_array_equal(dc.min, np.array([3]))
        assert dc.transform_method == "f"
        assert dc._method is None

    def test_as_dict(self):
        """``as_dict`` reports the six public fields and does not include ``_method``."""
        dc = DataClass(std=23)
        dc._method = "f(x)"
        assert dc.as_dict() == {"data": None, "mean": None, "std": 23, "max": None, "min": None,
                                "transform_method": None}
class TestTransformationClass:
    """Tests for construction of ``TransformationClass`` and its ``inputs``/``targets`` members."""

    def test_init(self):
        """Without arguments, ``inputs`` and ``targets`` exist, are ``DataClass`` objects and hold no moments."""
        transformation = TransformationClass()
        for member in ("inputs", "targets"):
            assert hasattr(transformation, member)
            assert isinstance(getattr(transformation, member), DataClass)
        assert transformation.inputs.mean is None
        assert transformation.targets.std is None

    def test_init_values(self):
        """Keyword arguments end up on the matching member; fields not passed stay ``None``."""
        transformation = TransformationClass(inputs_mean=1, inputs_std=2, inputs_method="f",
                                             targets_mean=3, targets_std=4, targets_method="g")
        # Settings handed over for the inputs
        assert transformation.inputs.mean == 1
        assert transformation.inputs.std == 2
        assert transformation.inputs.transform_method == "f"
        assert transformation.inputs.max is None
        # Settings handed over for the targets
        assert transformation.targets.mean == 3
        assert transformation.targets.std == 4
        assert transformation.targets.transform_method == "g"
        # NOTE(review): this re-checks ``inputs``; ``targets.min`` may have been intended -- confirm.
        assert transformation.inputs.min is None