Skip to content
Snippets Groups Projects
Commit c91b77fe authored by lukas leufen's avatar lukas leufen
Browse files

introduced new module statistics with the methods standardise and centre

parent b4c6d786
Branches
Tags
2 merge requests: !6 "updated inception model and data prep class", !4 "data prep class"
Keras==2.2.4 Keras==2.2.4
numpy==1.15.4 numpy==1.15.4
tensorflow==1.12.0 tensorflow==1.12.0
xarray xarray==0.14.0
pandas pandas==0.25.1
requests requests==2.22.0
\ No newline at end of file pytest==5.2.1
pytest-lazy-fixture==0.6.1
\ No newline at end of file
__author__ = 'Lukas Leufen'
__date__ = '2019-10-23'
import xarray as xr
import pandas as pd
from typing import Union, Tuple
# Type alias for the containers the functions in this module take and return.
Data = Union[xr.DataArray, pd.DataFrame]
def standardise(data: Data, dim: Union[str, int]) -> Tuple[Data, Data, Data]:
    """
    Standardise a xarray.DataArray (along dim) or a pandas.DataFrame (along axis) to mean=0 and std=1.

    Mean and standard deviation are taken from the container's own `mean` / `std` methods with their default
    settings; the input object itself is not modified.

    :param data: xarray.DataArray or pandas.DataFrame to standardise
    :param string/int dim:
        | for xarray.DataArray as string: name of dimension which should be standardised
        | for pandas.DataFrame as int: axis of dimension which should be standardised
    :return: xarray or pandas objects:
        #. mean: Mean of data
        #. std: Standard deviation of data
        #. data: Standardised data
    """
    # Compute each statistic once and reuse it for the return value and the transformation.
    mean = data.mean(dim)
    std = data.std(dim)
    if isinstance(data, pd.DataFrame):
        # The reduced Series is labelled by the axis that was NOT reduced. Plain `data - mean` always aligns a
        # Series with the column labels, which is only right for dim=0, so the alignment axis is given explicitly.
        align_axis = 1 if dim in (0, 'index') else 0
        return mean, std, data.sub(mean, axis=align_axis).div(std, axis=align_axis)
    return mean, std, (data - mean) / std
def centre(data: Data, dim: Union[str, int]) -> Tuple[Data, None, Data]:
    """
    Centre a xarray.DataArray (along dim) or a pandas.DataFrame (along axis) to mean=0.

    The return value has three entries so that it can be unpacked like the result of a standardisation; as no
    scaling is applied, the entry for the standard deviation is always None. The input object is not modified.

    :param data: xarray.DataArray or pandas.DataFrame to centre
    :param string/int dim:
        | for xarray.DataArray as string: name of dimension which should be centred
        | for pandas.DataFrame as int: axis of dimension which should be centred
    :return: xarray or pandas objects:
        #. mean: Mean of data
        #. std: Always None (no scaling is applied)
        #. data: Centred data
    """
    # Compute the mean once and reuse it for the return value and the transformation.
    mean = data.mean(dim)
    if isinstance(data, pd.DataFrame):
        # The reduced Series is labelled by the axis that was NOT reduced. Plain `data - mean` always aligns a
        # Series with the column labels, which is only right for dim=0, so the alignment axis is given explicitly.
        align_axis = 1 if dim in (0, 'index') else 0
        return mean, None, data.sub(mean, axis=align_axis)
    return mean, None, data - mean
import pytest
import xarray as xr
import pandas as pd
import numpy as np
from src.statistics import standardise, centre
@pytest.fixture(scope='module')
def input_data():
    """
    Provide a (2000, 3) array of normally distributed samples, created once per test module.

    The three columns are drawn with (mean, standard deviation) of (2, 2), (-5, 3) and (10, 1).
    """
    # A private, seeded generator makes the sample reproducible: with unseeded draws the sample moments
    # occasionally fall outside the decimal=1 tolerance used by the tests, which makes them flaky. Using a
    # RandomState instance also leaves numpy's global random state untouched.
    # NOTE(review): confirm that this seed yields sample moments within the tolerances asserted by the tests.
    rng = np.random.RandomState(20191023)
    return np.array([rng.normal(2, 2, 2000),
                     rng.normal(-5, 3, 2000),
                     rng.normal(10, 1, 2000)]).T
@pytest.fixture(scope='module')
def pandas(input_data):
    """Wrap the array from the `input_data` fixture in a pandas.DataFrame with default labels."""
    frame = pd.DataFrame(data=input_data)
    return frame
@pytest.fixture(scope='module')
def xarray(input_data):
    """Wrap the array from the `input_data` fixture in a xarray.DataArray with dimensions 'index' and 'value'."""
    dimension_names = ['index', 'value']
    return xr.DataArray(data=input_data, dims=dimension_names)
class TestStandardise:
    """Check `standardise` on the pandas fixture (axis 0) and on the xarray fixture (dimension 'index')."""

    @pytest.mark.parametrize('data_org, dim', [(pytest.lazy_fixture('pandas'), 0),
                                               (pytest.lazy_fixture('xarray'), 'index')])
    def test_standardise(self, data_org, dim):
        mean, std, data = standardise(data_org, dim)
        # np.testing.assert_almost_equal raises AssertionError on mismatch and returns None otherwise, so it is
        # called directly instead of being wrapped in `assert ... is None`.
        # Returned statistics must match the parameters the input columns were drawn with.
        np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)
        np.testing.assert_almost_equal(std, [2, 3, 1], decimal=1)
        # The transformed data must have mean 0 and standard deviation 1 along the same dimension.
        np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])
        np.testing.assert_almost_equal(data.std(dim), [1, 1, 1])
class TestCentre:
    """Check `centre` on the pandas fixture (axis 0) and on the xarray fixture (dimension 'index')."""

    @pytest.mark.parametrize('data_org, dim', [(pytest.lazy_fixture('pandas'), 0),
                                               (pytest.lazy_fixture('xarray'), 'index')])
    def test_centre(self, data_org, dim):
        mean, std, data = centre(data_org, dim)
        # np.testing.assert_almost_equal raises AssertionError on mismatch and returns None otherwise, so it is
        # called directly instead of being wrapped in `assert ... is None`.
        # The returned mean must match the parameters the input columns were drawn with.
        np.testing.assert_almost_equal(mean, [2, -5, 10], decimal=1)
        # Centring applies no scaling, so no standard deviation is reported.
        assert std is None
        # The transformed data must have mean 0 along the same dimension.
        np.testing.assert_almost_equal(data.mean(dim), [0, 0, 0])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment