diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py index a79d201eb9a6b77e38f0cec0a269a0ca7f96478b..e1887e62eb682b9ba1cab335b690ae4de5cd7966 100644 --- a/mlair/helpers/statistics.py +++ b/mlair/helpers/statistics.py @@ -12,7 +12,7 @@ from typing import Union, Tuple, Dict, List from matplotlib import pyplot as plt import itertools -from mlair.helpers import to_list, remove_items +from mlair.helpers import to_list Data = Union[xr.DataArray, pd.DataFrame] @@ -23,9 +23,11 @@ def apply_inverse_transformation(data: Data, method: str = "standardise", mean: Apply inverse transformation for given statistics. :param data: transform this data back - :param method: transformation method - :param mean: mean of transformation + :param method: transformation method (optional) + :param mean: mean of transformation (optional) :param std: standard deviation of transformation (optional) + :param max: maximum value for min/max transformation (optional) + :param min: minimum value for min/max transformation (optional) :return: inverse transformed data """ @@ -45,7 +47,7 @@ def standardise(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data :param data: data to standardise :param dim: name (xarray) or axis (pandas) of dimension which should be standardised - :return: standardised data, mean, and standard deviation + :return: standardised data, and dictionary with keys method, mean, and std """ return (data - data.mean(dim)) / data.std(dim), {"mean": data.mean(dim), "std": data.std(dim), "method": "standardise"} @@ -84,7 +86,7 @@ def centre(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data)]]: :param data: data to centre :param dim: name (xarray) or axis (pandas) of dimension which should be centred - :return: centred data, mean, and None placeholder + :return: centred data, and dictionary with keys method and mean """ return data - data.mean(dim), {"mean": data.mean(dim), "method": "centre"} @@ -114,16 +116,39 @@ def 
centre_apply(data: Data, mean: Data) -> Data: def min_max(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data)]]: + """ + Apply min/max scaling using (x - x_min) / (x_max - x_min). Returned data is in interval [0, 1]. + + :param data: data to transform + :param dim: name (xarray) or axis (pandas) of dimension which should be scaled + :return: transformed data, and dictionary with keys method, min, and max + """ d_max = data.max(dim) d_min = data.min(dim) return (data - d_min) / (d_max - d_min), {"min": d_min, "max": d_max, "method": "min_max"} def min_max_inverse(data: Data, min: Data, max: Data) -> Data: + """ + Apply inverse transformation of `min_max` scaling. + + :param data: data to apply inverse scaling + :param min: minimum value to use for min/max scaling + :param max: maximum value to use for min/max scaling + :return: inverted min/max scaled data + """ return data * (max - min) + min def min_max_apply(data: Data, min: Data, max: Data) -> Data: + """ + Apply `min_max` scaling with given minimum and maximum. + + :param data: data to apply scaling + :param min: minimum value to use for min/max scaling + :param max: maximum value to use for min/max scaling + :return: min/max scaled data + """ return (data - min) / (max - min) diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index bf54b0619f94d21524edc95a52c2ad49dab788c5..f696b0065b1db2692110488bd41513cd74aca233 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -186,7 +186,6 @@ class PreProcessing(RunEnvironment): column_format = ''.join(column_format.tolist()) return column_format - def split_train_val_test(self) -> None: """ Split data into subsets.