diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py index e9db27a9ff88efa2cc800723ac99279ec66d6cbb..89aafa2c7030427e105b663c97998c3ecf09eaaf 100644 --- a/mlair/data_handler/data_handler_single_station.py +++ b/mlair/data_handler/data_handler_single_station.py @@ -183,13 +183,14 @@ class DataHandlerSingleStation(AbstractDataHandler): #. data: Standardised data """ - def f(data, method="standardise"): + def f(data, method="standardise", feature_range=None): if method == "standardise": return statistics.standardise(data, dim) elif method == "centre": return statistics.centre(data, dim) elif method == "min_max": - return statistics.min_max(data, dim) + kwargs = {"feature_range": feature_range} if feature_range is not None else {} + return statistics.min_max(data, dim, **kwargs) elif method == "log": return statistics.log(data, dim) else: @@ -205,13 +206,15 @@ class DataHandlerSingleStation(AbstractDataHandler): std = kwargs.pop('std', None) min = kwargs.pop('min', None) max = kwargs.pop('max', None) + feature_range = kwargs.pop('feature_range', None) if method == "standardise": return statistics.standardise_apply(data, mean, std), {"mean": mean, "std": std, "method": method} elif method == "centre": return statistics.centre_apply(data, mean), {"mean": mean, "method": method} elif method == "min_max": - return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method} + return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method, + "feature_range": feature_range} elif method == "log": return statistics.log_apply(data, mean, std), {"mean": mean, "std": std, "method": method} else: @@ -658,13 +661,13 @@ class DataHandlerSingleStation(AbstractDataHandler): current data is not transformed. 
""" - def f_inverse(data, method, mean=None, std=None, min=None, max=None): + def f_inverse(data, method, mean=None, std=None, min=None, max=None, feature_range=None): if method == "standardise": return statistics.standardise_inverse(data, mean, std) elif method == "centre": return statistics.centre_inverse(data, mean) elif method == "min_max": - return statistics.min_max_inverse(data, min, max) + return statistics.min_max_inverse(data, min, max, feature_range) elif method == "log": return statistics.log_inverse(data, mean, std) else: diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py index 3a57d9febc714c81a68c21facab55957eabf32d9..11461ad77c3e910a897a9a1be48aef7cef45480a 100644 --- a/mlair/data_handler/default_data_handler.py +++ b/mlair/data_handler/default_data_handler.py @@ -287,6 +287,8 @@ class DefaultDataHandler(AbstractDataHandler): old = transformation_dict[i][var].get(k, None) new = opts.get(k) transformation_dict[i][var][k] = new if old is None else old.combine_first(new) + if "feature_range" in opts.keys(): + transformation_dict[i][var]["feature_range"] = opts.get("feature_range", None) if multiprocessing.cpu_count() > 1 and kwargs.get("use_multiprocessing", True) is True: # parallel solution logging.info("use parallel transformation approach") @@ -320,6 +322,8 @@ class DefaultDataHandler(AbstractDataHandler): transformation_dict[i][k]["min"] = transformation[k]["min"].min(iter_dim) if transformation[k]["max"] is not None: transformation_dict[i][k]["max"] = transformation[k]["max"].max(iter_dim) + if "feature_range" in transformation[k].keys(): + transformation_dict[i][k]["feature_range"] = transformation[k]["feature_range"] except KeyError: pop_list.append((i, k)) for (i, k) in pop_list: diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py index 3e99357c36d556f093701325964500bf8d46c698..30391998c65950f12fc6824626638788e1bd721b 100644 --- a/mlair/helpers/statistics.py +++ 
def min_max(data: Data, dim: Union[str, int], feature_range: Tuple = (0, 1)) -> Tuple[Data, Dict[(str, Data)]]:
    """
    Apply min/max scaling using (x - x_min) / (x_max - x_min).

    Data is scaled onto the interval given by `feature_range` (default is [0, 1]).

    :param data: data to transform
    :param dim: name (xarray) or axis (pandas) of dimension which should be scaled
    :param feature_range: scale data to any interval given in feature range. Default is scaling on interval [0, 1].

    :return: transformed data, and dictionary with keys method, min, max, and feature_range
    """
    # tolerate an explicit feature_range=None (callers forward stored options that may be unset)
    feature_range = (0, 1) if feature_range is None else feature_range
    d_max = data.max(dim)
    d_min = data.min(dim)
    lower, upper = min(feature_range), max(feature_range)  # hoist interval bounds, order-insensitive
    d_scaled = (data - d_min) / (d_max - d_min) * (upper - lower) + lower
    return d_scaled, {"min": d_min, "max": d_max, "method": "min_max", "feature_range": feature_range}


def min_max_inverse(data: Data, _min: Data, _max: Data, feature_range: Tuple = (0, 1)) -> Data:
    """
    Apply inverse transformation of `min_max` scaling.

    :param data: data to apply inverse scaling
    :param _min: minimum value used in the forward min/max scaling
    :param _max: maximum value used in the forward min/max scaling
    :param feature_range: interval the forward scaling mapped onto. Default is scaling on interval [0, 1].

    :return: inverted min/max scaled data
    """
    # guard: apply_inverse_transformation forwards feature_range=None when none was stored;
    # min(None)/max(None) would raise TypeError without this default
    feature_range = (0, 1) if feature_range is None else feature_range
    lower, upper = min(feature_range), max(feature_range)
    return (data - lower) / (upper - lower) * (_max - _min) + _min


def min_max_apply(data: Data, _min: Data, _max: Data, feature_range: Tuple = (0, 1)) -> Data:
    """
    Apply `min_max` scaling with given minimum and maximum.

    :param data: data to apply scaling
    :param _min: minimum value to use for min/max scaling
    :param _max: maximum value to use for min/max scaling
    :param feature_range: scale data to any interval given in feature range. Default is scaling on interval [0, 1].

    :return: min/max scaled data
    """
    # tolerate an explicit feature_range=None, mirroring min_max / min_max_inverse
    feature_range = (0, 1) if feature_range is None else feature_range
    lower, upper = min(feature_range), max(feature_range)
    return (data - _min) / (_max - _min) * (upper - lower) + lower