Skip to content
Snippets Groups Projects
Commit 28ef8cbe authored by lukas leufen's avatar lukas leufen
Browse files

Merge branch 'lukas_issue302_refac_adjust-min-max-scaler' into 'develop'

Resolve "adjust min/max scaler"

See merge request !293
parents 0f62f9b2 5f9fdceb
Branches
Tags
5 merge requests!319add all changes of dev into release v1.4.0 branch,!318Resolve "release v1.4.0",!299Draft: Merge default data handler and preprocessing support parameter use_multiprocessing....,!293Resolve "adjust min/max scaler",!259Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline #65797 passed
......@@ -183,13 +183,14 @@ class DataHandlerSingleStation(AbstractDataHandler):
#. data: Standardised data
"""
def f(data, method="standardise"):
def f(data, method="standardise", feature_range=None):
if method == "standardise":
return statistics.standardise(data, dim)
elif method == "centre":
return statistics.centre(data, dim)
elif method == "min_max":
return statistics.min_max(data, dim)
kwargs = {"feature_range": feature_range} if feature_range is not None else {}
return statistics.min_max(data, dim, **kwargs)
elif method == "log":
return statistics.log(data, dim)
else:
......@@ -205,13 +206,15 @@ class DataHandlerSingleStation(AbstractDataHandler):
std = kwargs.pop('std', None)
min = kwargs.pop('min', None)
max = kwargs.pop('max', None)
feature_range = kwargs.pop('feature_range', None)
if method == "standardise":
return statistics.standardise_apply(data, mean, std), {"mean": mean, "std": std, "method": method}
elif method == "centre":
return statistics.centre_apply(data, mean), {"mean": mean, "method": method}
elif method == "min_max":
return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method}
return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method,
"feature_range": feature_range}
elif method == "log":
return statistics.log_apply(data, mean, std), {"mean": mean, "std": std, "method": method}
else:
......@@ -658,13 +661,13 @@ class DataHandlerSingleStation(AbstractDataHandler):
current data is not transformed.
"""
def f_inverse(data, method, mean=None, std=None, min=None, max=None):
def f_inverse(data, method, mean=None, std=None, min=None, max=None, feature_range=None):
if method == "standardise":
return statistics.standardise_inverse(data, mean, std)
elif method == "centre":
return statistics.centre_inverse(data, mean)
elif method == "min_max":
return statistics.min_max_inverse(data, min, max)
return statistics.min_max_inverse(data, min, max, feature_range)
elif method == "log":
return statistics.log_inverse(data, mean, std)
else:
......
......@@ -287,6 +287,8 @@ class DefaultDataHandler(AbstractDataHandler):
old = transformation_dict[i][var].get(k, None)
new = opts.get(k)
transformation_dict[i][var][k] = new if old is None else old.combine_first(new)
if "feature_range" in opts.keys():
transformation_dict[i][var]["feature_range"] = opts.get("feature_range", None)
if multiprocessing.cpu_count() > 1 and kwargs.get("use_multiprocessing", True) is True: # parallel solution
logging.info("use parallel transformation approach")
......@@ -320,6 +322,8 @@ class DefaultDataHandler(AbstractDataHandler):
transformation_dict[i][k]["min"] = transformation[k]["min"].min(iter_dim)
if transformation[k]["max"] is not None:
transformation_dict[i][k]["max"] = transformation[k]["max"].max(iter_dim)
if "feature_range" in transformation[k].keys():
transformation_dict[i][k]["feature_range"] = transformation[k]["feature_range"]
except KeyError:
pop_list.append((i, k))
for (i, k) in pop_list:
......
......@@ -20,7 +20,7 @@ Data = Union[xr.DataArray, pd.DataFrame]
def apply_inverse_transformation(data: Data, method: str = "standardise", mean: Data = None, std: Data = None,
max: Data = None, min: Data = None) -> Data:
max: Data = None, min: Data = None, feature_range: Data = None) -> Data:
"""
Apply inverse transformation for given statistics.
......@@ -38,7 +38,7 @@ def apply_inverse_transformation(data: Data, method: str = "standardise", mean:
elif method == 'centre': # pragma: no branch
return centre_inverse(data, mean)
elif method == 'min_max': # pragma: no branch
return min_max_inverse(data, min, max)
return min_max_inverse(data, min, max, feature_range)
elif method == "log":
return log_inverse(data, mean, std)
else:
......@@ -119,41 +119,45 @@ def centre_apply(data: Data, mean: Data) -> Data:
return data - mean
def min_max(data: Data, dim: Union[str, int], feature_range: Tuple = (0, 1)) -> Tuple[Data, Dict[(str, Data)]]:
    """
    Apply min/max scaling: (x - x_min) / (x_max - x_min), rescaled onto the given feature range.

    With the default ``feature_range=(0, 1)`` the returned data lies in the interval [0, 1];
    otherwise it lies in [min(feature_range), max(feature_range)].

    :param data: data to transform
    :param dim: name (xarray) or axis (pandas) of the dimension along which min and max are computed
    :param feature_range: scale data to any interval given in feature range. Default is scaling on
        interval [0, 1]. NOTE(review): only the smallest and largest entries of the tuple are used,
        so a reversed range such as (1, 0) behaves like (0, 1) — confirm this is intended.

    :return: transformed data, and dictionary with keys method, min, max, and feature_range
    """
    d_max = data.max(dim)
    d_min = data.min(dim)
    # hoist the target interval bounds so each is computed only once
    f_min, f_max = min(feature_range), max(feature_range)
    d_scaled = (data - d_min) / (d_max - d_min) * (f_max - f_min) + f_min
    return d_scaled, {"min": d_min, "max": d_max, "method": "min_max", "feature_range": feature_range}
def min_max_inverse(data: Data, _min: Data, _max: Data, feature_range: Tuple = (0, 1)) -> Data:
    """
    Invert the `min_max` scaling and map data back onto its original value range.

    :param data: data to apply inverse scaling
    :param _min: minimum value to use for min/max scaling
    :param _max: maximum value to use for min/max scaling
    :param feature_range: scale data to any interval given in feature range. Default is scaling on interval [0, 1].

    :return: inverted min/max scaled data
    """
    lower, upper = min(feature_range), max(feature_range)
    # position of each value inside the feature range, as a fraction in [0, 1]
    fraction = (data - lower) / (upper - lower)
    return fraction * (_max - _min) + _min
def min_max_apply(data: Data, _min: Data, _max: Data, feature_range: Tuple = (0, 1)) -> Data:
    """
    Apply `min_max` scaling with given minimum and maximum.

    Unlike `min_max`, the minimum and maximum are supplied by the caller (e.g. statistics
    computed on a training subset) rather than derived from *data* itself.

    :param data: data to apply scaling
    :param _min: minimum value to use for min/max scaling
    :param _max: maximum value to use for min/max scaling
    :param feature_range: scale data to any interval given in feature range. Default is scaling on
        interval [0, 1].

    :return: min/max scaled data
    """
    # annotation fixed from `Data` to `Tuple`: feature_range is a 2-tuple of interval bounds,
    # matching the signatures of min_max and min_max_inverse
    f_min, f_max = min(feature_range), max(feature_range)
    return (data - _min) / (_max - _min) * (f_max - f_min) + f_min
def log(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data)]]:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment