diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 054713481478826af2c5220f2b9d9e9c08c4a0c2..8e95e76365181ee76f91a91319b912f2626a223a 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -223,7 +223,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
             elif method == "centre":
                 return statistics.centre_apply(data, mean), {"mean": mean, "method": method}
             elif method == "min_max":
-                return statistics.min_max_apply(data, min, max), {"min": min, "max": max, "method": method,
+                kws = {"feature_range": feature_range} if feature_range is not None else {}
+                return statistics.min_max_apply(data, min, max, **kws), {"min": min, "max": max, "method": method,
                                                                   "feature_range": feature_range}
             elif method == "log":
                 return statistics.log_apply(data, mean, std), {"mean": mean, "std": std, "method": method}
@@ -416,8 +417,7 @@ class DataHandlerSingleStation(AbstractDataHandler):
         """
         chem_vars = ["benzene", "ch4", "co", "ethane", "no", "no2", "nox", "o3", "ox", "pm1", "pm10", "pm2p5",
                      "propane", "so2", "toluene"]
-        # used_chem_vars = list(set(chem_vars) & set(self.statistics_per_var.keys()))
-        used_chem_vars = list(set(chem_vars) & set(data.variables.values))
+        used_chem_vars = list(set(chem_vars) & set(data.coords[self.target_dim].values))
         if len(used_chem_vars) > 0:
             data.loc[..., used_chem_vars] = data.loc[..., used_chem_vars].clip(min=minimum)
         return data
@@ -463,11 +463,8 @@ class DataHandlerSingleStation(AbstractDataHandler):
         :return: this array
         """
         ind = pd.DataFrame({'val': index_value}, index=index_value)
-        # res = xr.Dataset.from_dataframe(ind).to_array().rename({'index': index_name}).squeeze(dim=squeez/e_dim, drop=True)
         res = xr.Dataset.from_dataframe(ind).to_array(squeeze_dim).rename({'index': index_name}).squeeze(
-            dim=squeeze_dim,
-            drop=True
-        )
+            dim=squeeze_dim, drop=True)
         res.name = index_name
         return res
 
@@ -750,8 +747,6 @@ class DataHandlerSingleStation(AbstractDataHandler):
 
 
 if __name__ == "__main__":
-    # dp = AbstractDataPrep('data/', 'dummy', 'DEBW107', ['o3', 'temp'], statistics_per_var={'o3': 'dma8eu', 'temp': 'maximum'})
-    # print(dp)
     statistics_per_var = {'o3': 'dma8eu', 'temp-rea-miub': 'maximum'}
     sp = DataHandlerSingleStation(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122',
                                   statistics_per_var=statistics_per_var, station_type='background',
diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index fef52fb27d602b5931587ff0fa2d8edd7e0c2d8f..87f780f9a6133edfcb2f9c71c2956b92f332e915 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -152,7 +152,10 @@ def min_max_apply(data: Data, _min: Data, _max: Data, feature_range: Data = (0,
     :param feature_range: scale data to any interval given in feature range. Default is scaling on interval [0, 1].
     :return: min/max scaled data
     """
-    return (data - _min) / (_max - _min) * (max(feature_range) - min(feature_range)) + min(feature_range)
+    if not isinstance(feature_range, xr.DataArray):
+        return (data - _min) / (_max - _min) * (max(feature_range) - min(feature_range)) + min(feature_range)
+    else:
+        return (data - _min) / (_max - _min) * (feature_range.max() - feature_range.min()) + feature_range.min()
 
 
 def log(data: Data, dim: Union[str, int]) -> Tuple[Data, Dict[(str, Data)]]: