Select Git revision
CITATION.cff
-
Thomas Baumann authored
* Version bump for 5.6 * Update CHANGELOG.md --------- Co-authored-by:
Robert Speck <pancetta@users.noreply.github.com>
Thomas Baumann authored* Version bump for 5.6 * Update CHANGELOG.md --------- Co-authored-by:
Robert Speck <pancetta@users.noreply.github.com>
normalization.py NaN GiB
# SPDX-FileCopyrightText: 2021 Earth System Data Exploration (ESDE), Jülich Supercomputing Center (JSC)
#
# SPDX-License-Identifier: MIT
"""
Class for normalizing data. The statistical data for normalization (minimum, maximum, average, standard deviation etc.) is expected to be available from a statistics-dictionary
created with the Calc_data_stat-class (see 'process_netCDF_v2.py').
"""
from general_utils import get_unique_vars
from statistics import Calc_data_stat
import numpy as np
class Norm_data:
    """
    Normalize and denormalize variables with statistics taken from a statistics-dictionary.

    Intended call order: create the instance with the variable names, call
    check_and_set_norm() once to attach the required statistics as attributes,
    then call norm_var() / denorm_var() for the individual variables.
    """

    ### set known norms and the requested statistics (to be retrieved from statistics.json) here ###
    known_norms = {
        "minmax": ["min", "max"],
        "znorm": ["avg", "sigma"],
    }

    def __init__(self, varnames):
        """
        Initialize the instance by setting the variable names to be handled and the status
        (for sanity checks only) as attributes.

        :param varnames: variable names; they are passed through get_unique_vars() and only the
                         first element of the returned triple is stored (presumably the
                         de-duplicated names -- confirm against general_utils).
        """
        varnames_uni, _, _ = get_unique_vars(varnames)

        self.varnames = varnames_uni
        # switched to True by check_and_set_norm() once all statistics are attached
        self.status_ok = False

    def _check_norm(self, norm):
        """
        Ensure that the requested normalization is listed in known_norms.

        :param norm: name of the normalization to validate
        :raises ValueError: if norm is unknown (the valid choices are printed beforehand)
        """
        if norm in self.known_norms:
            return

        print("Please select one of the following known normalizations: ")
        for norm_avail in self.known_norms:
            print(norm_avail)
        # format() instead of '+' so that a non-string norm (e.g. None) still ends up in the
        # intended ValueError rather than in a TypeError from string concatenation
        raise ValueError("Passed normalization '{0}' is unknown.".format(norm))

    def _check_ready(self):
        """
        Ensure that check_and_set_norm() has completed successfully on this instance.

        :raises ValueError: if the instance is not ready yet
        """
        if not self.status_ok:
            raise ValueError("Norm_data-instance needs to be initialized and checked first.")

    def check_and_set_norm(self, stat_dict, norm):
        """
        Checks if the statistics-dictionary provides the required data for selected normalization method and expands the instance's attributes accordingly.
        Example: minmax-normalization requires the minimum and maximum value of a variable named var1.
                 If the requested values are provided by the statistics-dictionary, the instance gets the attributes 'var1min' and 'var1max',respectively.

        :param stat_dict: statistics-dictionary; must contain every name in self.varnames as key
        :param norm: name of the normalization, one of the keys of known_norms
        :raises ValueError: if norm is unknown or a variable is missing in stat_dict
        """
        # some sanity checks
        self._check_norm(norm)  # valid normalization requested?

        if not all(varname in stat_dict for varname in self.varnames):  # all variables found in dictionary?
            print("Keys in stat_dict:")
            print(stat_dict.keys())

            print("Requested variables:")
            print(self.varnames)
            raise ValueError("Could not find all requested variables in statistics dictionary.")

        # create all attributes for the instance, named <varname><stat_name> (e.g. 'var1min')
        for varname in self.varnames:
            for stat_name in self.known_norms[norm]:
                setattr(self, varname + stat_name, Calc_data_stat.get_stat_vars(stat_dict, stat_name, varname))

        self.status_ok = True  # set status for normalization -> ready

    def norm_var(self, data, varname, norm):
        """
        Performs given normalization on input data (given that the instance is already set up)

        :param data: object supporting data[...] whose result supports arithmetic (e.g. a numpy array)
        :param varname: variable name whose statistics were attached by check_and_set_norm()
        :param norm: name of the normalization, one of the keys of known_norms
        :return: the normalized data
        :raises ValueError: if the instance is not set up or norm is unknown
        :raises NotImplementedError: if norm is listed in known_norms but has no implementation here
        """
        # some sanity checks
        self._check_ready()  # status ready?
        self._check_norm(norm)  # valid normalization requested?

        # do the normalization and return
        values = data[...]
        if norm == "minmax":
            var_min = getattr(self, varname + "min")
            var_max = getattr(self, varname + "max")
            return (values - var_min) / (var_max - var_min)
        if norm == "znorm":
            var_avg = getattr(self, varname + "avg")
            var_sigma = getattr(self, varname + "sigma")
            # NOTE(review): a conventional z-score divides by sigma, not sigma**2. The scaling is
            # kept as-is because denorm_var() mirrors it and existing data may rely on it --
            # confirm what the 'sigma' statistic holds before changing either side.
            return (values - var_avg) / var_sigma ** 2

        # only reachable if known_norms was extended without adding a branch above
        raise NotImplementedError("Normalization '{0}' is known, but not implemented.".format(norm))

    def denorm_var(self, data, varname, norm):
        """
        Performs given denormalization on input data (given that the instance is already set up), i.e. inverse method to norm_var

        :param data: object supporting data[...] whose result supports arithmetic (e.g. a numpy array)
        :param varname: variable name whose statistics were attached by check_and_set_norm()
        :param norm: name of the normalization, one of the keys of known_norms
        :return: the denormalized data
        :raises ValueError: if the instance is not set up or norm is unknown
        :raises NotImplementedError: if norm is listed in known_norms but has no implementation here
        """
        # some sanity checks
        self._check_ready()  # status ready?
        self._check_norm(norm)  # valid normalization requested?

        # do the denormalization and return
        values = data[...]
        if norm == "minmax":
            var_min = getattr(self, varname + "min")
            var_max = getattr(self, varname + "max")
            return values * (var_max - var_min) + var_min
        if norm == "znorm":
            var_avg = getattr(self, varname + "avg")
            var_sigma = getattr(self, varname + "sigma")
            # sigma**2 on purpose: exact inverse of the scaling applied in norm_var()
            return values * var_sigma ** 2 + var_avg

        # only reachable if known_norms was extended without adding a branch above
        raise NotImplementedError("Normalization '{0}' is known, but not implemented.".format(norm))