Skip to content
Snippets Groups Projects
Commit cbe9655d authored by Michael Langguth's avatar Michael Langguth
Browse files

Added description to every method of metadata-class and added requirement for...

Added description to every method of metadata-class and added requirement for exp_id. The only exception holds for the method where a pre-existing meta data json file is read in order to ensure backwards compatibility.
parent 5baf5478
Branches
Tags
No related merge requests found
Pipeline #47074 failed
...@@ -10,6 +10,7 @@ import json ...@@ -10,6 +10,7 @@ import json
from netCDF4 import Dataset from netCDF4 import Dataset
from general_utils import is_integer, add_str_to_path from general_utils import is_integer, add_str_to_path
class MetaData: class MetaData:
""" """
Class for handling, storing and retrieving meta-data Class for handling, storing and retrieving meta-data
...@@ -71,10 +72,9 @@ class MetaData: ...@@ -71,10 +72,9 @@ class MetaData:
MetaData.write_metadata_to_file(self) MetaData.write_metadata_to_file(self)
def get_and_set_metadata_from_file(self, suffix_indir, exp_id, datafile_name, slices, variables): def get_and_set_metadata_from_file(self, suffix_indir, exp_id, datafile_name, slices, variables):
""" '''
Retrieves several meta data from netCDF-file and sets corresponding class instance attributes. Retrieves several meta data from an ERA5 netCDF-file and sets corresponding class instance attributes.
Besides, the name of the experiment directory is constructed following the naming convention (see below) Besides, the name of the experiment directory is constructed following the naming convention (see below)
Naming convention: Naming convention:
...@@ -82,7 +82,23 @@ class MetaData: ...@@ -82,7 +82,23 @@ class MetaData:
---------------- Given ----------------|---------------- Created dynamically -------------- ---------------- Given ----------------|---------------- Created dynamically --------------
Note that the model-base as well as the date-identifiers must already be included in target_dir_in. Note that the model-base as well as the date-identifiers must already be included in target_dir_in.
""" :param suffix_indir: Path to directory where the preprocessed data will be stored
:param exp_id: Experimental identifier
:param datafile_name: ERA 5 reanalysis netCDF file
:param slices: indices of lat- and lon-coordinates defining the region of interest
:param variables: meteorological variables to be processed during preprocessing
:return: A class instance with the following attributes set:
* varnames : name of variables to be processed
* nx : number of grid points of sliced region in zonal direction
* ny : same as nx, but in meridional direction
* sw_c : south-west corner [lat,lon] coordinates of region of interest
* lat : latitude coordinates of grid points (on a rectangular grid)
* lon : longitude coordinates of grid points (on a rectangular grid)
* expname : name of target experiment directory following naming convention (see above)
* expdir : basename of experiment directory
* exp_id : experimental identifier
* status : status to indicate if a new metadata was set up or if it's pre-existing (left empty here!)
'''
method_name = MetaData.get_and_set_metadata_from_file.__name__ + " of Class " + MetaData.__name__ method_name = MetaData.get_and_set_metadata_from_file.__name__ + " of Class " + MetaData.__name__
...@@ -103,7 +119,8 @@ class MetaData: ...@@ -103,7 +119,8 @@ class MetaData:
self.varnames = variables self.varnames = variables
self.nx, self.ny = np.abs(slices['lon_e'] - slices['lon_s']), np.abs(slices['lat_e'] - slices['lat_s']) self.nx, self.ny = np.abs(slices['lon_e'] - slices['lon_s']), np.abs(slices['lat_e'] - slices['lat_s'])
sw_c = [float(datafile.variables['lat'][slices['lat_e']-1]),float(datafile.variables['lon'][slices['lon_s']])] # meridional axis lat is oriented from north to south (i.e. monotonically decreasing) sw_c = [float(datafile.variables['lat'][slices['lat_e'] - 1]), float(datafile.variables['lon'][slices[
'lon_s']])] # meridional axis lat is oriented from north to south (i.e. monotonically decreasing)
self.sw_c = sw_c self.sw_c = sw_c
self.lat = datafile.variables['lat'][slices['lat_s']:slices['lat_e']] self.lat = datafile.variables['lat'][slices['lat_s']:slices['lat_e']]
self.lon = datafile.variables['lon'][slices['lon_s']:slices['lon_e']] self.lon = datafile.variables['lon'][slices['lon_s']:slices['lon_e']]
...@@ -131,7 +148,8 @@ class MetaData: ...@@ -131,7 +148,8 @@ class MetaData:
# extend expdir_in successively (splitted up for better readability) # extend expdir_in successively (splitted up for better readability)
expname += "-" + str(self.nx) + "x" + str(self.ny) expname += "-" + str(self.nx) + "x" + str(self.ny)
expname += "-"+(("{0: 05.2f}"+flag_coords[0]+"{1:05.2f}"+flag_coords[1]).format(*sw_c)).strip().replace(".","")+"-" expname += "-" + (("{0: 05.2f}" + flag_coords[0] + "{1:05.2f}" + flag_coords[1]).format(*sw_c)).strip().replace(
".", "") + "-"
# reduced for-loop length as last variable-name is not followed by an underscore (see above) # reduced for-loop length as last variable-name is not followed by an underscore (see above)
for i in range(nvar - 1): for i in range(nvar - 1):
...@@ -146,38 +164,29 @@ class MetaData: ...@@ -146,38 +164,29 @@ class MetaData:
# ML 2020/04/24 E # ML 2020/04/24 E
def write_metadata_to_file(self, dest_dir=None): def write_metadata_to_file(self, dest_dir=None):
'''
""" Writes meta data stored as attributes in the class instance to metadata.json.
Write meta data attributes of class instance to json-file. If dest_dir is None, the destination directory is constructed based on the attributes expdir and expname.
""" :param dest_dir: path to directory where to store metadata.json
:return: -
'''
method_name = MetaData.write_metadata_to_file.__name__ + " of Class " + MetaData.__name__ method_name = MetaData.write_metadata_to_file.__name__ + " of Class " + MetaData.__name__
# actual work: # actual work:
meta_dict = {"expname": self.expname, meta_dict = {"expname": self.expname, "expdir": self.expdir, "exp_id": self.exp_id, "sw_corner_frame": {
"expdir" : self.expdir,
"exp_id" : self.exp_id}
meta_dict["sw_corner_frame"] = {
"lat": np.around(self.sw_c[0], decimals=2), "lat": np.around(self.sw_c[0], decimals=2),
"lon": np.around(self.sw_c[1], decimals=2) "lon": np.around(self.sw_c[1], decimals=2)
} }, "coordinates": {
meta_dict["coordinates"] = {
"lat": np.around(self.lat, decimals=2).tolist(), "lat": np.around(self.lat, decimals=2).tolist(),
"lon": np.around(self.lon, decimals=2).tolist() "lon": np.around(self.lon, decimals=2).tolist()
} }, "frame_size": {
meta_dict["frame_size"] = {
"nx": int(self.nx), "nx": int(self.nx),
"ny": int(self.ny) "ny": int(self.ny)
} }, "variables": []}
meta_dict["variables"] = []
for i in range(len(self.varnames)): for i in range(len(self.varnames)):
# print(self.varnames[i]) # print(self.varnames[i])
meta_dict["variables"].append( meta_dict["variables"].append({"var" + str(i + 1): self.varnames[i]})
{"var"+str(i+1) : self.varnames[i]})
# create directory if required # create directory if required
if dest_dir is None: if dest_dir is None:
dest_dir = os.path.join(self.expdir, self.expname) dest_dir = os.path.join(self.expdir, self.expname)
...@@ -195,7 +204,8 @@ class MetaData: ...@@ -195,7 +204,8 @@ class MetaData:
if dict_dupl != meta_dict: if dict_dupl != meta_dict:
meta_fname_dbg = os.path.join(dest_dir, "metadata_debug.json") meta_fname_dbg = os.path.join(dest_dir, "metadata_debug.json")
print(method_name+": Already existing metadata (see '"+meta_fname+"') do not fit data being processed right now (see '" \ print(
method_name + ": Already existing metadata (see '" + meta_fname + "') do not fit data being processed right now (see '" \
+ meta_fname_dbg + "'. Ensure a common data base.") + meta_fname_dbg + "'. Ensure a common data base.")
with open(meta_fname_dbg, 'w') as js_file: with open(meta_fname_dbg, 'w') as js_file:
json.dump(meta_dict, js_file) json.dump(meta_dict, js_file)
...@@ -210,15 +220,28 @@ class MetaData: ...@@ -210,15 +220,28 @@ class MetaData:
self.status = "new" # set status to new in order to trigger modification of shell-/Batch-scripts self.status = "new" # set status to new in order to trigger modification of shell-/Batch-scripts
def get_metadata_from_file(self, js_file): def get_metadata_from_file(self, js_file):
'''
""" :param js_file: json file from which to retrieve the meta data
Retrieves meta data attributes from json-file :return: A class instance with the following attributes set:
""" * varnames : name of variables to be processed
* nx : number of grid points of sliced region in zonal direction
* ny : same as nx, but in meridional direction
* sw_c : south-west corner [lat,lon] coordinates of region of interest
* lat : latitude coordinates of grid points (on a rectangular grid)
* lon : longitude coordinates of grid points (on a rectangular grid)
* expname : name of target experiment directory following naming convention (see above)
* expdir : basename of experiment directory
* exp_id : experimental identifier (if available!)
* status : status to indicate if a new metadata is set-up or pre-existing (left empty here!)
'''
with open(js_file) as js_file: with open(js_file) as js_file:
dict_in = json.load(js_file) dict_in = json.load(js_file)
self.expdir = dict_in["expdir"] self.expdir = dict_in["expdir"]
self.expname = dict_in["expname"]
# check if exp_id is available (retained for ensuring backward compatibility with
# old meta data files without exp_id)
if "exp_id" in dict_in: if "exp_id" in dict_in:
self.exp_id = dict_in["exp_id"] self.exp_id = dict_in["exp_id"]
...@@ -235,12 +258,16 @@ class MetaData: ...@@ -235,12 +258,16 @@ class MetaData:
self.variables = [list_of_dict_aux[ivar]["var" + str(ivar + 1)] for ivar in range(len(list_of_dict_aux))] self.variables = [list_of_dict_aux[ivar]["var" + str(ivar + 1)] for ivar in range(len(list_of_dict_aux))]
def write_dirs_to_batch_scripts(self, batch_script): def write_dirs_to_batch_scripts(self, batch_script):
'''
Method for automatic extension of path variables in Batch scripts by the experiment directory which is saved
in the expname-attribute of the class instance
:param batch_script: Batch script whose (known) path variables (defined by paths_to_mod below) will be expanded
by the expname-attribute of the class instance at hand
:return: modified Batch script
'''
""" paths_to_mod = ["source_dir=", "destination_dir=", "checkpoint_dir=",
Expands ('known') directory-variables in batch_script by expdir-attribute of class instance "results_dir="] # known directory-variables in batch-scripts
"""
paths_to_mod = ["source_dir=","destination_dir=","checkpoint_dir=","results_dir="] # known directory-variables in batch-scripts
# For backward compability: # For backward compability:
# Check if exp_id (if present) needs to be added to batch_script in order to access the file # Check if exp_id (if present) needs to be added to batch_script in order to access the file
...@@ -252,21 +279,24 @@ class MetaData: ...@@ -252,21 +279,24 @@ class MetaData:
data = file.readlines() data = file.readlines()
nlines = len(data) nlines = len(data)
matched_lines = [iline for iline in range(nlines) if any(str_id in data[iline] for str_id in paths_to_mod)] # list of line-number indices to be modified matched_lines = [iline for iline in range(nlines) if any(
str_id in data[iline] for str_id in paths_to_mod)] # list of line-number indices to be modified
for i in matched_lines: for i in matched_lines:
data[i] = add_str_to_path(data[i], self.expname) data[i] = add_str_to_path(data[i], self.expname)
with open(batch_script, 'w') as file: with open(batch_script, 'w') as file:
file.writelines(data) file.writelines(data)
@staticmethod @staticmethod
def write_destdir_jsontmp(dest_dir, tmp_dir=None): def write_destdir_jsontmp(dest_dir, tmp_dir=None):
""" '''
Writes dest_dir to temporary json-file (temp.json) stored in the current working directory. Writes dest_dir to temporary json-file (temp.json) stored in the current working directory.
To be executed by Master node in parallel mode. To be executed by Master node only in parallel mode.
""" :param dest_dir: path to destination directory
:param tmp_dir: directory where to store temp.json (optional)
:return: -
'''
if not tmp_dir: tmp_dir = os.getcwd() if not tmp_dir: tmp_dir = os.getcwd()
...@@ -279,9 +309,11 @@ class MetaData: ...@@ -279,9 +309,11 @@ class MetaData:
@staticmethod @staticmethod
def get_destdir_jsontmp(tmp_dir=None): def get_destdir_jsontmp(tmp_dir=None):
""" '''
Retrieves dest_dir from temporary json-file which is expected to exist in the current working directory and returns it. Retrieves path destination directory from temp.json file (to be created by write_destdir_jsontmp-method)
""" :param tmp_dir: directory where temp.json is stored (optional). If not provided, the working directory is used.
:return: string containing the path to the destination directory
'''
method_name = MetaData.get_destdir_jsontmp.__name__ + " of Class " + MetaData.__name__ method_name = MetaData.get_destdir_jsontmp.__name__ + " of Class " + MetaData.__name__
...@@ -303,9 +335,13 @@ class MetaData: ...@@ -303,9 +335,13 @@ class MetaData:
@staticmethod @staticmethod
def wait_for_jsontmp(tmp_dir=None, waittime=10, delay=0.5): def wait_for_jsontmp(tmp_dir=None, waittime=10, delay=0.5):
""" '''
Waits at max. waittime (in sec) until temp.json-file becomes available Waits until temp.json-file becomes available
""" :param tmp_dir: directory where temp.json is stored (optional). If not provided, the working directory is used.
:param waittime: time to wait in seconds (default: 10 s)
:param delay: length of checking interval (default: 0.5 s)
:return: -
'''
method_name = MetaData.wait_for_jsontmp.__name__ + " of Class " + MetaData.__name__ method_name = MetaData.wait_for_jsontmp.__name__ + " of Class " + MetaData.__name__
...@@ -329,12 +365,14 @@ class MetaData: ...@@ -329,12 +365,14 @@ class MetaData:
if status != "ok": raise IOError(method_name + ": '" + file_tmp + \ if status != "ok": raise IOError(method_name + ": '" + file_tmp + \
"' does not exist after waiting for " + str(waittime) + " sec.") "' does not exist after waiting for " + str(waittime) + " sec.")
@staticmethod @staticmethod
def issubset(a, b): def issubset(a, b):
""" '''
Checks if all elements of a exist in b or vice versa (depends on the length of the corresponding lists/sets) Checks if all elements of a exist in b or vice versa (depends on the length of the corresponding lists/sets)
""" :param a: list 1
:param b: list 2
:return: True or False
'''
if len(a) > len(b): if len(a) > len(b):
return (set(b).issubset(set(a))) return (set(b).issubset(set(a)))
...@@ -343,6 +381,12 @@ class MetaData: ...@@ -343,6 +381,12 @@ class MetaData:
@staticmethod @staticmethod
def check_datafile(datafile, varnames): def check_datafile(datafile, varnames):
'''
Checks if all variables whose names are given in varnames can be found in data-object (read in from a netCDF)
:param datafile: data-object
:param varnames: names of variables to be expected in data-object
:return: Raises a ValueError if any variable cannot be found
'''
""" """
Checks if all varnames can be found in datafile Checks if all varnames can be found in datafile
""" """
...@@ -355,11 +399,4 @@ class MetaData: ...@@ -355,11 +399,4 @@ class MetaData:
else: else:
pass pass
# ----------------------------------- end of class MetaData ----------------------------------- # ----------------------------------- end of class MetaData -----------------------------------
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment