# Select Git revision
# Create_JupyterKernel_general.ipynb
# -
# Jens Henrik Goebbert authored
# config_extraction.py 4.99 KiB
"""
Child class used for configuring the data extraction runscript of the workflow.
"""
__author__ = "Michael Langguth"
__date__ = "2021-01-27"
# import modules
import os, glob
from config_utils import Config_runscript_base # import parent class
class Config_Extraction(Config_runscript_base):
    """
    Configuration of the data extraction runscript via keyboard interaction.

    The class collects the directory holding the original ERA5 netCDF-files and the year to
    process, validates both and derives the source and destination directories that are
    written to the runscript.
    """
    cls_name = "Config_Extraction"#.__name__

    def __init__(self, venv_name, lhpc):
        """
        Set up the runscript-related attributes for the data extraction step.
        :param venv_name: passed through unchanged to the constructor of the parent class
        :param lhpc: passed through unchanged to the constructor of the parent class
        """
        super().__init__(venv_name, lhpc)

        # initialize attributes related to runscript name
        self.long_name_wrk_step = "Data Extraction"
        self.rscrpt_tmpl_prefix = "data_extraction_"
        self.dataset = "era5"
        # NOTE(review): suffix_template is not set in this class; presumably provided by the parent class
        self.runscript_template = self.rscrpt_tmpl_prefix + self.dataset + self.suffix_template
        self.runscript_target = self.rscrpt_tmpl_prefix + self.dataset + ".sh"
        # initialize additional runscript-specific attributes to be set via keyboard interaction
        self.year = None
        # list of variables to be written to runscript
        self.list_batch_vars = ["VIRT_ENV_NAME", "source_dir", "destination_dir"]
        # copy over method for keyboard interaction
        # (stored as plain function, i.e. it is not bound to the instance)
        self.run_config = Config_Extraction.run_extraction
    #
    # -----------------------------------------------------------------------------------
    #
    def run_extraction(self):
        """
        Runs the keyboard interaction for the data extraction step.
        Queries the input directory and the year, verifies that ERA5 netCDF-files can be found
        for that year and sets the attributes source_dir, year and destination_dir.
        :return: -
        :raises FileNotFoundError: if no ERA5 netCDF-files are found one directory level below
                                   '<source_dir>/<year>'
        """
        method_name = Config_Extraction.run_extraction.__name__

        # keyboard_interaction is not defined in this class (presumably inherited from the parent class)
        dataset_req_str = "Enter the path where the original ERA5 netCDF-files are located:"
        dataset_err = FileNotFoundError("Cannot retrieve input data from passed path.")

        self.source_dir = Config_Extraction.keyboard_interaction(dataset_req_str, Config_Extraction.check_data_indir,
                                                                 dataset_err, ntries=3)

        year_req_str = "Enter the year for which data extraction should be performed:"
        year_err = ValueError("Please type in a year (after 1970) in YYYY-format.")

        self.year = Config_Extraction.keyboard_interaction(year_req_str, Config_Extraction.check_year,
                                                           year_err, ntries=2, test_arg="2012")

        # final check for input data: files are expected one level below the year-directory
        path_year = os.path.join(self.source_dir, self.year)
        if not Config_Extraction.check_data_indir(path_year, silent=True, recursive=False):
            raise FileNotFoundError("%{0}: Cannot retrieve input data from {1}".format(method_name, path_year))

        # append source_dir with year
        self.source_dir = path_year

        # set destination directory based on base directory which can be retrieved from the template runscript
        base_dir = Config_Extraction.get_var_from_runscript(os.path.join(self.runscript_dir, self.runscript_template),
                                                            "destination_dir")
        self.destination_dir = os.path.join(base_dir, "extracted_data", self.year)
    #
    # -----------------------------------------------------------------------------------
    #
    # auxiliary functions for keyboard interaction
    @staticmethod
    def check_data_indir(indir, silent=False, recursive=True):
        """
        Check for existence of ERA5 netCDF-files (file name pattern '*era5*.nc') below indir.
        This is just a simplified check which is designed for a directory tree that is
        built up like '<indir>/YYYY/MM/'.
        :param indir: path to passed input directory
        :param silent: flag to suppress the print-statements on failure
        :param recursive: if True, indir and all of its subdirectories are searched;
                          if False, only the direct subdirectories of indir are searched
        :return: status with True confirming success
        """
        if not os.path.isdir(indir):
            if not silent:
                print("Could not find data directory '{0}'.".format(indir))
            return False

        if recursive:
            matches = glob.iglob(os.path.join(indir, "**", "*era5*.nc"), recursive=True)
        else:
            matches = glob.iglob(os.path.join(indir, "*", "*era5*.nc"))

        # the built-in 'any'-function has a short-circuit mechanism, i.e. the lazy iterator
        # is only consumed until the first matching file is met
        if any(matches):
            return True

        if not silent:
            print("{0} does not contain any ERA5 netCDF-files.".format(indir))
        return False
    #
    # -----------------------------------------------------------------------------------
    #
    @staticmethod
    def check_year(year_in, silent=False):
        """
        Check if the passed string denotes a year after 1970.
        :param year_in: the year as string (e.g. "2012")
        :param silent: flag to suppress the print-statements on failure
        :return: status with True confirming success
        """
        # str.isdecimal (unlike str.isnumeric) only accepts characters that int() can parse,
        # so that invalid input is rejected here instead of raising a ValueError below
        if not (isinstance(year_in, str) and year_in.isdecimal()):
            if not silent:
                print("{0} is not a numeric number.".format(year_in))
            return False

        if int(year_in) <= 1970:
            if not silent:
                print("{0} must match the format YYYY and must be larger than 1970.".format(year_in))
            return False

        return True
#
# -----------------------------------------------------------------------------------
#