Skip to content
Snippets Groups Projects
Select Git revision
  • 6643b9340dfd29a2f5427eaf4bcefee64d55c1d1
  • 2022 default
  • 2021
  • master protected
  • 2021
5 results

vtk_export.c

Blame
  • config_extraction.py 4.99 KiB
    """
    Child class used for configuring the data extraction runscript of the workflow.
    """
    __author__ = "Michael Langguth"
    __date__ = "2021-01-27"
    
    # import modules
    import os, glob
    from config_utils import Config_runscript_base    # import parent class
    
    class Config_Extraction(Config_runscript_base):
        """
        Configuration of the data extraction runscript for the ERA5 dataset.

        Extends Config_runscript_base by the attributes specific to the data extraction step and by the
        keyboard interaction (see run_extraction) that sets the source directory, the year and the
        destination directory.
        """

        cls_name = "Config_Extraction"#.__name__

        def __init__(self, venv_name, lhpc):
            """
            Set up the runscript names and the list of variables written to the runscript.
            :param venv_name: passed on to the constructor of the parent class
                              (presumably the name of the virtual environment - confirm in config_utils)
            :param lhpc: passed on to the constructor of the parent class
                         (presumably a flag for running on an HPC-system - confirm in config_utils)
            """
            super().__init__(venv_name, lhpc)

            # initialize attributes related to runscript name
            # (self.suffix_template is not set here, i.e. it must be provided by the parent class)
            self.long_name_wrk_step = "Data Extraction"
            self.rscrpt_tmpl_prefix = "data_extraction_"
            self.dataset = "era5"
            self.runscript_template = self.rscrpt_tmpl_prefix + self.dataset + self.suffix_template
            self.runscript_target = self.rscrpt_tmpl_prefix + self.dataset + ".sh"
            # initialize additional runscript-specific attributes to be set via keyboard interaction
            self.year = None
            # list of variables to be written to runscript
            self.list_batch_vars = ["VIRT_ENV_NAME", "source_dir", "destination_dir"]
            # copy over method for keyboard interaction
            # (the plain function is stored, i.e. the instance is not bound to it)
            self.run_config = Config_Extraction.run_extraction
        #
        # -----------------------------------------------------------------------------------
        #
        def run_extraction(self):
            """
            Runs the keyboard interaction for data extraction step
            :return: the attributes source_dir, year and destination_dir of the Config_Extraction-instance are set
            :raises FileNotFoundError: if no ERA5 netCDF-file is found one directory level below <source_dir>/<year>
            """

            method_name = Config_Extraction.run_extraction.__name__

            # NOTE: keyboard_interaction and get_var_from_runscript are not defined in this class,
            #       i.e. they are expected to be inherited from the parent class.
            dataset_req_str = "Enter the path where the original ERA5 netCDF-files are located:"
            dataset_err = FileNotFoundError("Cannot retrieve input data from passed path.")

            self.source_dir = Config_Extraction.keyboard_interaction(dataset_req_str, Config_Extraction.check_data_indir,
                                                                     dataset_err, ntries=3)

            year_req_str = "Enter the year for which data extraction should be performed:"
            year_err = ValueError("Please type in a year (after 1970) in YYYY-format.")

            self.year = Config_Extraction.keyboard_interaction(year_req_str, Config_Extraction.check_year,
                                                               year_err, ntries=2, test_arg="2012")

            # final check for input data: the netCDF-files must be located one level below <source_dir>/<year>
            path_year = os.path.join(self.source_dir, self.year)
            if not Config_Extraction.check_data_indir(path_year, silent=True, recursive=False):
                raise FileNotFoundError("%{0}: Cannot retrieve input data from {1}".format(method_name, path_year))

            # append source_dir with year
            self.source_dir = path_year

            # set destination directory based on base directory which can be retrieved from the template runscript
            base_dir = Config_Extraction.get_var_from_runscript(os.path.join(self.runscript_dir, self.runscript_template),
                                                                "destination_dir")
            self.destination_dir = os.path.join(base_dir, "extracted_data", self.year)

        #
        # -----------------------------------------------------------------------------------
        #
        # auxiliary functions for keyboard interaction
        @staticmethod
        def check_data_indir(indir, silent=False, recursive=True):
            """
            Check for existence of era5 netCDF-files (matching '*era5*.nc') below indir.
            This is just a simplified check, i.e. the script will fail if the directory tree is not
            built up like '<indir>/YYYY/MM/'.
            :param indir: path to passed input directory
            :param silent: flag if print-statements are suppressed
            :param recursive: if True, indir and all of its subdirectories are searched;
                              if False, only the direct subdirectories of indir are searched
            :return: status with True confirming success
            """
            if not os.path.isdir(indir):
                if not silent:
                    print("Could not find data directory '{0}'.".format(indir))
                return False

            # '**' only spans several directory levels if the recursive-flag of glob is set
            subdir_patt = "**" if recursive else "*"
            search_patt = os.path.join(indir, subdir_patt, "*era5*.nc")

            # the built-in 'any'-function has a short-circuit mechanism, i.e. the lazy iterator
            # returned by iglob is only consumed until the first matching file is met
            fexist = any(glob.iglob(search_patt, recursive=recursive))

            if not fexist and not silent:
                print("{0} does not contain any ERA5 netCDF-files.".format(indir))

            return fexist
        #
        # -----------------------------------------------------------------------------------
        #
        @staticmethod
        def check_year(year_in, silent=False):
            """
            Check if the passed string denotes a valid year, i.e. a four-digit number larger than 1970.
            :param year_in: the year as string (e.g. as typed in by the user)
            :param silent: flag if print-statements are suppressed
            :return: status with True confirming success
            """
            # isdecimal only accepts digits which can be converted by int(). In contrast, isnumeric also
            # accepts characters such as fractions or superscripts on which int() raises a ValueError.
            # Return early since the conversion below must not be attempted on invalid input.
            if not year_in.isdecimal():
                if not silent:
                    print("{0} is not a numeric number.".format(year_in))
                return False

            if len(year_in) != 4 or int(year_in) <= 1970:
                if not silent:
                    print("{0} must match the format YYYY and must be larger than 1970.".format(year_in))
                return False

            return True
    #
    # -----------------------------------------------------------------------------------
    #