Skip to content
Snippets Groups Projects
Select Git revision
  • bing_issue#170_data_preprocess_training_tf1
  • bing_issues#190_tf2
  • bing_tf2_convert
  • bing_issue#189_train_modular
  • simon_#172_integrate_weatherbench
  • develop
  • bing_issue#188_restructure_ambs
  • yan_issue#100_extract_prcp_data
  • Gong2022_temperature_forecasts
  • bing_issue#186_clean_GMD1_tag
  • yan_issue#179_integrate_GZAWS_data_onfly
  • bing_issue#178_runscript_bug_postprocess
  • michael_issue#187_bugfix_setup_runscript_template
  • bing_issue#180_bugs_postprpocess_meta_postprocess
  • yan_issue#177_repo_for_CLGAN_gmd
  • bing_issue#176_integrate_weather_bench
  • michael_issue#181_eval_era5_forecasts
  • michael_issue#182_eval_subdomain
  • michael_issue#119_warmup_Horovod
  • bing_issue#160_test_zam347
  • ambs_v1
  • ambs_gmd_nowcasting_v1.0
  • GMD1
  • modular_booster_20210203
  • new_structure_20201004_v1.0
  • old_structure_20200930
26 results

ambs

config_extraction.py 4.99 KiB
"""
Child class used for configuring the data extraction runscript of the workflow.
"""
__author__ = "Michael Langguth"
__date__ = "2021-01-27"

# import modules
import os, glob
from config_utils import Config_runscript_base    # import parent class

class Config_Extraction(Config_runscript_base):

    cls_name = "Config_Extraction"#.__name__

    def __init__(self, venv_name, lhpc):
        super().__init__(venv_name, lhpc)

        # initialize attributes related to runscript name
        self.long_name_wrk_step = "Data Extraction"
        self.rscrpt_tmpl_prefix = "data_extraction_"
        self.dataset = "era5"
        self.runscript_template = self.rscrpt_tmpl_prefix + self.dataset + self.suffix_template
        self.runscript_target = self.rscrpt_tmpl_prefix + self.dataset + ".sh"
        # initialize additional runscript-specific attributes to be set via keyboard interaction
        self.year = None
        # list of variables to be written to runscript
        self.list_batch_vars = ["VIRT_ENV_NAME", "source_dir", "destination_dir"]
        # copy over method for keyboard interaction
        self.run_config = Config_Extraction.run_extraction
    #
    # -----------------------------------------------------------------------------------
    #
    def run_extraction(self):
        """
        Runs the keyboard interaction for data extraction step
        :return: all attributes of class Data_Extraction are set
        """

        method_name = Config_Extraction.run_extraction.__name__

        dataset_req_str = "Enter the path where the original ERA5 netCDF-files are located:"
        dataset_err = FileNotFoundError("Cannot retrieve input data from passed path.")

        self.source_dir = Config_Extraction.keyboard_interaction(dataset_req_str, Config_Extraction.check_data_indir,
                                                                 dataset_err, ntries=3)

        year_req_str = "Enter the year for which data extraction should be performed:"
        year_err = ValueError("Please type in a year (after 1970) in YYYY-format.")

        self.year = Config_Extraction.keyboard_interaction(year_req_str, Config_Extraction.check_year,
                                                           year_err, ntries = 2, test_arg="2012")

        # final check for input data
        path_year = os.path.join(self.source_dir, self.year)
        if not Config_Extraction.check_data_indir(path_year, silent=True, recursive=False):
            raise FileNotFoundError("%{0}: Cannot retrieve input data from {1}".format(method_name, path_year))

        # append source_dir with year
        self.source_dir = os.path.join(self.source_dir, self.year)

        # set destination directory based on base directory which can be retrieved from the template runscript
        base_dir = Config_Extraction.get_var_from_runscript(os.path.join(self.runscript_dir, self.runscript_template),
                                                            "destination_dir")
        self.destination_dir = os.path.join(base_dir, "extracted_data", self.year)

    #
    # -----------------------------------------------------------------------------------
    #
    # auxiliary functions for keyboard interaction
    @staticmethod
    def check_data_indir(indir, silent=False, recursive=True):
        """
        Check recursively for existence era5 netCDF-files in indir.
        This is just a simplified check, i.e. the script will fail if the directory tree is not
        built up like '<indir>/YYYY/MM/'.
        :param indir: path to passed input directory
        :param silent: flag if print-statement are executed
        :param recursive: flag if one-level (!) recursive search should be performed
        :return: status with True confirming success
        """
        status = False
        if os.path.isdir(indir):
            # the built-in 'any'-function has a short-sircuit mechanism, i.e. returns True
            # if the first True element is met
            if recursive:
                fexist = any(glob.iglob(os.path.join(indir, "**", "*era5*.nc"), recursive=True))
            else:
                fexist = any(glob.iglob(os.path.join(indir, "*", "*era5*.nc")))

            if fexist:
                status = True
            else:
                if not silent: print("{0} does not contain any ERA5 netCDF-files.".format(indir))
        else:
            if not silent: print("Could not find data directory '{0}'.".format(indir))

        return status
    #
    # -----------------------------------------------------------------------------------
    #
    @staticmethod
    def check_year(year_in, silent=False):
        status = True
        if not year_in.isnumeric():
            status = False
            if not silent: print("{0} is not a numeric number.".format(year_in))

        if not int(year_in) > 1970:
            status = False
            if not silent: print("{0} must match the format YYYY and must be larger than 1970.")

        return status
#
# -----------------------------------------------------------------------------------
#