"""
Child class used for configuring the data extraction runscript of the workflow.
"""
__author__ = "Michael Langguth"
__date__ = "2021-01-27"

# import modules
import os, glob
from config_utils import Config_runscript_base    # import parent class

class Config_Extraction(Config_runscript_base):

    cls_name = "Config_Extraction"#.__name__

    def __init__(self, venv_name, lhpc):
        """
        Set up the configuration object for the data extraction step.
        :param venv_name: handed over unchanged to the constructor of the parent class
        :param lhpc: handed over unchanged to the constructor of the parent class
        """
        super().__init__(venv_name, lhpc)

        # description of the workflow step and the dataset it operates on
        self.long_name_wrk_step = "Data Extraction"
        self.rscrpt_tmpl_prefix = "data_extraction_"
        self.dataset = "era5"

        # template and target runscript share the same stem and differ in their suffix only
        # (self.suffix_template is not set here; presumably provided by the parent class)
        runscript_stem = self.rscrpt_tmpl_prefix + self.dataset
        self.runscript_template = runscript_stem + self.suffix_template
        self.runscript_target = runscript_stem + ".sh"

        # runscript-specific attribute which gets its value in run_extraction
        self.year = None
        # names of the variables that are written to the runscript
        self.list_batch_vars = ["VIRT_ENV_NAME", "source_dir", "destination_dir"]
        # register the (unbound) method that performs the keyboard interaction for this step
        self.run_config = Config_Extraction.run_extraction
    #
    # -----------------------------------------------------------------------------------
    #
    def run_extraction(self):
        """
        Runs the keyboard interaction for the data extraction step.
        The user is asked for the directory holding the ERA5 netCDF-files and for the year to be processed.
        Afterwards, the attributes source_dir, year and destination_dir are set.
        :return: -
        :raises FileNotFoundError: if no input data can be found under '<source_dir>/<year>'
        """

        method_name = Config_Extraction.run_extraction.__name__

        # query the base directory of the input data (checked with check_data_indir)
        src_prompt = "Enter the path where the original ERA5 netCDF-files are located:"
        src_error = FileNotFoundError("Cannot retrieve input data from passed path.")
        self.source_dir = Config_Extraction.keyboard_interaction(src_prompt, Config_Extraction.check_data_indir,
                                                                 src_error, ntries=3)

        # query the year to be processed (checked with check_year)
        yr_prompt = "Enter the year for which data extraction should be performed:"
        yr_error = ValueError("Please type in a year (after 1970) in YYYY-format.")
        self.year = Config_Extraction.keyboard_interaction(yr_prompt, Config_Extraction.check_year,
                                                           yr_error, ntries=2, test_arg="2012")

        # make sure that data is available for the requested year before the year is appended to source_dir
        source_dir_year = os.path.join(self.source_dir, self.year)
        data_available = Config_Extraction.check_data_indir(source_dir_year, silent=True, recursive=False)
        if not data_available:
            raise FileNotFoundError("%{0}: Cannot retrieve input data from {1}".format(method_name, source_dir_year))

        self.source_dir = source_dir_year

        # the destination directory is built from the base directory found in the template runscript
        template_path = os.path.join(self.runscript_dir, self.runscript_template)
        base_dir = Config_Extraction.get_var_from_runscript(template_path, "destination_dir")
        self.destination_dir = os.path.join(base_dir, "extracted_data", self.year)


    #
    # -----------------------------------------------------------------------------------
    #
    # auxiliary functions for keyboard interaction
    @staticmethod
    def check_data_indir(indir, silent=False, recursive=True):
        """
        Check recursively for existence era5 netCDF-files in indir.
        This is just a simplified check, i.e. the script will fail if the directory tree is not
        built up like '<indir>/YYYY/MM/'.
        :param indir: path to passed input directory
        :param silent: flag if print-statement are executed
        :param recursive: flag if one-level (!) recursive search should be performed
        :return: status with True confirming success
        """
        status = False
        if os.path.isdir(indir):
            # the built-in 'any'-function has a short-sircuit mechanism, i.e. returns True
            # if the first True element is met
            if recursive:
                fexist = any(glob.iglob(os.path.join(indir, "**", "*era5*.nc"), recursive=True))
            else:
                fexist = any(glob.iglob(os.path.join(indir, "*", "*era5*.nc")))

            if fexist:
                status = True
            else:
                if not silent: print("{0} does not contain any ERA5 netCDF-files.".format(indir))
        else:
            if not silent: print("Could not find data directory '{0}'.".format(indir))

        return status
    #
    # -----------------------------------------------------------------------------------
    #
    @staticmethod
    def check_year(year_in, silent=False):
        status = True
        if not year_in.isnumeric():
            status = False
            if not silent: print("{0} is not a numeric number.".format(year_in))

        if not int(year_in) > 1970:
            status = False
            if not silent: print("{0} must match the format YYYY and must be larger than 1970.")

        return status
#
# -----------------------------------------------------------------------------------
#