##
# Copyright 2012-2021 Ghent University
#
# This file is part of EasyBuild,
# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en),
# with support of Ghent University (http://ugent.be/hpc),
# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be),
# Flemish Research Foundation (FWO) (http://www.fwo.be/en)
# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en).
#
# https://github.com/easybuilders/easybuild
#
# EasyBuild is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation v2.
#
# EasyBuild is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EasyBuild.  If not, see <http://www.gnu.org/licenses/>.
##
"""
EasyBuild support for CUDA, implemented as an easyblock

Ref: https://speakerdeck.com/ajdecon/introduction-to-the-cuda-toolkit-for-building-applications

@author: George Tsouloupas (Cyprus Institute)
@author: Fotis Georgatos (Uni.lu)
@author: Kenneth Hoste (Ghent University)
@author: Damian Alvarez (Forschungszentrum Juelich)
@author: Ward Poelmans (Free University of Brussels)
@author: Robert Mijakovic (LuxProvide S.A.)
"""
import os
import re
import stat

from distutils.version import LooseVersion

from easybuild.easyblocks.generic.binary import Binary
from easybuild.framework.easyconfig import CUSTOM
from easybuild.tools.build_log import EasyBuildError
from easybuild.tools.config import IGNORE
from easybuild.tools.filetools import adjust_permissions, change_dir, copy_dir, expand_glob_paths
from easybuild.tools.filetools import patch_perl_script_autoflush, remove_file, symlink, which, write_file
from easybuild.tools.run import run_cmd, run_cmd_qa
from easybuild.tools.systemtools import AARCH64, POWER, X86_64, get_cpu_architecture, get_shared_lib_ext
import easybuild.tools.environment as env

# Template for the nvcc wrapper scripts that are generated for each entry in 'host_compilers'.
# The single %s placeholder is replaced by the host compiler, which is passed to nvcc via -ccbin.
# When the caller passes -ccbin/--compiler-bindir itself, the wrapper reports an error on stderr and
# exits with a non-zero status, so the calling build tool notices the failure.
WRAPPER_TEMPLATE = """#!/bin/sh
echo "$@" | grep -e '-ccbin' -e '--compiler-bindir' > /dev/null
if [ $? -eq 0 ];
then
        echo "ERROR: do not set -ccbin or --compiler-bindir when using the `basename $0` wrapper" >&2
        exit 1
else
        nvcc -ccbin=%s "$@"
        exit $?
fi """


class EB_CUDA(Binary):
    """
    Support for installing CUDA.
    """

    @staticmethod
    def extra_options():
        """Custom easyconfig parameters for CUDA, passed on to Binary.extra_options."""
        cuda_params = {}
        # list of host compilers; a wrapper is generated for each of them
        cuda_params['host_compilers'] = [None, "Host compilers for which a wrapper will be generated", CUSTOM]
        return Binary.extra_options(cuda_params)

    def __init__(self, *args, **kwargs):
        """Initialise the CUDA easyblock and define the 'cudaarch' template value for the host CPU architecture."""
        # value of the 'cudaarch' template for each supported CPU architecture
        arch_suffixes = (
            (AARCH64, '_sbsa'),
            (POWER, '_ppc64le'),
            (X86_64, ''),
        )

        host_arch = get_cpu_architecture()
        for known_arch, suffix in arch_suffixes:
            if host_arch == known_arch:
                cudaarch = suffix
                break
        else:
            # unknown architecture: fail before the parent constructor is run
            raise EasyBuildError("Architecture %s is not supported for CUDA on EasyBuild", host_arch)

        super(EB_CUDA, self).__init__(*args, **kwargs)

        # register the template value and regenerate the templates so it gets picked up
        self.cfg.template_values['cudaarch'] = cudaarch
        self.cfg.generate_template_values()

    def extract_step(self):
        """
        Extract the installer (first source) into the build directory instead of running it directly,
        to have more control (options, patching Perl scripts, etc.).
        """
        installer = self.src[0]
        unpack_cmd = "/bin/sh %s --noexec --nox11 --target %s" % (installer['path'], self.builddir)
        run_cmd(unpack_cmd)
        # record where the contents of the installer ended up
        installer['finalpath'] = self.builddir

    def install_step(self):
        """
        Install CUDA by running the installer that was extracted into the build directory.

        The installer that is used depends on the CUDA version:
        - version <= 5: Perl script 'install-linux.pl'
        - 5 < version < 10.1: Perl script 'cuda-installer.pl'
        - other versions: the 'cuda-installer' executable

        The install command is run via run_cmd_qa, so known questions are answered automatically.
        Afterwards, every source file after the first one is run as a patch installer on top of the installation.
        """
        version = LooseVersion(self.version)

        # define how to run the installer
        # script has /usr/bin/perl hardcoded, but we want to have control over which perl is being used
        if version <= LooseVersion("5"):
            install_interpreter = "perl"
            install_script = "install-linux.pl"
            self.cfg.update('installopts', '--prefix=%s' % self.installdir)
        elif version < LooseVersion("10.1"):
            install_interpreter = "perl"
            install_script = "cuda-installer.pl"
            # note: samples are installed by default
            self.cfg.update('installopts', "-verbose -silent -toolkitpath=%s -toolkit" % self.installdir)
        else:
            install_interpreter = ""
            install_script = "./cuda-installer"
            # samples are installed in two places with identical copies:
            # self.installdir/samples and $HOME/NVIDIA_CUDA-11.2_Samples
            # changing the second location (the one under $HOME) to a scratch location using
            # --samples --samplespath=self.builddir
            # avoids the duplicate and pollution of the home directory of the installer.
            self.cfg.update('installopts',
                            "--silent --samples --samplespath=%s --toolkit --toolkitpath=%s --defaultroot=%s" % (
                                self.builddir, self.installdir, self.installdir))
            # When eb is called via sudo -u someuser -i eb ..., the installer may try to chown samples to the
            # original user using the SUDO_USER environment variable, which fails
            if "SUDO_USER" in os.environ:
                self.log.info("SUDO_USER was defined as '%s', need to unset it to avoid problems...",
                              os.environ["SUDO_USER"])
                del os.environ["SUDO_USER"]

        if LooseVersion("10.0") < version < LooseVersion("10.2") and get_cpu_architecture() == POWER:
            # Workaround for
            # https://devtalk.nvidia.com/default/topic/1063995/cuda-setup-and-installation/cuda-10-1-243-10-1-update-2-ppc64le-run-file-installation-issue/
            # the extra shell commands are chained in front of the actual installer
            install_script = " && ".join([
                "mkdir -p %(installdir)s/targets/ppc64le-linux/include",
                "([ -e %(installdir)s/include ] || ln -s targets/ppc64le-linux/include %(installdir)s/include)",
                "cp -r %(builddir)s/builds/cublas/src %(installdir)s/.",
                install_script
            ]) % {
                'installdir': self.installdir,
                'builddir': self.builddir
            }

        # Use C locale to avoid localized questions and crash on CUDA 10.1
        self.cfg.update('preinstallopts', "export LANG=C && ")

        cmd = "%(preinstallopts)s %(interpreter)s %(script)s %(installopts)s" % {
            'preinstallopts': self.cfg['preinstallopts'],
            'interpreter': install_interpreter,
            'script': install_script,
            'installopts': self.cfg['installopts']
        }

        # prepare for running install script autonomously
        qanda = {}
        stdqa = {
            # this question is only asked if CUDA tools are already available system-wide;
            # the parentheses are escaped so they match literally (unescaped they would form a regex group),
            # and they are optional so the question is also recognised when it is printed without them
            r"Would you like to remove all CUDA files under .*? \(?yes/no/abort\)?: ": "no",
        }
        # output patterns that should not be mistaken for a question
        noqanda = [
            r"^Configuring",
            r"Installation Complete",
            r"Verifying archive integrity.*",
            r"^Uncompressing NVIDIA CUDA",
            r".* -> .*",
        ]

        # patch install script to handle Q&A autonomously
        if install_interpreter == "perl":
            patch_perl_script_autoflush(os.path.join(self.builddir, install_script))
            # put the build directory first in $PERL5LIB, preserving any existing value
            p5lib = os.getenv('PERL5LIB', '')
            if p5lib == '':
                p5lib = self.builddir
            else:
                p5lib = os.pathsep.join([self.builddir, p5lib])
            env.setvar('PERL5LIB', p5lib)

        # make sure $DISPLAY is not defined, which may lead to (weird) problems
        # this is workaround for not being able to specify --nox11 to the Perl install scripts
        os.environ.pop('DISPLAY', None)

        # cuda-installer creates /tmp/cuda-installer.log (ignoring TMPDIR)
        # Try to remove it before running the installer.
        # This will fail with a usable error if it can't be removed
        # instead of segfaulting in the cuda-installer.
        remove_file('/tmp/cuda-installer.log')

        # overriding maxhits default value to 1000 (seconds to wait for nothing to change in the output
        # without seeing a known question)
        run_cmd_qa(cmd, qanda, std_qa=stdqa, no_qa=noqanda, log_all=True, simple=True, maxhits=1000)

        # Remove the cuda-installer log file
        remove_file('/tmp/cuda-installer.log')

        # every source after the first one is a patch installer that is applied on top of the installation
        for patch in self.src[1:]:
            self.log.debug("Running patch %s", patch['name'])
            run_cmd("/bin/sh " + patch['path'] + " --accept-eula --silent --installdir=" + self.installdir)

    def post_install_step(self):
        """
        Create wrappers for the specified host compilers, generate the appropriate stub symlinks and create version
        independent pkgconfig files (the latter only if the installation has a 'pkgconfig' directory).

        :raises EasyBuildError: if 'ldconfig' can not be found in $PATH, /sbin or /usr/sbin
        """
        def create_wrapper(wrapper_name, wrapper_comp):
            """Create a read+execute wrapper script in <installdir>/bin that calls nvcc with the given compiler."""
            wrapper_f = os.path.join(self.installdir, 'bin', wrapper_name)
            write_file(wrapper_f, WRAPPER_TEMPLATE % wrapper_comp)
            perms = stat.S_IXUSR | stat.S_IRUSR | stat.S_IXGRP | stat.S_IRGRP | stat.S_IXOTH | stat.S_IROTH
            adjust_permissions(wrapper_f, perms)

        # Prepare wrappers to handle a default host compiler other than g++
        for comp in (self.cfg['host_compilers'] or []):
            create_wrapper('nvcc_%s' % comp, comp)

        ldconfig = which('ldconfig', log_ok=False, on_error=IGNORE)
        sbin_dirs = ['/sbin', '/usr/sbin']
        if not ldconfig:
            # ldconfig is usually in /sbin or /usr/sbin
            for cand_path in sbin_dirs:
                if os.path.exists(os.path.join(cand_path, 'ldconfig')):
                    ldconfig = os.path.join(cand_path, 'ldconfig')
                    break

        # fail if we couldn't find ldconfig, because it's really needed
        if ldconfig:
            self.log.info("ldconfig found at %s", ldconfig)
        else:
            path = os.environ.get('PATH', '')
            raise EasyBuildError("Unable to find 'ldconfig' in $PATH (%s), nor in any of %s", path, sbin_dirs)

        stubs_dir = os.path.join(self.installdir, 'lib64', 'stubs')
        # Run ldconfig to create missing symlinks in the stubs directory (libcuda.so.1, etc)
        cmd = ' '.join([ldconfig, '-N', stubs_dir])
        run_cmd(cmd)

        # GCC searches paths in LIBRARY_PATH and the system paths suffixed with ../lib64 or ../lib first
        # This means stubs/../lib64 is searched before the system /lib64 folder containing a potentially older libcuda.
        # See e.g. https://github.com/easybuilders/easybuild-easyconfigs/issues/12348
        # Workaround: Create a copy that matches this pattern
        new_stubs_dir = os.path.join(self.installdir, 'stubs')
        copy_dir(stubs_dir, os.path.join(new_stubs_dir, 'lib64'))
        # Also create the lib dir as a symlink
        symlink('lib64', os.path.join(new_stubs_dir, 'lib'), use_abspath_source=False)

        # Packages like xpra look for version independent pc files.
        # See e.g. https://github.com/Xpra-org/xpra/blob/master/setup.py#L206
        # Distros provide these files, so let's do it here too
        pkgconfig_dir = os.path.join(self.installdir, 'pkgconfig')
        if os.path.isdir(pkgconfig_dir):
            # matches a trailing '-<version>.pc' (e.g. '-11.2.pc'); dots are escaped and the pattern is anchored,
            # so names without a version suffix (e.g. 'nvidia-ml.pc') are left alone
            version_suffix_regex = re.compile(r'-[0-9]*\.?[0-9]*(\.[0-9]*)?\.pc$')
            pc_files = expand_glob_paths([os.path.join(pkgconfig_dir, '*.pc')])

            # symlinks are created with relative paths, so work from within the pkgconfig directory
            cwd = change_dir(pkgconfig_dir)
            for pc_path in pc_files:
                pc_file = os.path.basename(pc_path)
                link_name = version_suffix_regex.sub('.pc', pc_file)
                # nothing to do if the file has no version suffix, or if the version independent name is taken
                if link_name == pc_file or os.path.lexists(os.path.join(pkgconfig_dir, link_name)):
                    continue
                symlink(pc_file, link_name, use_abspath_source=False)
            # go back to where we were, so subsequent steps are not affected by the directory change
            if cwd:
                change_dir(cwd)
        else:
            self.log.info("No pkgconfig directory found at %s, not creating version independent .pc files",
                          pkgconfig_dir)

        super(EB_CUDA, self).post_install_step()

    def sanity_check_step(self):
        """
        Custom sanity check for CUDA.

        Checks for a set of binaries in 'bin', the main CUDA libraries in both 'lib64' and 'lib', the 'include'
        directory, and version dependent extras (samples, open64, CUPTI).
        A subset of the version independent pkgconfig files is only checked if the installation has a 'pkgconfig'
        directory, so installations without one are not rejected.
        """
        shlib_ext = get_shared_lib_ext()
        version = LooseVersion(self.version)

        chk_libdir = ["lib64", "lib"]
        culibs = ["cublas", "cudart", "cufft", "curand", "cusparse"]
        binaries = ["fatbinary", "nvcc", "nvlink", "ptxas"]

        files = [os.path.join("bin", x) for x in binaries]
        files.extend(os.path.join(libdir, "lib%s.%s" % (lib, shlib_ext)) for libdir in chk_libdir for lib in culibs)
        dirs = ["include"]

        if version > LooseVersion('5'):
            files.append(os.path.join('samples', 'Makefile'))
        if version < LooseVersion('7'):
            files.append(os.path.join('open64', 'bin', 'nvopencc'))
        else:
            files.append(os.path.join("extras", "CUPTI", "lib64", "libcupti.%s" % shlib_ext))
            dirs.append(os.path.join("extras", "CUPTI", "include"))

        # Just a subset of files are checked, since the whole list is likely to change, and irrelevant in most cases
        # anyway; only check them when there is a pkgconfig directory to begin with
        if os.path.isdir(os.path.join(self.installdir, 'pkgconfig')):
            pc_files = ['cublas.pc', 'cudart.pc', 'cuda.pc', 'nvidia-ml.pc', 'nvjpeg.pc']
            files.extend(os.path.join('pkgconfig', x) for x in pc_files)

        custom_paths = {
            'files': files,
            'dirs': dirs,
        }

        super(EB_CUDA, self).sanity_check_step(custom_paths=custom_paths)

    def make_module_extra(self):
        """Add CUDA_HOME, CUDA_ROOT and CUDA_PATH to the module, all set to the installation directory."""

        # the Binary easyblock would add the installation directory itself to $PATH; disable that, since it may
        # cause trouble when a command name clashes with a subdirectory of the installation directory
        # (like compute-sanitizer)
        self.cfg['prepend_to_path'] = False

        txt = super(EB_CUDA, self).make_module_extra()
        for var_name in ('CUDA_HOME', 'CUDA_ROOT', 'CUDA_PATH'):
            txt += self.module_generator.set_environment(var_name, self.installdir)

        self.log.debug("make_module_extra added this: %s", txt)
        return txt

    def make_module_req_guess(self):
        """Return the path guesses of the parent easyblock, with CUDA specific values for PATH, CPATH, etc."""

        guesses = super(EB_CUDA, self).make_module_req_guess()

        if LooseVersion(self.version) >= LooseVersion('7'):
            # CUDA 7 and newer: also pick up CUPTI and nvvm
            cupti_dir = os.path.join('extras', 'CUPTI')
            bin_path = ['bin', os.path.join('nvvm', 'bin')]
            lib_path = ['lib64', os.path.join(cupti_dir, 'lib64'), os.path.join('nvvm', 'lib64')]
            inc_path = ['include', os.path.join(cupti_dir, 'include'), os.path.join('nvvm', 'include')]
        else:
            # older versions: open64/bin must come before bin
            bin_path = [os.path.join('open64', 'bin'), 'bin']
            lib_path = ['lib64']
            inc_path = ['include']

        guesses['PATH'] = bin_path
        guesses['LD_LIBRARY_PATH'] = lib_path
        guesses['LIBRARY_PATH'] = ['lib64', os.path.join('stubs', 'lib64')]
        guesses['CPATH'] = inc_path
        guesses['PKG_CONFIG_PATH'] = ['pkgconfig']

        return guesses