diff --git a/Custom_EasyBlocks/openmpi.py b/Custom_EasyBlocks/openmpi.py new file mode 100644 index 0000000000000000000000000000000000000000..c75373b81f8f3e530c0f3b21762780b007edf3e7 --- /dev/null +++ b/Custom_EasyBlocks/openmpi.py @@ -0,0 +1,230 @@ +## +# Copyright 2019-2022 Ghent University +# +# This file is part of EasyBuild, +# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en), +# with support of Ghent University (http://ugent.be/hpc), +# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be), +# Flemish Research Foundation (FWO) (http://www.fwo.be/en) +# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en). +# +# https://github.com/easybuilders/easybuild +# +# EasyBuild is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation v2. +# +# EasyBuild is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with EasyBuild. If not, see <http://www.gnu.org/licenses/>. 
##
"""
EasyBuild support for OpenMPI, implemented as an easyblock

@author: Kenneth Hoste (Ghent University)
@author: Robert Mijakovic (LuxProvide)
"""
import os
import re
from distutils.version import LooseVersion

import easybuild.tools.toolchain as toolchain
from easybuild.easyblocks.generic.configuremake import ConfigureMake
from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS
from easybuild.tools.build_log import EasyBuildError
from easybuild.tools.config import build_option
from easybuild.tools.modules import get_software_root
from easybuild.tools.systemtools import check_os_dependency, get_shared_lib_ext
from easybuild.tools.toolchain.mpi import get_mpi_cmd_template


class EB_OpenMPI(ConfigureMake):
    """Easyblock that configures, tests and sanity-checks an OpenMPI installation."""

    def configure_step(self):
        """
        Extend 'configopts' with OpenMPI-specific options, then run the generic configure step.

        Options that already occur in 'configopts' (in positive or negative form) are left alone.
        The following are appended, in this order:
        * the default --enable-<flag> options;
        * one --with-<dep>=<value> option per known dependency;
        * --with-verbs or --without-verbs (only for versions before 5.0.0).

        :raises EasyBuildError: if exactly one of PMIx and libevent is available as a dependency
        """
        ompi_version = LooseVersion(self.version)

        def config_opt_used(key, enable_opt=False):
            """Tell whether 'configopts' already mentions the given option.

            With enable_opt=True the --enable-/--disable- spelling is looked for,
            otherwise the --with-/--without- spelling.
            """
            template = '--(disable|enable)-%s' if enable_opt else '--(with|without)-%s'
            return re.search(template % key, self.cfg['configopts']) is not None

        # flags that are switched on unless the easyconfig already takes a stance on them:
        # * mpirun-prefix-by-default: suppress failure modes in relation to mpirun path
        # * shared: build shared libraries
        for flag in ('mpirun-prefix-by-default', 'shared'):
            if config_opt_used(flag, enable_opt=True):
                continue
            self.cfg.update('configopts', '--enable-%s' % flag)

        # EasyBuild dependencies for which OpenMPI has a known configure option
        known_dependencies = ('CUDA', 'hwloc', 'libevent', 'libfabric', 'PMIx', 'UCX')
        # for these the default is an internal copy; building without any is not supported
        bundled_dependencies = ('hwloc', 'libevent', 'PMIx')

        # value for `--with-<dep>=<value>` when the dependency is not part of the easyconfig;
        # a missing entry means that no option is added at all.
        # Passing explicit values keeps builds reproducible when system libraries change, and avoids
        # failures caused by e.g. finding only PMIx but not libevent on the system.
        # Only versions 3.0 and later are covered (earlier ones were not checked).
        fallback_values = {}
        if ompi_version >= LooseVersion('3.0'):
            fallback_values = {
                name: 'internal' if name in bundled_dependencies else 'no'
                for name in known_dependencies
            }

        for dep_name in known_dependencies:
            option = dep_name.lower()
            # an option that was specified explicitly always wins
            if config_opt_used(option):
                continue

            # the libfabric option was renamed to 'ofi' in OpenMPI 3.1.0;
            # both names are synonyms since then, so the new spelling must be checked as well
            if dep_name == 'libfabric' and ompi_version >= LooseVersion('3.1'):
                option = 'ofi'
                if config_opt_used(option):
                    continue

            # point to the installation prefix of a loaded dependency,
            # otherwise fall back to the "unused" value (if there is one)
            value = get_software_root(dep_name) or fallback_values.get(dep_name)
            if value is not None:
                self.cfg.update('configopts', '--with-%s=%s' % (option, value))

        has_pmix = bool(get_software_root('PMIx'))
        has_libevent = bool(get_software_root('libevent'))
        if has_pmix != has_libevent:
            raise EasyBuildError('You must either use both PMIx and libevent as dependencies or none of them. '
                                 'This is to enforce the same libevent is used for OpenMPI as for PMIx or '
                                 'the behavior may be unpredictable.')

        # decide on VERBS support, unless the easyconfig already did
        if not config_opt_used('verbs'):
            configopts = self.cfg['configopts']
            ucx_enabled = '--with-ucx' in configopts and '--with-ucx=no' not in configopts

            # for OpenMPI v4.x, the openib BTL should be disabled when UCX is used;
            # this is required to avoid "error initializing an OpenFabrics device" warnings,
            # see also https://www.open-mpi.org/faq/?category=all#ofa-device-error
            if ucx_enabled and ompi_version >= LooseVersion('4.0.0'):
                use_verbs = False
            else:
                # auto-detect based on available OS packages
                ibverbs_packages = EASYCONFIG_CONSTANTS['OS_PKG_IBVERBS_DEV'][0]
                use_verbs = any(check_os_dependency(pkg) for pkg in ibverbs_packages)

            # for OpenMPI v5.x, the verbs support is removed, only UCX is available
            # see https://github.com/open-mpi/ompi/pull/6270
            if ompi_version < LooseVersion('5.0.0'):
                self.cfg.update('configopts', '--with-verbs' if use_verbs else '--without-verbs')

        super(EB_OpenMPI, self).configure_step()

    def test_step(self):
        """Run the test step, falling back to `make check` when 'runtest' is not set."""
        # None means "nothing specified"; tests can be disabled with "runtest = False" in the easyconfig
        if self.cfg['runtest'] is None:
            self.cfg['runtest'] = 'check'

        super(EB_OpenMPI, self).test_step()

    def load_module(self, *args, **kwargs):
        """
        Load the (temporary) module file through the parent implementation.

        Afterwards the RPATH wrappers are put back in place when RPATH is enabled both as a build option
        and in the toolchain options, so that the sanity check commands work as expected.
        """
        super(EB_OpenMPI, self).load_module(*args, **kwargs)

        # without the RPATH wrappers, compiling the minimal test programs will fail
        if build_option('rpath') and self.toolchain.options.get('rpath', True):
            self.toolchain.prepare_rpath_wrappers(rpath_filter_dirs=self.rpath_filter_dirs,
                                                  rpath_include_dirs=self.rpath_include_dirs)

    def sanity_check_step(self):
        """
        Sanity check for OpenMPI: expected files, compiler wrapper checks and minimal MPI programs.

        The list of expected binaries, libraries and headers depends on the OpenMPI version
        (5.0.0 and later versus earlier releases). The example programs shipped in the 'examples'
        subdirectory of the start dir are compiled, and also run if the 'mpi_tests' build option is set.
        """
        binaries = ['mpicc', 'mpicxx', 'mpif90', 'mpifort', 'ompi_info', 'opal_wrapper']
        libraries = ['mpi_mpifh', 'mpi', 'open-pal']
        headers = ['mpi-ext', 'mpif-config', 'mpif', 'mpi', 'mpi_portable_platform']

        if LooseVersion(self.version) >= LooseVersion('5.0.0'):
            binaries.append('prterun')
            libraries.append('prrte')
            headers.append('prte')
        else:
            libraries.extend(['ompitrace', 'open-rte'])
            # orterun/mpirun are only expected when ORTE was not disabled at configure time
            configopts = self.cfg['configopts']
            if '--with-orte=no' not in configopts and '--without-orte' not in configopts:
                binaries.extend(['orterun', 'mpirun'])

        shlib_ext = get_shared_lib_ext()
        bin_files = [os.path.join('bin', name) for name in binaries]
        inc_files = [os.path.join('include', name + '.h') for name in headers]
        lib_files = [os.path.join('lib', 'lib%s.%s' % (name, shlib_ext)) for name in libraries]

        custom_paths = {
            'files': bin_files + inc_files + lib_files,
            'dirs': [],
        }

        # make sure MPI compiler wrappers pick up correct compilers:
        # (wrapper, environment variable naming the compiler, default if the variable is unset)
        wrapper_compilers = (
            ('mpicc', 'CC', 'gcc'),
            ('mpicxx', 'CXX', 'g++'),
            ('mpifort', 'FC', 'gfortran'),
            ('mpif90', 'F90', 'gfortran'),
        )
        expected = {wrapper: os.getenv(env_var, default) for wrapper, env_var, default in wrapper_compilers}

        # actual pattern for gfortran is "GNU Fortran"
        for wrapper in ('mpifort', 'mpif90'):
            if expected[wrapper] == 'gfortran':
                expected[wrapper] = "GNU Fortran"
        # for PGI, correct pattern is "pgfortran" with mpif90
        if expected['mpif90'] == 'pgf90':
            expected['mpif90'] = 'pgfortran'

        custom_commands = ["%s --version | grep '%s'" % (wrapper, expected[wrapper]) for wrapper in sorted(expected)]

        # minimal test programs are run with the MPI launcher that matches this OpenMPI version
        mpi_cmd_tmpl, params = get_mpi_cmd_template(toolchain.OPENMPI, dict(), mpi_version=self.version)
        # Limit number of ranks to 8 to avoid it failing due to hyperthreading
        ranks = min(8, self.cfg['parallel'])

        examples = (
            ('hello_c.c', 'mpicc'),
            ('hello_mpifh.f', 'mpifort'),
            ('hello_usempi.f90', 'mpif90'),
        )
        for src, compiler in examples:
            src_path = os.path.join(self.cfg['start_dir'], 'examples', src)
            # examples that are not shipped with the sources are silently skipped
            if not os.path.exists(src_path):
                continue

            test_exe = os.path.join(self.builddir, 'mpi_test_' + os.path.splitext(src)[0])
            self.log.info("Adding minimal MPI test program to sanity checks: %s", test_exe)

            # always build the test binary
            custom_commands.append("%s %s -o %s" % (compiler, src_path, test_exe))

            # only run it if that was asked for
            if build_option('mpi_tests'):
                params.update({'nr_ranks': ranks, 'cmd': test_exe})
                # Allow oversubscription for this test (in case of hyperthreading)
                custom_commands.append("OMPI_MCA_rmaps_base_oversubscribe=1 " + mpi_cmd_tmpl % params)
                # Run with 1 process which may trigger other bugs
                # See https://github.com/easybuilders/easybuild-easyconfigs/issues/12978
                params['nr_ranks'] = 1
                custom_commands.append(mpi_cmd_tmpl % params)

        super(EB_OpenMPI, self).sanity_check_step(custom_paths=custom_paths, custom_commands=custom_commands)
'5.1.7', + 'Mesa': ('OpenGL', '2022a'), + 'NCCL': 'default', 'pkg-config': ('pkgconf', '1.8.0'), - #'Boost': '1.78.0', - #'Boost.Python': '1.78.0', + 'UCC': 'default', + 'UCX': 'default', } SIDECOMPILERS = ['AOCC', 'Clang'] diff --git a/Golden_Repo/n/NCCL/NCCL-2.14.3-1-GCCcore-11.3.0-CUDA-11.7.eb b/Golden_Repo/n/NCCL/NCCL-default-GCCcore-11.3.0-CUDA-11.7.eb similarity index 74% rename from Golden_Repo/n/NCCL/NCCL-2.14.3-1-GCCcore-11.3.0-CUDA-11.7.eb rename to Golden_Repo/n/NCCL/NCCL-default-GCCcore-11.3.0-CUDA-11.7.eb index 5c27b17ca75f06764ed5adc098fce6d8c29873a3..00fc8cb669d576810019870e143242d26fc4d697 100644 --- a/Golden_Repo/n/NCCL/NCCL-2.14.3-1-GCCcore-11.3.0-CUDA-11.7.eb +++ b/Golden_Repo/n/NCCL/NCCL-default-GCCcore-11.3.0-CUDA-11.7.eb @@ -1,5 +1,6 @@ name = 'NCCL' -version = '2.14.3-1' +version = 'default' +local_realversion = '2.14.3-1' versionsuffix = '-CUDA-%(cudashortver)s' homepage = 'https://developer.nvidia.com/nccl' @@ -10,15 +11,15 @@ toolchain = {'name': 'GCCcore', 'version': '11.3.0'} github_account = 'NVIDIA' sources = [{ - 'filename': '%(name)s-%(version)s.tar.gz', + 'filename': f'%(name)s-{local_realversion}.tar.gz', 'git_config': { 'url': 'https://github.com/NVIDIA/', 'repo_name': 'nccl', - 'tag': 'v%(version)s', + 'tag': f'v{local_realversion}', 'recursive': True, }, }] -checksums = ['a05e153f0508e05be76e19b8262c2ea4f8996aedaaa873dcd241e061202422a6'] +checksums = ['8108e00bcf995666330484cf24a5cc52c3a392acaa5aeabf88e2e168e07c291d'] builddependencies = [('binutils', '2.38')] diff --git a/Golden_Repo/u/UCC/NCCL-2.14.3-1.tar.gz b/Golden_Repo/u/UCC/NCCL-2.14.3-1.tar.gz deleted file mode 100644 index 3400371cf58a722255fc02131699649f13f0a0b3..0000000000000000000000000000000000000000 Binary files a/Golden_Repo/u/UCC/NCCL-2.14.3-1.tar.gz and /dev/null differ diff --git a/Golden_Repo/u/UCC/UCC-default-GCCcore-11.3.0.eb b/Golden_Repo/u/UCC/UCC-default-GCCcore-11.3.0.eb index 
5450bf72c12d7caa4c672507cf165fbc90c76624..66990644ffefa65c01fe0fc700c2ba0025ed74d8 100644 --- a/Golden_Repo/u/UCC/UCC-default-GCCcore-11.3.0.eb +++ b/Golden_Repo/u/UCC/UCC-default-GCCcore-11.3.0.eb @@ -7,7 +7,7 @@ easyblock = 'ConfigureMake' name = 'UCC' version = 'default' -local_version = '1.1.0-rc1' +local_realversion = '1.1.0-rc1' homepage = 'https://www.openucx.org/' description = """UCC (Unified Collective Communication) is a collective @@ -19,7 +19,7 @@ toolchain = {'name': 'GCCcore', 'version': '11.3.0'} toolchainopts = {'pic': True} source_urls = ['https://github.com/openucx/ucc/archive/refs/tags'] -sources = [f'v{local_version}.tar.gz'] +sources = [f'v{local_realversion}.tar.gz'] checksums = [ '4af76d706a788af081c4a6ce566b6d4e33d75629ce9a8a7b8eec1760eff13168', # v1.1.0-rc1.tar.gz ] @@ -32,7 +32,7 @@ builddependencies = [ dependencies = [ ('UCX', 'default', '', SYSTEM), ('CUDA', '11.7', '', SYSTEM), - ('NCCL', '2.14.3-1', '-CUDA-%(cudashortver)s'), + ('NCCL', 'default', '-CUDA-%(cudashortver)s'), ] preconfigopts = "./autogen.sh && "