Skip to content
Snippets Groups Projects
Commit 49785d16 authored by Sebastian Achilles
Browse files

JR H100: update stage

parent e15bbcdc
No related branches found
No related tags found
No related merge requests found
# EasyBuild recipe for NVSHMEM 2.8.0 built with the gompi/2022a toolchain
# against CUDA 12.0 and NCCL.
easyblock = 'ConfigureMake'

name = 'NVSHMEM'
version = '2.8.0'
versionsuffix = '-CUDA-%(cudaver)s'

# Single place to bump the CUDA release; reused for the CUDA and NCCL dependencies.
local_cuda_version = '12.0'

homepage = 'https://developer.nvidia.com/nvshmem'
description = """NVSHMEM is a parallel programming interface based on OpenSHMEM that provides
efficient and scalable communication for NVIDIA GPU clusters. NVSHMEM creates a
global address space for data that spans the memory of multiple GPUs and can be
accessed with fine-grained GPU-initiated operations, CPU-initiated operations,
and operations on CUDA streams.
"""

toolchain = {'name': 'gompi', 'version': '2022a'}

# The source tarball is not fetched automatically (no source_urls are set);
# these instructions tell the user where to obtain it.
download_instructions = """The sources of NVSHMEM can be downloaded at NVIDIA's webpage when you have signed up for
their (free) developer program:
https://developer.nvidia.com/nvshmem-downloads"""
sources = ['%(namelower)s_src_%(version)s-3.txz']
checksums = ['7d4ef226630a94b587d18e02c27decc8b41d6f4ee52a26e25644b23cd18da81f']

builddependencies = [
    ('Autotools', '20220317'),
    ('pkgconf', '1.8.0'),
]

dependencies = [
    ('CUDA', local_cuda_version, '', SYSTEM),
    ('NCCL', 'default', '-CUDA-' + local_cuda_version),
]

# No configure step is run; all build settings are handed over as exported
# environment variables in front of the build and install commands.
skipsteps = ['configure']

# (variable, value) pairs, kept in the order in which they are exported.
# Later entries refer to earlier ones (e.g. ${NVSHMEM_BUILDDIR}), so the order matters.
local_nvshmem_env = [
    # optional components and where to find them
    ('NVSHMEM_USE_GDRCOPY', '1'),
    ('GDRCOPY_HOME', '${EBROOTGDRCOPY}'),
    ('MPI_HOME', '${EBROOTOPENMPI}'),
    ('NVSHMEM_MPI_SUPPORT', '1'),
    ('NVSHMEMTEST_USE_MPI_LAUNCHER', '1'),
    ('NCCL_HOME', '${EBROOTNCCL}'),
    ('NVSHMEM_USE_NCCL', '1'),
    # build directories
    ('NVSHMEM_BUILDDIR', '%(builddir)s'),
    ('NVSHMEM_EXAMPLES_BUILDDIR', '${NVSHMEM_BUILDDIR}/examples/obj'),
    ('NVSHMEM_OTHERTEST_BUILDDIR', '${NVSHMEM_BUILDDIR}/othertest/obj'),
    ('NVSHMEM_TEST_BUILDDIR', '${NVSHMEM_BUILDDIR}/test/obj'),
    ('NVSHMEM_PERFTEST_BUILDDIR', '${NVSHMEM_BUILDDIR}/perftest/obj'),
    # installation directories
    ('NVSHMEM_PREFIX', '%(installdir)s'),
    ('NVSHMEM_EXAMPLES_INSTALL', '${NVSHMEM_PREFIX}/examples'),
    ('NVSHMEM_OTHERTEST_INSTALL', '${NVSHMEM_PREFIX}/othertest'),
    ('NVSHMEM_PERFTEST_INSTALL', '${NVSHMEM_PREFIX}/perftest'),
    ('NVSHMEM_TEST_INSTALL', '${NVSHMEM_PREFIX}/test'),
]

# Resulting string: "export VAR1=value1 VAR2=value2 ... &&"
prebuildopts = 'export ' + ' '.join('='.join(local_pair) for local_pair in local_nvshmem_env) + ' &&'
# The install command gets exactly the same environment as the build command.
preinstallopts = prebuildopts

sanity_check_paths = {
    'files': ['lib/libnvshmem.a', 'lib/nvshmem_bootstrap_mpi.' + SHLIB_EXT],
    'dirs': ['include'],
}

modextravars = {'NVSHMEM_HOME': '%(installdir)s'}

moduleclass = 'devel'
# EasyBuild recipe for the NVIDIA driver libraries. The module version is the
# fixed label 'default'; the actual driver release is kept in `realversion`.
name = 'nvidia-driver'
version = 'default'
realversion = '525.105.17'

homepage = 'https://developer.nvidia.com/cuda-toolkit'
description = """
This is a set of libraries normally installed by the NVIDIA driver installer.
The real version of this package is {}.
""".format(realversion)

site_contacts = 'sc@fz-juelich.de'

toolchain = SYSTEM

# Both the download directory and the installer file name embed the real driver version.
source_urls = [f'http://us.download.nvidia.com/tesla/{realversion}/']
sources = [f'NVIDIA-Linux-x86_64-{realversion}.run']
checksums = ['c635a21a282c9b53485f19ebb64a0f4b536a968b94d4d97629e0bc547a58142a']

# To avoid conflicts between NVML and the kernel driver
postinstallcmds = ['rm %(installdir)s/lib64/libnvidia-ml.so*']

# Extra Lua appended to the generated module file: marks the module with the "gpu" arch property.
modluafooter = '\nadd_property("arch","gpu")\n'

moduleclass = 'system'
easyblock = 'ConfigureMake'
name = 'OpenMPI' name = 'OpenMPI'
version = '4.1.4' version = '4.1.4'
...@@ -53,13 +55,28 @@ preconfigopts = ' && '.join([ ...@@ -53,13 +55,28 @@ preconfigopts = ' && '.join([
'' ''
]) ])
configopts = '--without-orte ' configopts = '--enable-shared '
configopts += '--with-hwloc=$EBROOTHWLOC ' # hwloc support
configopts += '--with-ucx=$EBROOTUCX '
configopts += '--with-verbs '
configopts += '--with-libevent=$EBROOTLIBEVENT '
configopts += '--without-orte '
configopts += '--without-psm2 ' configopts += '--without-psm2 '
configopts += '--disable-oshmem ' configopts += '--disable-oshmem '
configopts += '--with-cuda=$EBROOTCUDA '
configopts += '--with-ime=/opt/ddn/ime ' configopts += '--with-ime=/opt/ddn/ime '
configopts += '--with-gpfs ' configopts += '--with-gpfs '
# to enable SLURM integration (site-specific) # to enable SLURM integration (site-specific)
configopts += '--with-slurm --with-pmix=external --with-libevent=external --with-ompi-pmix-rte' configopts += '--with-slurm --with-pmix=external --with-libevent=external --with-ompi-pmix-rte'
local_libs = ["mpi_mpifh", "mpi", "ompitrace", "open-pal", "open-rte"]
sanity_check_paths = {
'files': ["bin/%s" % local_binfile for local_binfile in ["ompi_info", "opal_wrapper"]] +
["lib/lib%s.%s" % (local_libfile, SHLIB_EXT) for local_libfile in local_libs] +
["include/%s.h" % x for x in ["mpi-ext", "mpif-config",
"mpif", "mpi", "mpi_portable_platform"]],
'dirs': [],
}
moduleclass = 'mpi' moduleclass = 'mpi'
...@@ -7,7 +7,7 @@ easyblock = 'ConfigureMake' ...@@ -7,7 +7,7 @@ easyblock = 'ConfigureMake'
name = 'UCC' name = 'UCC'
version = 'default' version = 'default'
local_realversion = '1.1.0-rc1' local_realversion = '1.2.0-rc1'
homepage = 'https://www.openucx.org/' homepage = 'https://www.openucx.org/'
description = f"""UCC (Unified Collective Communication) is a collective description = f"""UCC (Unified Collective Communication) is a collective
...@@ -23,7 +23,7 @@ toolchainopts = {'pic': True} ...@@ -23,7 +23,7 @@ toolchainopts = {'pic': True}
source_urls = ['https://github.com/openucx/ucc/archive/refs/tags'] source_urls = ['https://github.com/openucx/ucc/archive/refs/tags']
sources = [f'v{local_realversion}.tar.gz'] sources = [f'v{local_realversion}.tar.gz']
checksums = [ checksums = [
'4af76d706a788af081c4a6ce566b6d4e33d75629ce9a8a7b8eec1760eff13168', # v1.1.0-rc1.tar.gz {'v1.2.0-rc1.tar.gz': 'ae6384eecec5054e2c5e960dfc03c083f5f98afaed17276a306c6fe27db4354b'},
] ]
builddependencies = [ builddependencies = [
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment