From 49785d16d904135d9da45ec977524f74b7b14d02 Mon Sep 17 00:00:00 2001
From: Sebastian Achilles <s.achilles@fz-juelich.de>
Date: Sat, 3 Jun 2023 16:39:25 +0200
Subject: [PATCH] JR H100: update stage

---
 .../NVSHMEM-2.8.0-gompi-2022a-CUDA-12.0.eb    | 71 +++++++++++++++++++
 .../n/nvidia-driver/nvidia-driver-default.eb  | 27 +++++++
 .../o/OpenMPI/OpenMPI-4.1.4-GCC-11.3.0.eb     | 21 +++++-
 .../u/UCC/UCC-default-GCCcore-11.3.0.eb       |  4 +-
 4 files changed, 119 insertions(+), 4 deletions(-)
 create mode 100644 Overlays/jureca_spr_overlay/n/NVSHMEM/NVSHMEM-2.8.0-gompi-2022a-CUDA-12.0.eb
 create mode 100644 Overlays/jureca_spr_overlay/n/nvidia-driver/nvidia-driver-default.eb

diff --git a/Overlays/jureca_spr_overlay/n/NVSHMEM/NVSHMEM-2.8.0-gompi-2022a-CUDA-12.0.eb b/Overlays/jureca_spr_overlay/n/NVSHMEM/NVSHMEM-2.8.0-gompi-2022a-CUDA-12.0.eb
new file mode 100644
index 000000000..0d7ea6832
--- /dev/null
+++ b/Overlays/jureca_spr_overlay/n/NVSHMEM/NVSHMEM-2.8.0-gompi-2022a-CUDA-12.0.eb
@@ -0,0 +1,71 @@
+easyblock = 'ConfigureMake'
+
+name = 'NVSHMEM'
+version = '2.8.0'
+versionsuffix = '-CUDA-%(cudaver)s'
+
+local_cuda_version = '12.0'
+
+homepage = 'https://developer.nvidia.com/nvshmem'
+description = """NVSHMEM is a parallel programming interface based on OpenSHMEM that provides 
+efficient and scalable communication for NVIDIA GPU clusters. NVSHMEM creates a 
+global address space for data that spans the memory of multiple GPUs and can be 
+accessed with fine-grained GPU-initiated operations, CPU-initiated operations, 
+and operations on CUDA streams.
+"""
+
+toolchain = {'name': 'gompi', 'version': '2022a'}
+
+download_instructions = """The sources of NVSHMEM can be downloaded at NVIDIA's webpage when you have signed up for
+their (free) developer program:
+https://developer.nvidia.com/nvshmem-downloads"""
+
+sources = ['%(namelower)s_src_%(version)s-3.txz']
+checksums = ['7d4ef226630a94b587d18e02c27decc8b41d6f4ee52a26e25644b23cd18da81f']
+
+builddependencies = [
+    ('Autotools', '20220317'),
+    ('pkgconf', '1.8.0'),
+]
+
+dependencies = [
+    ('CUDA', local_cuda_version, '', SYSTEM),
+    ('NCCL', 'default', f'-CUDA-{local_cuda_version}'),
+]
+
+skipsteps = ['configure']  # no configure step; settings are exported to make via prebuildopts/preinstallopts
+
+prebuildopts = 'export %s &&' % ' '.join([
+    'NVSHMEM_USE_GDRCOPY=1',
+    'GDRCOPY_HOME=${EBROOTGDRCOPY}',
+    # MPI support
+    'MPI_HOME=${EBROOTOPENMPI}',
+    'NVSHMEM_MPI_SUPPORT=1',
+    'NVSHMEMTEST_USE_MPI_LAUNCHER=1',
+    # NCCL support
+    'NCCL_HOME=${EBROOTNCCL}',
+    'NVSHMEM_USE_NCCL=1',
+    # Spell out derived paths: in one `export`, ${VAR} is expanded before VAR itself gets assigned.
+    'NVSHMEM_BUILDDIR=%(builddir)s',
+    'NVSHMEM_EXAMPLES_BUILDDIR=%(builddir)s/examples/obj',
+    'NVSHMEM_OTHERTEST_BUILDDIR=%(builddir)s/othertest/obj',
+    'NVSHMEM_TEST_BUILDDIR=%(builddir)s/test/obj',
+    'NVSHMEM_PERFTEST_BUILDDIR=%(builddir)s/perftest/obj',
+    # Install locations (same reason: use %(installdir)s rather than ${NVSHMEM_PREFIX}).
+    'NVSHMEM_PREFIX=%(installdir)s',
+    'NVSHMEM_EXAMPLES_INSTALL=%(installdir)s/examples',
+    'NVSHMEM_OTHERTEST_INSTALL=%(installdir)s/othertest',
+    'NVSHMEM_PERFTEST_INSTALL=%(installdir)s/perftest',
+    'NVSHMEM_TEST_INSTALL=%(installdir)s/test',
+])
+
+preinstallopts = prebuildopts  # the install step needs the same environment as the build step
+
+sanity_check_paths = {
+    'files': ['lib/libnvshmem.a', 'lib/nvshmem_bootstrap_mpi.%s' % SHLIB_EXT],
+    'dirs': ['include'],
+}
+
+modextravars = {'NVSHMEM_HOME': '%(installdir)s'}
+
+moduleclass = 'devel'
diff --git a/Overlays/jureca_spr_overlay/n/nvidia-driver/nvidia-driver-default.eb b/Overlays/jureca_spr_overlay/n/nvidia-driver/nvidia-driver-default.eb
new file mode 100644
index 000000000..2900da59e
--- /dev/null
+++ b/Overlays/jureca_spr_overlay/n/nvidia-driver/nvidia-driver-default.eb
@@ -0,0 +1,27 @@
+name = 'nvidia-driver'
+version = 'default'
+realversion = '525.105.17'  # actual driver release; feeds the description, download URL and file name
+
+homepage = 'https://developer.nvidia.com/cuda-toolkit'
+description = f"""
+This is a set of libraries normally installed by the NVIDIA driver installer.
+
+The real version of this package is {realversion}.
+"""
+
+site_contacts = 'sc@fz-juelich.de'
+
+toolchain = SYSTEM
+
+source_urls = ['https://us.download.nvidia.com/tesla/%s/' % realversion]
+sources = ['NVIDIA-Linux-x86_64-%s.run' % realversion]
+checksums = ['c635a21a282c9b53485f19ebb64a0f4b536a968b94d4d97629e0bc547a58142a']
+
+# Drop the bundled NVML library (libnvidia-ml.so*) to avoid conflicts between NVML and the kernel driver
+postinstallcmds = ['rm %(installdir)s/lib64/libnvidia-ml.so*']
+
+modluafooter = '''
+add_property("arch","gpu")
+'''
+
+moduleclass = 'system'
diff --git a/Overlays/jureca_spr_overlay/o/OpenMPI/OpenMPI-4.1.4-GCC-11.3.0.eb b/Overlays/jureca_spr_overlay/o/OpenMPI/OpenMPI-4.1.4-GCC-11.3.0.eb
index b3e731028..18778a139 100644
--- a/Overlays/jureca_spr_overlay/o/OpenMPI/OpenMPI-4.1.4-GCC-11.3.0.eb
+++ b/Overlays/jureca_spr_overlay/o/OpenMPI/OpenMPI-4.1.4-GCC-11.3.0.eb
@@ -1,3 +1,5 @@
+easyblock = 'ConfigureMake'
+
 name = 'OpenMPI'
 version = '4.1.4'
 
@@ -51,15 +53,30 @@ preconfigopts = ' && '.join([
     'aclocal',
     'automake',
     ''
-])
+ ])
 
-configopts = '--without-orte '
+configopts = '--enable-shared '
+configopts += '--with-hwloc=$EBROOTHWLOC '  # hwloc support
+configopts += '--with-ucx=$EBROOTUCX '
+configopts += '--with-verbs '
+configopts += '--with-libevent=$EBROOTLIBEVENT '
+configopts += '--without-orte '
 configopts += '--without-psm2 '
 configopts += '--disable-oshmem '
+configopts += '--with-cuda=$EBROOTCUDA '
 configopts += '--with-ime=/opt/ddn/ime '
 configopts += '--with-gpfs '
 
 # to enable SLURM integration (site-specific)
 configopts += '--with-slurm --with-pmix=external --with-libevent=external --with-ompi-pmix-rte'
 
+local_libs = ["mpi_mpifh", "mpi", "ompitrace", "open-pal", "open-rte"]
+sanity_check_paths = {
+    'files': ["bin/%s" % local_binfile for local_binfile in ["ompi_info", "opal_wrapper"]] +
+             ["lib/lib%s.%s" % (local_libfile, SHLIB_EXT) for local_libfile in local_libs] +
+             ["include/%s.h" % x for x in ["mpi-ext", "mpif-config",
+                                           "mpif", "mpi", "mpi_portable_platform"]],
+    'dirs': [],
+}
+
 moduleclass = 'mpi'
diff --git a/Overlays/jureca_spr_overlay/u/UCC/UCC-default-GCCcore-11.3.0.eb b/Overlays/jureca_spr_overlay/u/UCC/UCC-default-GCCcore-11.3.0.eb
index 6a90ac037..a508309e8 100644
--- a/Overlays/jureca_spr_overlay/u/UCC/UCC-default-GCCcore-11.3.0.eb
+++ b/Overlays/jureca_spr_overlay/u/UCC/UCC-default-GCCcore-11.3.0.eb
@@ -7,7 +7,7 @@ easyblock = 'ConfigureMake'
 
 name = 'UCC'
 version = 'default'
-local_realversion = '1.1.0-rc1'
+local_realversion = '1.2.0-rc1'
 
 homepage = 'https://www.openucx.org/'
 description = f"""UCC (Unified Collective Communication) is a collective
@@ -23,7 +23,7 @@ toolchainopts = {'pic': True}
 source_urls = ['https://github.com/openucx/ucc/archive/refs/tags']
 sources = [f'v{local_realversion}.tar.gz']
 checksums = [
-    '4af76d706a788af081c4a6ce566b6d4e33d75629ce9a8a7b8eec1760eff13168',  # v1.1.0-rc1.tar.gz
+    {'v1.2.0-rc1.tar.gz': 'ae6384eecec5054e2c5e960dfc03c083f5f98afaed17276a306c6fe27db4354b'},
 ]
 
 builddependencies = [
-- 
GitLab