Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
easybuild-repository-public-release
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DEEP-SEA
easybuild-repository-public-release
Commits
86984f71
Commit
86984f71
authored
1 year ago
by
George Katevenis
Browse files
Options
Downloads
Patches
Plain Diff
Add easyconfig for XHC, bundled with OpenMPI v5 (pre-release)
parent
c5d65e8e
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
Golden_Repo/o/OpenMPI/OpenMPI-XHC-5.0.0rc7-1.2-GCC-11.3.0.eb
+123
-0
123 additions, 0 deletions
Golden_Repo/o/OpenMPI/OpenMPI-XHC-5.0.0rc7-1.2-GCC-11.3.0.eb
Golden_Repo/o/OpenMPI/OpenMPI-XHC-fixes.patch
+283
-0
283 additions, 0 deletions
Golden_Repo/o/OpenMPI/OpenMPI-XHC-fixes.patch
with
406 additions
and
0 deletions
Golden_Repo/o/OpenMPI/OpenMPI-XHC-5.0.0rc7-1.2-GCC-11.3.0.eb
0 → 100644
+
123
−
0
View file @
86984f71
# EasyBuild easyconfig: Open MPI v5 (pre-release, built from a git checkout)
# bundled with XHC, which is unpacked into ompi/mca/coll/xhc of the Open MPI tree.
#
# If you get an oversubscription error during testing, set
# PRTE_MCA_rmaps_default_mapping_policy=:OVERSUBSCRIBE.
# (different from OpenMPI v4's OMPI_MCA_rmaps_base_oversubscribe=1)
# TODO: How to set an env var in an easyconfig?

easyblock = 'ConfigureMake'

# ------------------------------
# Version pins and the site-local XHC tarball (absolute path on the build system).
local_ompi_version = '5.0.0rc7'
local_xhc_version = '1.2'
local_xhc_sources = '/p/project/deepsea/wp5/xhc/source/xhc-v%s.tar.gz' % local_xhc_version
local_xhc_checksums = 'e184e86aa90a092f53c878fdcbfe8699a0b125bdca236b434dbdbae6253f02e8'
# ------------------------------

name = 'OpenMPI-XHC'
version = '%s-%s' % (local_ompi_version, local_xhc_version)

homepage = 'https://www.open-mpi.org/'
description = """The Open MPI Project is an open source MPI-3 implementation."""

toolchain = {'name': 'GCC', 'version': '11.3.0'}
toolchainopts = {'pic': True}

# Open MPI itself: using git clone instead of the release URL; git submodules
# will be needed, so the .git directory is kept in the generated tarball.
local_ompi_git_source = {
    'filename': 'ompi-%s.tar.gz' % local_ompi_version,
    'git_config': {
        'url': 'https://github.com/open-mpi',
        'repo_name': 'ompi',
        'tag': 'v%s' % local_ompi_version,
        'keep_git_dir': True,
    },
}

# XHC: unpack the tarball, then move it into the Open MPI source tree.
local_xhc_source = {
    'filename': local_xhc_sources,
    'extract_cmd': 'tar -xzvf %s && mv xhc-* ./ompi/ompi/mca/coll/xhc',
}

sources = [local_ompi_git_source, local_xhc_source]
patches = ['OpenMPI-XHC-fixes.patch']

# One entry per source, followed by one per patch.
checksums = [
    # git checkout of Open MPI: no checksum given
    None,
    # XHC tarball
    local_xhc_checksums,
    # OpenMPI-XHC-fixes.patch
    '7b3e9454aa7ac28569a00f74e543c79ded0305c68502238efb6f42a83ce53a8e',
]

osdependencies = [
    # needed for --with-verbs
    ('libibverbs-dev', 'libibverbs-devel', 'rdma-core-devel'),
]

builddependencies = [
    ('Autotools', '20220317'),
    ('pkg-config', '0.29.2'),
    ('Perl', '5.34.1'),
    ('Pandoc', '2.19.2', '', SYSTEM),
    ('flex', '2.6.4'),
]

# hwloc, libevent and PMIx are not listed as active dependencies: the internal
# copies are selected through configopts below.
dependencies = [
    ('zlib', '1.2.12'),
    # ('hwloc', '2.5.0'),  # internal
    ('UCX', 'default'),
    ('CUDA', '11.7', '', SYSTEM),
    # ('libevent', '2.1.12'),  # internal
    # ('PMIx', '4.1.0'),  # internal
]

# Developer checkout is necessary for XHC, and these are necessary for developer checkout:
# fetch the submodules, apply the flex fix to the bundled openpmix, then run autogen.
# The trailing '&& ' chains these commands in front of the configure command.
preconfigopts = (
    'git submodule update --init --recursive && '
    '(cd 3rd-party/openpmix && git am ../../pmix_fix_flex.patch) && '
    './autogen.pl && '
)

configopts = (
    '--enable-shared '
    '--with-hwloc=internal '
    # OMPI has trouble with ambiguity between lib and lib64 when detecting UCX's libdir,
    # so the libdir is passed explicitly.
    '--with-ucx=$EBROOTUCX --with-ucx-libdir=$EBROOTUCX/lib '
    '--with-libevent=internal '
    '--with-pmix=internal '
    '--with-slurm '
    '--without-psm2 '
    '--disable-oshmem '
    '--with-cuda=$EBROOTCUDA '
    '--with-xpmem=/p/project/deepsea/wp5/xpmem '
    # libportals is available on deepv but not on cluster nodes.
    '--without-portals4 '
)

# -------------------------------
# Options deliberately left disabled:
#
# No IME or GPFS in DEEP
# configopts += '--with-ime=/opt/ddn/ime '
# configopts += '--with-gpfs '
#
# disable MPI1 compatibility for now, see what breaks...
# configopts = '--enable-mpi1-compatibility '
#
# to enable SLURM integration (site-specific)
# configopts += '--with-slurm --with-pmi=/usr/include/slurm --with-pmi-libdir=/usr'
# -------------------------------

# Trigger autofs mount of these filesystems, so that Open MPI's imperfect
# opal_path_nfs test won't fail (https://github.com/open-mpi/ompi/issues/10152)
# pretestcmds/postbuildcmds = ['stat /direct/Software /p/{arch,usersoftware,fastdata}']
skipsteps = ['test']  # not sure how to do the above though!

# Files that must exist in the installation for the sanity check to pass.
local_bins = ["ompi_info", "opal_wrapper"]
local_libs = ["mpi_mpifh", "mpi", "open-pal"]
local_headers = ["mpi-ext", "mpif-config", "mpif", "mpi", "mpi_portable_platform"]

sanity_check_paths = {
    'files': (
        ["bin/%s" % local_binfile for local_binfile in local_bins]
        + ["lib/lib%s.%s" % (local_libfile, SHLIB_EXT) for local_libfile in local_libs]
        + ["include/%s.h" % local_header for local_header in local_headers]
    ),
    'dirs': [],
}

moduleclass = 'mpi'
This diff is collapsed.
Click to expand it.
Golden_Repo/o/OpenMPI/OpenMPI-XHC-fixes.patch
0 → 100644
+
283
−
0
View file @
86984f71
From 848096fcec8956eaf6982ef4021e0ede72e6186e Mon Sep 17 00:00:00 2001
From: Joseph Schuchart <schuchart@icl.utk.edu>
Date: Wed, 16 Mar 2022 09:33:21 -0400
Subject: [PATCH 1/5] smsc/xpmem: Fix bound alignment
The upper bound of the mapped region must include the last byte of
the range and not reach past the aligned range.
Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
---
opal/mca/smsc/xpmem/smsc_xpmem_module.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/opal/mca/smsc/xpmem/smsc_xpmem_module.c b/opal/mca/smsc/xpmem/smsc_xpmem_module.c
index d2954c1e31..80a0729649 100644
--- a/opal/mca/smsc/xpmem/smsc_xpmem_module.c
+++ b/opal/mca/smsc/xpmem/smsc_xpmem_module.c
@@ -116,7 +116,7 @@
void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
int rc;
base = OPAL_DOWN_ALIGN((uintptr_t) remote_ptr, attach_align, uintptr_t);
- bound = OPAL_ALIGN((uintptr_t) remote_ptr + size - 1, attach_align, uintptr_t) + 1;
+ bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, attach_align, uintptr_t);
if (OPAL_UNLIKELY(bound > xpmem_endpoint->address_max)) {
bound = xpmem_endpoint->address_max;
}
--
2.31.1
From 61e00ee395f66963c069d3e28bea788d57360ade Mon Sep 17 00:00:00 2001
From: Joseph Schuchart <schuchart@icl.utk.edu>
Date: Wed, 16 Mar 2022 09:36:37 -0400
Subject: [PATCH 2/5] smsc/xpmem: retry with page upper bound if aligned range
cannot be mapped
The aligned range computed in mca_smsc_xpmem_map_peer_region may
reach past the end of the stack, which may cause the mapping to fail.
Retrying with an actual page as upper bound has a better chance to succeed.
Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
---
opal/mca/smsc/xpmem/smsc_xpmem_module.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/opal/mca/smsc/xpmem/smsc_xpmem_module.c b/opal/mca/smsc/xpmem/smsc_xpmem_module.c
index 80a0729649..6a3444a35d 100644
--- a/opal/mca/smsc/xpmem/smsc_xpmem_module.c
+++ b/opal/mca/smsc/xpmem/smsc_xpmem_module.c
@@ -23,6 +23,7 @@
#include "opal/mca/smsc/base/base.h"
#include "opal/mca/smsc/xpmem/smsc_xpmem_internal.h"
#include "opal/util/minmax.h"
+#include "opal/util/sys_limits.h"
OBJ_CLASS_INSTANCE(mca_smsc_xpmem_endpoint_t, opal_object_t, NULL, NULL);
@@ -157,8 +158,14 @@
void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL);
if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) {
- OBJ_RELEASE(reg);
- return NULL;
+ /* retry with the page as upper bound */
+ bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, opal_getpagesize(), uintptr_t);
+ reg->bound = (unsigned char *) bound;
+ reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL);
+ if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) {
+ OBJ_RELEASE(reg);
+ return NULL;
+ }
}
opal_memchecker_base_mem_defined(reg->rcache_context, bound - base);
@@ -307,5 +314,5 @@
mca_smsc_xpmem_module_t mca_smsc_xpmem_module = {
.copy_from = mca_smsc_xpmem_copy_from,
.map_peer_region = mca_smsc_xpmem_map_peer_region,
.unmap_peer_region = mca_smsc_xpmem_unmap_peer_region,
- },
+ },
};
--
2.31.1
From 553bf8adf30d28da53b7d7462468e3ba4e0146e3 Mon Sep 17 00:00:00 2001
From: cc-riscv64 <cc-riscv64>
Date: Thu, 28 Apr 2022 16:42:18 +0000
Subject: [PATCH 3/5] Fix mpi_comm_dup_with_info
---
ompi/communicator/comm.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c
index 8f9c95ade1..98ed989f28 100644
--- a/ompi/communicator/comm.c
+++ b/ompi/communicator/comm.c
@@ -963,6 +963,7 @@
int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_LAZY_BARRIER);
ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_ACTIVE_POLL);
if (info) {
+ opal_info_dup(info, &newcomp->super.s_info);
opal_infosubscribe_change_info(&newcomp->super, info);
}
@@ -1068,6 +1069,7 @@
int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp
ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_LAZY_BARRIER);
ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_ACTIVE_POLL);
if (info) {
+ opal_info_dup(info, &newcomp->super.s_info);
opal_infosubscribe_change_info(&newcomp->super, info);
}
--
2.31.1
From d5ca5c9baf308fc855563edf0b61d07eae8e3420 Mon Sep 17 00:00:00 2001
From: George Katevenis <gkatev@ics.forth.gr>
Date: Thu, 6 Oct 2022 14:35:52 +0300
Subject: [PATCH 4/5] Initialize opal/smsc outside of btl/sm, to enable its use
without it
Signed-off-by: George Katevenis <gkatev@ics.forth.gr>
---
ompi/instance/instance.c | 4 ++++
opal/mca/btl/sm/btl_sm_component.c | 9 ++++-----
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c
index 03dad6faeb..a713a5617d 100644
--- a/ompi/instance/instance.c
+++ b/ompi/instance/instance.c
@@ -480,6 +480,10 @@
static int ompi_mpi_instance_init_common (void)
/* Select which MPI components to use */
+ if (OPAL_SUCCESS != (ret = mca_smsc_base_select())) {
+ return ompi_instance_print_error ("mca_smsc_base_select() failed", ret);
+ }
+
if (OMPI_SUCCESS != (ret = mca_pml_base_select (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
return ompi_instance_print_error ("mca_pml_base_select() failed", ret);
}
diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c
index d3b6bfb69d..de865f9fb4 100644
--- a/opal/mca/btl/sm/btl_sm_component.c
+++ b/opal/mca/btl/sm/btl_sm_component.c
@@ -40,7 +40,6 @@
#include "opal/mca/btl/sm/btl_sm_fbox.h"
#include "opal/mca/btl/sm/btl_sm_fifo.h"
#include "opal/mca/btl/sm/btl_sm_frag.h"
-#include "opal/mca/smsc/base/base.h"
#include "opal/mca/smsc/smsc.h"
#ifdef HAVE_SYS_STAT_H
@@ -332,8 +331,8 @@
mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enab
/* no fast boxes allocated initially */
component->num_fbox_in_endpoints = 0;
- rc = mca_smsc_base_select();
- if (OPAL_SUCCESS == rc) {
+ bool have_smsc = (NULL != mca_smsc);
+ if (have_smsc) {
mca_btl_sm.super.btl_flags |= MCA_BTL_FLAGS_RDMA;
mca_btl_sm.super.btl_get = mca_btl_sm_get;
mca_btl_sm.super.btl_put = mca_btl_sm_put;
@@ -355,11 +354,11 @@
mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enab
} else {
BTL_ERROR(("single-copy component requires registration but could not provide the "
"registration handle size"));
- rc = (int) handle_size;
+ have_smsc = false;
}
}
}
- if (OPAL_SUCCESS != rc) {
+ if (!have_smsc) {
mca_btl_sm.super.btl_flags &= ~MCA_BTL_FLAGS_RDMA;
mca_btl_sm.super.btl_get = NULL;
mca_btl_sm.super.btl_put = NULL;
--
2.31.1
From 77081cff10f5a3b04052f34e6e5c89fb64384f70 Mon Sep 17 00:00:00 2001
From: George Katevenis <gkatev@ics.forth.gr>
Date: Fri, 19 Jan 2024 13:58:51 +0200
Subject: [PATCH 5/5] Add patch to fix flex in internal openpmix (openpmix/#2606)
---
pmix_fix_flex.patch | 78 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
create mode 100644 pmix_fix_flex.patch
diff --git a/pmix_fix_flex.patch b/pmix_fix_flex.patch
new file mode 100644
index 0000000000..ded91bf4c1
--- /dev/null
+++ b/pmix_fix_flex.patch
@@ -0,0 +1,78 @@
+From 1286709db150ea2540f8a1d20f286a858c7a07df Mon Sep 17 00:00:00 2001
+From: Ralph Castain <rhc@pmix.org>
+Date: Tue, 24 May 2022 19:05:00 -0700
+Subject: [PATCH] Require flex only when keyval_lex.c is not provided
+
+We currently require flex whenever we are in a Git clone, but that
+really isn't the requirement. We need flex whenever the flex output
+files are not present - otherwise, you can build just fine. So open
+things up a bit by tying the flex requirement to the actual one
+(i.e., that the flex output file exist).
+
+Signed-off-by: Ralph Castain <rhc@pmix.org>
+---
+ config/pmix.m4 | 19 -------------------
+ configure.ac | 20 +++++++++++++++++++-
+ 2 files changed, 19 insertions(+), 20 deletions(-)
+
+diff --git a/config/pmix.m4 b/config/pmix.m4
+index 9c5f83df30..c870d90a9c 100644
+--- a/config/pmix.m4
++++ b/config/pmix.m4
+@@ -1026,25 +1026,6 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[
+ AC_DEFINE_UNQUOTED(PMIX_ENABLE_DLOPEN_SUPPORT, $PMIX_ENABLE_DLOPEN_SUPPORT,
+ [Whether we want to enable dlopen support])
+
+-#
+-# Is this a developer copy?
+-#
+-
+-if test -e $PMIX_TOP_SRCDIR/.git; then
+- PMIX_DEVEL=1
+- # check for Flex
+- AC_PROG_LEX(yywrap)
+- if test "x$LEX" != xflex; then
+- AC_MSG_WARN([PMIx requires Flex to build from non-tarball sources,])
+- AC_MSG_WARN([but Flex was not found. Please install Flex into])
+- AC_MSG_WARN([your path and try again])
+- AC_MSG_ERROR([Cannot continue])
+- fi
+-else
+- PMIX_DEVEL=0
+-fi
+-
+-
+ #
+ # Developer picky compiler options
+ #
+diff --git a/configure.ac b/configure.ac
+index e0f207a2d0..7d3fe1de4a 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -209,7 +209,25 @@ PMIX_SETUP_WRAPPER_INIT
+ # This did not exist pre AM 1.11.x (where x is somewhere >0 and <3),
+ # but it is necessary in AM 1.12.x.
+ m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
+-AC_PROG_LEX(yywrap)
++
++#
++# Is this a developer copy?
++#
++
++if test -e $PMIX_TOP_SRCDIR/.git; then
++ PMIX_DEVEL=1
++else
++ PMIX_DEVEL=0
++fi
++# check for Flex
++AC_PROG_LEX(noyywrap)
++if test "x$LEX" != xflex && test ! -e $PMIX_TOP_SRCDIR/util/keyval/keyval_lex.c; then
++ AC_MSG_WARN([PMIx requires Flex to build from sources that were not])
++ AC_MSG_WARN([fully pre-processed (e.g., an official release tarball),])
++ AC_MSG_WARN([but Flex was not found. Please install Flex into])
++ AC_MSG_WARN([your path and try again])
++ AC_MSG_ERROR([Cannot continue])
++fi
+
+ ############################################################################
+ # Configuration options
--
2.43.0
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment