From cfcf2171a649cb9df9356ca145f89ef809602102 Mon Sep 17 00:00:00 2001 From: Damian Alvarez <swmanage@jrlogin12.jureca> Date: Wed, 6 Oct 2021 14:53:21 +0200 Subject: [PATCH] Tweaks to the hooks and MNS to support BullMPI By mistake in this commit, but necessary anyway: Added UD modules for psmpi --- Custom_Hooks/eb_hooks.py | 14 +++---- .../flexible_custom_hierarchical_mns.py | 10 ++++- .../psmpi-settings-5.4-CUDA-UD.eb | 26 +++++++++++++ .../psmpi-settings-5.4-CUDA-low-latency-UD.eb | 39 +++++++++++++++++++ .../psmpi-settings-5.4-UCX-UD.eb | 24 ++++++++++++ 5 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-UD.eb create mode 100644 Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb create mode 100644 Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX-UD.eb diff --git a/Custom_Hooks/eb_hooks.py b/Custom_Hooks/eb_hooks.py index c9b8c5843..f28dc2a4b 100644 --- a/Custom_Hooks/eb_hooks.py +++ b/Custom_Hooks/eb_hooks.py @@ -44,12 +44,12 @@ SUPPORTED_TOOLCHAIN_FAMILIES = ( + SUPPORTED_TOPLEVEL_TOOLCHAIN_FAMILIES ) VETOED_INSTALLATIONS = { - 'juwelsbooster': ['impi', 'impi-settings'], - 'juwels': [''], + 'juwelsbooster': ['impi', 'impi-settings', 'BullMPI', 'BullMPI-settings'], + 'juwels': ['BullMPI', 'BullMPI-settings'], 'jurecadc': [''], - 'jurecabooster': ['OpenMPI', 'OpenMPI-settings', 'CUDA', 'nvidia-driver', 'UCX', 'NVHPC'], - 'jusuf': ['impi', 'impi-settings'], - 'hdfml': [''], + 'jurecabooster': ['OpenMPI', 'OpenMPI-settings', 'CUDA', 'nvidia-driver', 'UCX', 'NVHPC', 'BullMPI', 'BullMPI-settings'], + 'jusuf': ['impi', 'impi-settings', 'BullMPI', 'BullMPI-settings'], + 'hdfml': ['BullMPI', 'BullMPI-settings'], } common_site_contact = 'Support <sc@fz-juelich.de>' @@ -201,7 +201,7 @@ def parse_hook(ec, *args, **kwargs): # Update the dict ec_dict = ec.asdict() # MPIs are a family (in the Lmod sense) and require to load mpi-settings - if ec.name in SUPPORTED_MPIS: + if ec.name 
in SUPPORTED_MPIS and '/p/software' in install_path().lower(): key = "modluafooter" value = ''' if not ( isloaded("mpi-settings") ) then @@ -260,7 +260,7 @@ family("mpi") # not sure of a fool-proof way to do this, let's just try a heuristic site_contacts = None # Non-user installation - if '/p/software' in install_path().lower() or '/gpfs/software' in install_path().lower(): + if '/p/software' in install_path().lower(): if 'swmanage' in os.getenv('USER'): site_contacts = common_site_contact else: diff --git a/Custom_MNS/flexible_custom_hierarchical_mns.py b/Custom_MNS/flexible_custom_hierarchical_mns.py index e35a4c857..bea574645 100644 --- a/Custom_MNS/flexible_custom_hierarchical_mns.py +++ b/Custom_MNS/flexible_custom_hierarchical_mns.py @@ -51,10 +51,11 @@ mpi_relevant_versions = { 'psmpi': 2, 'MVAPICH2': 2, 'OpenMPI': 2, + 'BullMPI': 2, } # MPIs with settings modules -mpi_with_settings = ['psmpi', 'impi', 'MVAPICH2', 'OpenMPI'] +mpi_with_settings = ['psmpi', 'impi', 'OpenMPI', 'BullMPI'] class FlexibleCustomHierarchicalMNS(HierarchicalMNS): """Class implementing an example hierarchical module naming scheme.""" @@ -255,7 +256,12 @@ class FlexibleCustomHierarchicalMNS(HierarchicalMNS): else: tc_comp_name, tc_comp_ver = self._find_relevant_compiler_info(tc_comp_info) mpi_name, mpi_ver = self._find_relevant_mpi_info(ec) - paths.append(os.path.join(MPI, tc_comp_name, tc_comp_ver, mpi_name, mpi_ver)) + # Hack the module path extension, so BullMPI actually reuses the stack from OpenMPI + # instead of building everything on top unnecessarily + if mpi_name == 'BullMPI': + paths.append(os.path.join(MPI, tc_comp_name, tc_comp_ver, 'OpenMPI', mpi_ver)) + else: + paths.append(os.path.join(MPI, tc_comp_name, tc_comp_ver, mpi_name, mpi_ver)) if ec['name'] in mpi_with_settings: paths.append(os.path.join(MPI_SETTINGS, mpi_name, mpi_ver)) diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-UD.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-UD.eb new file
mode 100644 index 000000000..6ffafc830 --- /dev/null +++ b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-UD.eb @@ -0,0 +1,26 @@ +easyblock = 'SystemBundle' + +name = 'psmpi-settings' +version = '5.4' +versionsuffix = 'CUDA-UD' + +homepage = '' +description = 'This is a module to load the default ParaStationMPI configuration' + +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'PSP_CUDA': '1', + 'PSP_SHM': '0', + 'PSP_UCP': '1', + 'PSP_HARD_ABORT': '1', + 'UCX_TLS': 'ud_x,cuda_ipc,gdr_copy,self,sm,cuda_copy', + 'UCX_MEMTYPE_CACHE': 'n', +} + +moduleclass = 'system' diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb new file mode 100644 index 000000000..dd2d90c6b --- /dev/null +++ b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb @@ -0,0 +1,39 @@ +easyblock = 'SystemBundle' + +name = 'psmpi-settings' +version = '5.4' +versionsuffix = 'CUDA-low-latency-UD' + +homepage = '' +description = '''This is a module to load the default ParaStationMPI configuration + +This module is otherwise equivalent to mpi-settings/CUDA, but enables UCX_MEMTYPE_CACHE. Please read the URL below to +understand if this is something you can use: + +http://openucx.github.io/ucx/faq.html#7-what-are-the-current-limitations-of-using-gpu-memory +''' + +modloadmsg = ''' +This module is otherwise equivalent to mpi-settings/CUDA, but enables UCX_MEMTYPE_CACHE. 
Please read the URL below to +understand if this is something you can use: + +http://openucx.github.io/ucx/faq.html#7-what-are-the-current-limitations-of-using-gpu-memory +''' + +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'PSP_CUDA': '1', + 'PSP_SHM': '0', + 'PSP_UCP': '1', + 'PSP_HARD_ABORT': '1', + 'UCX_TLS': 'ud_x,cuda_ipc,gdr_copy,self,sm,cuda_copy', + 'UCX_MEMTYPE_CACHE': 'y', +} + +moduleclass = 'system' diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX-UD.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX-UD.eb new file mode 100644 index 000000000..4a568a19b --- /dev/null +++ b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX-UD.eb @@ -0,0 +1,24 @@ +easyblock = 'SystemBundle' + +name = 'psmpi-settings' +version = '5.4' +versionsuffix = 'UCX-UD' + +homepage = '' +description = 'This is a module to load the ParaStationMPI configuration. It enables UCX with UD as transport' + +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'PSP_OPENIB': '0', + 'PSP_UCP': '1', + 'PSP_HARD_ABORT': '1', + 'UCX_TLS': 'ud_x,self,sm', +} + +moduleclass = 'system' -- GitLab