From 84d0fd637cb4b3df3d12d60c405ca24f29b7d5bb Mon Sep 17 00:00:00 2001
From: Damian Alvarez <d.alvarez@fz-juelich.de>
Date: Tue, 20 Sep 2022 11:29:48 +0200
Subject: [PATCH] Re-structure mpi-settings modules

Rename the OpenMPI-settings 4.1 variants ('plain' -> 'UCX',
'UCC' -> 'UCX-UCC') and add 'CUDA' and 'CUDA-UCC' variants. On load, the
CUDA variants try to load the UCX-settings/*-CUDA module matching the
UCX-settings flavour (RC, UD or DC) that is already loaded, and fall back
to UCX-settings/RC-CUDA when no UCX-settings module is loaded.

Remove psmpi-settings-5.6-plain.eb.

---
 .../OpenMPI-settings-4.1-CUDA-UCC.eb          | 55 +++++++++++++++++++
 .../OpenMPI-settings-4.1-CUDA.eb              | 53 ++++++++++++++++++
 ...UCC.eb => OpenMPI-settings-4.1-UCX-UCC.eb} |  6 +-
 ...1-plain.eb => OpenMPI-settings-4.1-UCX.eb} |  2 +-
 .../psmpi-settings-5.6-plain.eb               | 19 -------
 5 files changed, 113 insertions(+), 22 deletions(-)
 create mode 100644 Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-UCC.eb
 create mode 100644 Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA.eb
 rename Golden_Repo/o/OpenMPI-settings/{OpenMPI-settings-4.1-UCC.eb => OpenMPI-settings-4.1-UCX-UCC.eb} (88%)
 rename Golden_Repo/o/OpenMPI-settings/{OpenMPI-settings-4.1-plain.eb => OpenMPI-settings-4.1-UCX.eb} (98%)
 delete mode 100644 Golden_Repo/p/psmpi-settings/psmpi-settings-5.6-plain.eb

diff --git a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-UCC.eb b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-UCC.eb
new file mode 100644
index 000000000..d4ef0f56e
--- /dev/null
+++ b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-UCC.eb
@@ -0,0 +1,55 @@
+easyblock = 'SystemBundle'
+
+name = 'OpenMPI-settings'
+version = '4.1'
+versionsuffix = 'CUDA-UCC'
+
+homepage = ''
+description = '''This module loads the default OpenMPI configuration. It relies on UCX and enables the UCX CUDA
+transports. It also relies on UCC for collectives.
+'''
+
+toolchain = SYSTEM
+
+source_urls = []
+
+sources = []
+modextravars = {
+    'SLURM_MPI_TYPE': 'pspmix',
+    'OMPI_MCA_mca_base_component_show_load_errors': '1',
+    'OMPI_MCA_mpi_param_check': '1',
+    'OMPI_MCA_mpi_show_handle_leaks': '1',
+    'OMPI_MCA_mpi_warn_on_fork': '1',
+    # Disable uct for the time being due to:
+    # https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX#running-open-mpi-with-ucx
+    # Also openib, since it is deprecated and should be substituted by the UCX support in the pml
+    'OMPI_MCA_btl': '^uct,openib',
+    'OMPI_MCA_btl_openib_allow_ib': '1',
+    'OMPI_MCA_bml_r2_show_unreach_errors': '0',
+    'OMPI_MCA_coll_ucc_priority': '80',
+    'OMPI_MCA_coll_ucc_enable': '1',
+    'OMPI_MCA_pml': 'ucx',
+    'OMPI_MCA_osc': '^rdma',
+    'OMPI_MCA_opal_abort_print_stack': '1',
+    'OMPI_MCA_opal_set_max_sys_limits': '1',
+    'OMPI_MCA_opal_event_include': 'epoll',
+    'OMPI_MCA_btl_openib_warn_default_gid_prefix': '0',
+    # OMPIO does not seem to work reliably on our system
+    'OMPI_MCA_io': 'romio321',
+}
+
+modluafooter = '''
+if mode()=="load" then
+    if isloaded("UCX-settings/RC") then
+        try_load("UCX-settings/RC-CUDA")
+    elseif isloaded("UCX-settings/UD") then
+        try_load("UCX-settings/UD-CUDA")
+    elseif isloaded("UCX-settings/DC") then
+        try_load("UCX-settings/DC-CUDA")
+    elseif not isloaded("UCX-settings") then
+        try_load("UCX-settings/RC-CUDA")
+    end
+end
+'''
+
+moduleclass = 'system'
diff --git a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA.eb b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA.eb
new file mode 100644
index 000000000..65d7ab388
--- /dev/null
+++ b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA.eb
@@ -0,0 +1,53 @@
+easyblock = 'SystemBundle'
+
+name = 'OpenMPI-settings'
+version = '4.1'
+versionsuffix = 'CUDA'
+
+homepage = ''
+description = '''This module loads the default OpenMPI configuration. It relies on UCX and enables the UCX CUDA
+transports.
+'''
+
+toolchain = SYSTEM
+
+source_urls = []
+
+sources = []
+modextravars = {
+    'SLURM_MPI_TYPE': 'pspmix',
+    'OMPI_MCA_mca_base_component_show_load_errors': '1',
+    'OMPI_MCA_mpi_param_check': '1',
+    'OMPI_MCA_mpi_show_handle_leaks': '1',
+    'OMPI_MCA_mpi_warn_on_fork': '1',
+    # Disable uct for the time being due to:
+    # https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX#running-open-mpi-with-ucx
+    # Also openib, since it is deprecated and should be substituted by the UCX support in the pml
+    'OMPI_MCA_btl': '^uct,openib',
+    'OMPI_MCA_btl_openib_allow_ib': '1',
+    'OMPI_MCA_bml_r2_show_unreach_errors': '0',
+    'OMPI_MCA_pml': 'ucx',
+    'OMPI_MCA_osc': '^rdma',
+    'OMPI_MCA_opal_abort_print_stack': '1',
+    'OMPI_MCA_opal_set_max_sys_limits': '1',
+    'OMPI_MCA_opal_event_include': 'epoll',
+    'OMPI_MCA_btl_openib_warn_default_gid_prefix': '0',
+    # OMPIO does not seem to work reliably on our system
+    'OMPI_MCA_io': 'romio321',
+}
+
+modluafooter = '''
+if mode()=="load" then
+    if isloaded("UCX-settings/RC") then
+        try_load("UCX-settings/RC-CUDA")
+    elseif isloaded("UCX-settings/UD") then
+        try_load("UCX-settings/UD-CUDA")
+    elseif isloaded("UCX-settings/DC") then
+        try_load("UCX-settings/DC-CUDA")
+    elseif not isloaded("UCX-settings") then
+        try_load("UCX-settings/RC-CUDA")
+    end
+end
+'''
+
+moduleclass = 'system'
diff --git a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCC.eb b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX-UCC.eb
similarity index 88%
rename from Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCC.eb
rename to Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX-UCC.eb
index 1ca737aca..28d20bc68 100644
--- a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCC.eb
+++ b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX-UCC.eb
@@ -2,10 +2,12 @@ easyblock = 'SystemBundle'
 
 name = 'OpenMPI-settings'
 version = '4.1'
-versionsuffix = 'UCC'
+versionsuffix = 'UCX-UCC'
 
 homepage = ''
-description = 'This module loads the default OpenMPI configuration. It relies on UCX and UCC.'
+description = '''This module loads the default OpenMPI configuration. It relies on UCX. It also relies on UCC for
+collectives.
+'''
 
 toolchain = SYSTEM
 
diff --git a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-plain.eb b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX.eb
similarity index 98%
rename from Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-plain.eb
rename to Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX.eb
index 976a72395..4688f919a 100644
--- a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-plain.eb
+++ b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX.eb
@@ -2,7 +2,7 @@ easyblock = 'SystemBundle'
 
 name = 'OpenMPI-settings'
 version = '4.1'
-versionsuffix = 'plain'
+versionsuffix = 'UCX'
 
 homepage = ''
 description = 'This module loads the default OpenMPI configuration. It relies on UCX.'
diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.6-plain.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.6-plain.eb
deleted file mode 100644
index 7586f6ec3..000000000
--- a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.6-plain.eb
+++ /dev/null
@@ -1,19 +0,0 @@
-easyblock = 'SystemBundle'
-
-name = 'psmpi-settings'
-version = '5.6'
-versionsuffix = 'plain'
-
-homepage = ''
-description = 'This module loads the ParaStationMPI configuration. It relies on the defaults.'
-
-toolchain = SYSTEM
-
-source_urls = []
-
-sources = []
-modextravars = {
-    'PSP_HARD_ABORT': '1',
-}
-
-moduleclass = 'system'
-- 
GitLab