From b428644542434dbb606ea861230cd73373845d91 Mon Sep 17 00:00:00 2001 From: Damian Alvarez <swmanage@hdfmll01.hdfml> Date: Wed, 3 Nov 2021 17:32:20 +0100 Subject: [PATCH] To support the separation of the UCX settings from the MPI settings, which provides a better view of what is tweaked, and allows to reuse UCX settings for all MPIs --- Custom_Hooks/eb_hooks.py | 17 ++++++ .../flexible_custom_hierarchical_mns.py | 10 ++++ ...21-UCX-UD.eb => impi-settings-2021-UCX.eb} | 6 +- .../impi-settings-2021-large-job-hybrid.eb | 25 -------- .../impi-settings-2021-large-job-mpi.eb | 25 -------- .../impi-settings/impi-settings-2021-plain.eb | 3 +- .../OpenMPI-settings-4.1-CUDA-low-latency.eb | 57 ------------------- .../OpenMPI-settings-4.1-UCX-RC.eb | 44 -------------- ...-CUDA.eb => OpenMPI-settings-4.1-plain.eb} | 6 +- .../psmpi-settings-5.4-CUDA-low-latency-UD.eb | 39 ------------- ...-CUDA-UD.eb => psmpi-settings-5.4-CUDA.eb} | 9 ++- ....4-UCX-UD.eb => psmpi-settings-5.4-UCX.eb} | 5 +- .../psmpi-settings-5.4-plain.eb | 21 +++++++ .../u/UCX-settings/UCX-settings-DC-CUDA.eb | 20 +++++++ Golden_Repo/u/UCX-settings/UCX-settings-DC.eb | 20 +++++++ .../u/UCX-settings/UCX-settings-RC-CUDA.eb | 20 +++++++ Golden_Repo/u/UCX-settings/UCX-settings-RC.eb | 20 +++++++ .../u/UCX-settings/UCX-settings-UD-CUDA.eb | 20 +++++++ Golden_Repo/u/UCX-settings/UCX-settings-UD.eb | 20 +++++++ .../BullMPI-settings-4.1-CUDA.eb | 2 - 20 files changed, 180 insertions(+), 209 deletions(-) rename Golden_Repo/i/impi-settings/{impi-settings-2021-UCX-UD.eb => impi-settings-2021-UCX.eb} (72%) delete mode 100644 Golden_Repo/i/impi-settings/impi-settings-2021-large-job-hybrid.eb delete mode 100644 Golden_Repo/i/impi-settings/impi-settings-2021-large-job-mpi.eb delete mode 100644 Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-low-latency.eb delete mode 100644 Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX-RC.eb rename Golden_Repo/o/OpenMPI-settings/{OpenMPI-settings-4.1-CUDA.eb => 
OpenMPI-settings-4.1-plain.eb} (86%) delete mode 100644 Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb rename Golden_Repo/p/psmpi-settings/{psmpi-settings-5.4-CUDA-UD.eb => psmpi-settings-5.4-CUDA.eb} (61%) rename Golden_Repo/p/psmpi-settings/{psmpi-settings-5.4-UCX-UD.eb => psmpi-settings-5.4-UCX.eb} (77%) create mode 100644 Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-plain.eb create mode 100644 Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb create mode 100644 Golden_Repo/u/UCX-settings/UCX-settings-DC.eb create mode 100644 Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb create mode 100644 Golden_Repo/u/UCX-settings/UCX-settings-RC.eb create mode 100644 Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb create mode 100644 Golden_Repo/u/UCX-settings/UCX-settings-UD.eb diff --git a/Custom_Hooks/eb_hooks.py b/Custom_Hooks/eb_hooks.py index 1aa463c96..b17033443 100644 --- a/Custom_Hooks/eb_hooks.py +++ b/Custom_Hooks/eb_hooks.py @@ -228,6 +228,23 @@ family("mpi") ec.log.info( "[parse hook] Injecting Lmod mpi family and mpi-settings loading") + # UCX requires the UCX-settings module to be loaded (exact name match, not a substring test) + if ec.name == 'UCX' and '/p/software' in install_path().lower(): + key = "modluafooter" + value = ''' +if not ( isloaded("UCX-settings") ) then + load("UCX-settings") +end + ''' + if key in ec_dict: + if value not in ec_dict[key]: + ec[key] = "\n".join([ec[key], value]) + else: + ec[key] = value + ec.log.info( + "[parse hook] Injecting UCX-settings loading") + + # Check if we need to use 'modaltsoftname' if ec.name in REQUIRE_MODALTSOFTNAME: key = "modaltsoftname" diff --git a/Custom_MNS/flexible_custom_hierarchical_mns.py b/Custom_MNS/flexible_custom_hierarchical_mns.py index bea574645..0259dc2fe 100644 --- a/Custom_MNS/flexible_custom_hierarchical_mns.py +++ b/Custom_MNS/flexible_custom_hierarchical_mns.py @@ -19,6 +19,7 @@ CORE = 'Core' COMPILER = 'Compiler' MPI = 'MPI' MPI_SETTINGS = 'MPI_settings' +COMM_SETTINGS = 'comm_settings' MODULECLASS_COMPILER = 
'compiler' MODULECLASS_MPI = 'mpi' @@ -57,6 +58,9 @@ mpi_relevant_versions = { # MPIs with settings modules mpi_with_settings = ['psmpi', 'impi', 'OpenMPI', 'BullMPI'] +# Communication packages with settings modules +comm_pkg_with_settings = ['UCX', 'NCCL'] + class FlexibleCustomHierarchicalMNS(HierarchicalMNS): """Class implementing an example hierarchical module naming scheme.""" def is_short_modname_for(self, short_modname, name): @@ -170,6 +174,9 @@ class FlexibleCustomHierarchicalMNS(HierarchicalMNS): stripped_name = re.sub('-settings$', '', ec['name']) if stripped_name in mpi_with_settings: subdir = os.path.join(MPI_SETTINGS, stripped_name, ec['version']) + # or the module is a settings module for a communication package + elif stripped_name in comm_pkg_with_settings and '-settings' in ec['name']: + subdir = os.path.join(COMM_SETTINGS, stripped_name) else: tc_comp_name, tc_comp_ver = self._find_relevant_compiler_info(tc_comp_info) tc_mpi = det_toolchain_mpi(ec) @@ -266,4 +273,7 @@ class FlexibleCustomHierarchicalMNS(HierarchicalMNS): if ec['name'] in mpi_with_settings: paths.append(os.path.join(MPI_SETTINGS, mpi_name, mpi_ver)) + elif ec['name'] in comm_pkg_with_settings: + paths.append(os.path.join(COMM_SETTINGS, ec['name'])) + return paths diff --git a/Golden_Repo/i/impi-settings/impi-settings-2021-UCX-UD.eb b/Golden_Repo/i/impi-settings/impi-settings-2021-UCX.eb similarity index 72% rename from Golden_Repo/i/impi-settings/impi-settings-2021-UCX-UD.eb rename to Golden_Repo/i/impi-settings/impi-settings-2021-UCX.eb index 0f68cff1f..128447e47 100644 --- a/Golden_Repo/i/impi-settings/impi-settings-2021-UCX-UD.eb +++ b/Golden_Repo/i/impi-settings/impi-settings-2021-UCX.eb @@ -2,10 +2,10 @@ easyblock = 'SystemBundle' name = 'impi-settings' version = '2021' -versionsuffix = 'UCX-UD' +versionsuffix = 'UCX' homepage = '' -description = """This is a module to load the IntelMPI configuration with UCX and UD as TL""" +description = 'This is a module to load the IntelMPI 
configuration with UCX' site_contacts = 'd.alvarez@fz-juelich.de' @@ -16,7 +16,7 @@ source_urls = [] sources = [] modextravars = { - 'UCX_TLS': 'ud_x,sm,self', + 'FI_PROVIDER': 'mlx', 'I_MPI_PMI_VALUE_LENGTH_MAX': '900', # Needed for PSM and harmless for InfiniBand. For ParaStation it is set on the pscom module 'HFI_NO_CPUAFFINITY': 'YES', diff --git a/Golden_Repo/i/impi-settings/impi-settings-2021-large-job-hybrid.eb b/Golden_Repo/i/impi-settings/impi-settings-2021-large-job-hybrid.eb deleted file mode 100644 index 4c5b2ed2a..000000000 --- a/Golden_Repo/i/impi-settings/impi-settings-2021-large-job-hybrid.eb +++ /dev/null @@ -1,25 +0,0 @@ -easyblock = 'SystemBundle' - -name = 'impi-settings' -version = '2021' -versionsuffix = 'large-job-hybrid' - -homepage = '' -description = """This is a module to load the IntelMPI configuration for large scale hybrid jobs""" - -site_contacts = 'd.alvarez@fz-juelich.de' - -toolchain = SYSTEM - -source_urls = [] - -sources = [] - -modextravars = { - 'UCX_TLS': 'dc_x,sm,self', - 'I_MPI_PMI_VALUE_LENGTH_MAX': '900', - # Needed for PSM and harmless for InfiniBand. For ParaStation it is set on the pscom module - 'HFI_NO_CPUAFFINITY': 'YES', -} - -moduleclass = 'system' diff --git a/Golden_Repo/i/impi-settings/impi-settings-2021-large-job-mpi.eb b/Golden_Repo/i/impi-settings/impi-settings-2021-large-job-mpi.eb deleted file mode 100644 index dbc4f7a2c..000000000 --- a/Golden_Repo/i/impi-settings/impi-settings-2021-large-job-mpi.eb +++ /dev/null @@ -1,25 +0,0 @@ -easyblock = 'SystemBundle' - -name = 'impi-settings' -version = '2021' -versionsuffix = 'large-job-mpi' - -homepage = '' -description = """This is a module to load the IntelMPI configuration for large scale MPI jobs""" - -site_contacts = 'd.alvarez@fz-juelich.de' - -toolchain = SYSTEM - -source_urls = [] - -sources = [] - -modextravars = { - 'UCX_TLS': 'dc_x,sm,self', - 'I_MPI_PMI_VALUE_LENGTH_MAX': '900', - # Needed for PSM and harmless for InfiniBand. 
For ParaStation it is set on the pscom module - 'HFI_NO_CPUAFFINITY': 'YES', -} - -moduleclass = 'system' diff --git a/Golden_Repo/i/impi-settings/impi-settings-2021-plain.eb b/Golden_Repo/i/impi-settings/impi-settings-2021-plain.eb index ebe18a75d..9bfd18a04 100644 --- a/Golden_Repo/i/impi-settings/impi-settings-2021-plain.eb +++ b/Golden_Repo/i/impi-settings/impi-settings-2021-plain.eb @@ -5,7 +5,7 @@ version = '2021' versionsuffix = 'plain' homepage = '' -description = """This is a module to load the default IntelMPI configuration""" +description = 'This is a module to load the default IntelMPI configuration. It relies on the default order for libfabric' site_contacts = 'd.alvarez@fz-juelich.de' @@ -16,7 +16,6 @@ source_urls = [] sources = [] modextravars = { - 'UCX_TLS': 'dc_x,sm,self', 'I_MPI_PMI_VALUE_LENGTH_MAX': '900', # Needed for PSM and harmless for InfiniBand. For ParaStation it is set on the pscom module 'HFI_NO_CPUAFFINITY': 'YES', diff --git a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-low-latency.eb b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-low-latency.eb deleted file mode 100644 index de60398a7..000000000 --- a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA-low-latency.eb +++ /dev/null @@ -1,57 +0,0 @@ -easyblock = 'SystemBundle' - -name = 'OpenMPI-settings' -version = '4.1' -versionsuffix = 'CUDA-low-latency' - -homepage = '' -description = '''This is a module to load the default OpenMPI configuration - -This module is otherwise equivalent to mpi-settings/CUDA, but enables UCX_MEMTYPE_CACHE. Please read the URL below to -understand if this is something you can use: - -http://openucx.github.io/ucx/faq.html#7-what-are-the-current-limitations-of-using-gpu-memory -''' - -modloadmsg = ''' -This module is otherwise equivalent to mpi-settings/CUDA, but enables UCX_MEMTYPE_CACHE. 
Please read the URL below to -understand if this is something you can use: - -http://openucx.github.io/ucx/faq.html#7-what-are-the-current-limitations-of-using-gpu-memory -''' - -site_contacts = 'd.alvarez@fz-juelich.de' - -toolchain = SYSTEM - -source_urls = [] - -sources = [] -modextravars = { - 'SLURM_MPI_TYPE': 'pspmix', - 'UCX_TLS': 'rc_x,cuda_ipc,gdr_copy,self,sm,cuda_copy', - 'UCX_MEMTYPE_CACHE': 'y', - 'OMPI_MCA_mca_base_component_show_load_errors': '1', - 'OMPI_MCA_mpi_param_check': '1', - 'OMPI_MCA_mpi_show_handle_leaks': '1', - 'OMPI_MCA_mpi_warn_on_fork': '1', - # Disable uct for the time being due to: - # https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX#running-open-mpi-with-ucx - # Also openib, since it is deprecated and should be substituted by the UCX support in the pml - 'OMPI_MCA_btl': '^uct,openib', - 'OMPI_MCA_btl_openib_allow_ib': '1', - 'OMPI_MCA_bml_r2_show_unreach_errors': '0', - 'OMPI_MCA_coll': '^ml', - 'OMPI_MCA_coll_hcoll_enable': '1', - 'OMPI_MCA_coll_hcoll_np': '0', - 'OMPI_MCA_pml': 'ucx', - 'OMPI_MCA_osc': '^rdma', - 'OMPI_MCA_opal_abort_print_stack': '1', - 'OMPI_MCA_opal_set_max_sys_limits': '1', - 'OMPI_MCA_opal_event_include': 'epoll', - 'OMPI_MCA_btl_openib_warn_default_gid_prefix': '0', - # OMPIO does not seem to work reliably on our system - 'OMPI_MCA_io': 'romio321', -} - -moduleclass = 'system' diff --git a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX-RC.eb b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX-RC.eb deleted file mode 100644 index 9cb62ebcb..000000000 --- a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-UCX-RC.eb +++ /dev/null @@ -1,44 +0,0 @@ -easyblock = 'SystemBundle' - -name = 'OpenMPI-settings' -version = '4.1' -versionsuffix = 'UCX-RC' - -homepage = '' -description = """This is a module to load the an OpenMPI configuration for nodes not equipped with GPUs""" - -site_contacts = 'd.alvarez@fz-juelich.de' - -toolchain = SYSTEM - -source_urls = [] - 
-sources = [] -modextravars = { - 'SLURM_MPI_TYPE': 'pspmix', - 'UCX_TLS': 'rc_x,self,sm', - 'UCX_MEMTYPE_CACHE': 'n', - 'OMPI_MCA_mca_base_component_show_load_errors': '1', - 'OMPI_MCA_mpi_param_check': '1', - 'OMPI_MCA_mpi_show_handle_leaks': '1', - 'OMPI_MCA_mpi_warn_on_fork': '1', - # Disable uct for the time being due to: - # https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX#running-open-mpi-with-ucx - # Also openib, since it is deprecated and should be substituted by the UCX support in the pml - 'OMPI_MCA_btl': '^uct,openib', - 'OMPI_MCA_btl_openib_allow_ib': '1', - 'OMPI_MCA_bml_r2_show_unreach_errors': '0', - 'OMPI_MCA_coll': '^ml', - 'OMPI_MCA_coll_hcoll_enable': '1', - 'OMPI_MCA_coll_hcoll_np': '0', - 'OMPI_MCA_pml': 'ucx', - 'OMPI_MCA_osc': '^rdma', - 'OMPI_MCA_opal_abort_print_stack': '1', - 'OMPI_MCA_opal_set_max_sys_limits': '1', - 'OMPI_MCA_opal_event_include': 'epoll', - 'OMPI_MCA_btl_openib_warn_default_gid_prefix': '0', - # OMPIO does not seem to work reliably on our system - 'OMPI_MCA_io': 'romio321', -} - -moduleclass = 'system' diff --git a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA.eb b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-plain.eb similarity index 86% rename from Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA.eb rename to Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-plain.eb index 351d51b18..7566c840e 100644 --- a/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-CUDA.eb +++ b/Golden_Repo/o/OpenMPI-settings/OpenMPI-settings-4.1-plain.eb @@ -2,10 +2,10 @@ easyblock = 'SystemBundle' name = 'OpenMPI-settings' version = '4.1' -versionsuffix = 'CUDA' +versionsuffix = 'plain' homepage = '' -description = """This is a module to load the default OpenMPI configuration""" +description = 'This is a module to load the default OpenMPI configuration. It relies on UCX.' 
site_contacts = 'd.alvarez@fz-juelich.de' @@ -16,8 +16,6 @@ source_urls = [] sources = [] modextravars = { 'SLURM_MPI_TYPE': 'pspmix', - 'UCX_TLS': 'rc_x,cuda_ipc,gdr_copy,self,sm,cuda_copy', - 'UCX_MEMTYPE_CACHE': 'n', 'OMPI_MCA_mca_base_component_show_load_errors': '1', 'OMPI_MCA_mpi_param_check': '1', 'OMPI_MCA_mpi_show_handle_leaks': '1', diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb deleted file mode 100644 index dd2d90c6b..000000000 --- a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-low-latency-UD.eb +++ /dev/null @@ -1,39 +0,0 @@ -easyblock = 'SystemBundle' - -name = 'psmpi-settings' -version = '5.4' -versionsuffix = 'CUDA-low-latency-UD' - -homepage = '' -description = '''This is a module to load the default ParaStationMPI configuration - -This module is otherwise equivalent to mpi-settings/CUDA, but enables UCX_MEMTYPE_CACHE. Please read the URL below to -understand if this is something you can use: - -http://openucx.github.io/ucx/faq.html#7-what-are-the-current-limitations-of-using-gpu-memory -''' - -modloadmsg = ''' -This module is otherwise equivalent to mpi-settings/CUDA, but enables UCX_MEMTYPE_CACHE. 
Please read the URL below to -understand if this is something you can use: - -http://openucx.github.io/ucx/faq.html#7-what-are-the-current-limitations-of-using-gpu-memory -''' - -site_contacts = 'd.alvarez@fz-juelich.de' - -toolchain = SYSTEM - -source_urls = [] - -sources = [] -modextravars = { - 'PSP_CUDA': '1', - 'PSP_SHM': '0', - 'PSP_UCP': '1', - 'PSP_HARD_ABORT': '1', - 'UCX_TLS': 'ud_x,cuda_ipc,gdr_copy,self,sm,cuda_copy', - 'UCX_MEMTYPE_CACHE': 'y', -} - -moduleclass = 'system' diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-UD.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA.eb similarity index 61% rename from Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-UD.eb rename to Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA.eb index 6ffafc830..c6f5352f2 100644 --- a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA-UD.eb +++ b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-CUDA.eb @@ -2,10 +2,10 @@ easyblock = 'SystemBundle' name = 'psmpi-settings' version = '5.4' -versionsuffix = 'CUDA-UD' +versionsuffix = 'CUDA' homepage = '' -description = 'This is a module to load the default ParaStationMPI configuration' +description = 'This is a module to load the ParaStationMPI configuration. It enables UCX as a communication library and CUDA-aware features.' 
site_contacts = 'd.alvarez@fz-juelich.de' @@ -15,12 +15,11 @@ source_urls = [] sources = [] modextravars = { + 'PSP_OPENIB': '0', + 'PSP_UCP': '1', 'PSP_CUDA': '1', 'PSP_SHM': '0', - 'PSP_UCP': '1', 'PSP_HARD_ABORT': '1', - 'UCX_TLS': 'ud_x,cuda_ipc,gdr_copy,self,sm,cuda_copy', - 'UCX_MEMTYPE_CACHE': 'n', } moduleclass = 'system' diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX-UD.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX.eb similarity index 77% rename from Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX-UD.eb rename to Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX.eb index 4a568a19b..95183c01a 100644 --- a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX-UD.eb +++ b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-UCX.eb @@ -2,10 +2,10 @@ easyblock = 'SystemBundle' name = 'psmpi-settings' version = '5.4' -versionsuffix = 'UCX-UD' +versionsuffix = 'UCX' homepage = '' -description = 'This is a module to load the ParaStationMPI configuration. It enables UCX with UD as transport' +description = 'This is a module to load the ParaStationMPI configuration. It enables UCX as a communication library.' site_contacts = 'd.alvarez@fz-juelich.de' @@ -18,7 +18,6 @@ modextravars = { 'PSP_OPENIB': '0', 'PSP_UCP': '1', 'PSP_HARD_ABORT': '1', - 'UCX_TLS': 'ud_x,self,sm', } moduleclass = 'system' diff --git a/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-plain.eb b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-plain.eb new file mode 100644 index 000000000..20bbab3d8 --- /dev/null +++ b/Golden_Repo/p/psmpi-settings/psmpi-settings-5.4-plain.eb @@ -0,0 +1,21 @@ +easyblock = 'SystemBundle' + +name = 'psmpi-settings' +version = '5.4' +versionsuffix = 'plain' + +homepage = '' +description = 'This is a module to load the ParaStationMPI configuration. It relies on the defaults.' 
+ +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'PSP_HARD_ABORT': '1', +} + +moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb new file mode 100644 index 000000000..fc2aeb05e --- /dev/null +++ b/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb @@ -0,0 +1,20 @@ +easyblock = 'SystemBundle' + +name = 'UCX-settings' +version = 'DC-CUDA' + +homepage = '' +description = 'This is a module to set UCX to use DC as the transport layer, together with the CUDA-aware transports.' + +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'UCX_TLS': 'dc_x,self,sm,cuda_ipc,gdr_copy,cuda_copy', +} + +moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-DC.eb b/Golden_Repo/u/UCX-settings/UCX-settings-DC.eb new file mode 100644 index 000000000..878cd3d7c --- /dev/null +++ b/Golden_Repo/u/UCX-settings/UCX-settings-DC.eb @@ -0,0 +1,20 @@ +easyblock = 'SystemBundle' + +name = 'UCX-settings' +version = 'DC' + +homepage = '' +description = 'This is a module to set UCX to use DC as the transport layer.' + +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'UCX_TLS': 'dc_x,self,sm', +} + +moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb new file mode 100644 index 000000000..9b9011760 --- /dev/null +++ b/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb @@ -0,0 +1,20 @@ +easyblock = 'SystemBundle' + +name = 'UCX-settings' +version = 'RC-CUDA' + +homepage = '' +description = 'This is a module to set UCX to use RC as the transport layer, together with the CUDA-aware transports.' 
+ +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'UCX_TLS': 'rc_x,self,sm,cuda_ipc,gdr_copy,cuda_copy', +} + +moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-RC.eb b/Golden_Repo/u/UCX-settings/UCX-settings-RC.eb new file mode 100644 index 000000000..150f2f32a --- /dev/null +++ b/Golden_Repo/u/UCX-settings/UCX-settings-RC.eb @@ -0,0 +1,20 @@ +easyblock = 'SystemBundle' + +name = 'UCX-settings' +version = 'RC' + +homepage = '' +description = 'This is a module to set UCX to use RC as the transport layer.' + +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'UCX_TLS': 'rc_x,self,sm', +} + +moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb new file mode 100644 index 000000000..9d3d21cc0 --- /dev/null +++ b/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb @@ -0,0 +1,20 @@ +easyblock = 'SystemBundle' + +name = 'UCX-settings' +version = 'UD-CUDA' + +homepage = '' +description = 'This is a module to set UCX to use UD as the transport layer, together with the CUDA-aware transports.' + +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'UCX_TLS': 'ud_x,self,sm,cuda_ipc,gdr_copy,cuda_copy', +} + +moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-UD.eb b/Golden_Repo/u/UCX-settings/UCX-settings-UD.eb new file mode 100644 index 000000000..2beb8b3fe --- /dev/null +++ b/Golden_Repo/u/UCX-settings/UCX-settings-UD.eb @@ -0,0 +1,20 @@ +easyblock = 'SystemBundle' + +name = 'UCX-settings' +version = 'UD' + +homepage = '' +description = 'This is a module to set UCX to use UD as the transport layer.' 
+ +site_contacts = 'd.alvarez@fz-juelich.de' + +toolchain = SYSTEM + +source_urls = [] + +sources = [] +modextravars = { + 'UCX_TLS': 'ud_x,self,sm', +} + +moduleclass = 'system' diff --git a/Overlays/jurecadc_overlay/b/BullMPI-settings/BullMPI-settings-4.1-CUDA.eb b/Overlays/jurecadc_overlay/b/BullMPI-settings/BullMPI-settings-4.1-CUDA.eb index 08c35eea4..115be0f2c 100644 --- a/Overlays/jurecadc_overlay/b/BullMPI-settings/BullMPI-settings-4.1-CUDA.eb +++ b/Overlays/jurecadc_overlay/b/BullMPI-settings/BullMPI-settings-4.1-CUDA.eb @@ -16,8 +16,6 @@ source_urls = [] sources = [] modextravars = { 'SLURM_MPI_TYPE': 'pspmix', - 'UCX_TLS': 'rc_x,cuda_ipc,gdr_copy,self,sm,cuda_copy', - 'UCX_MEMTYPE_CACHE': 'n', 'OMPI_MCA_mca_base_component_show_load_errors': '1', 'OMPI_MCA_mpi_param_check': '1', 'OMPI_MCA_mpi_show_handle_leaks': '1', -- GitLab