From 70cd85e62b83a95d64461af425afafc626fb597d Mon Sep 17 00:00:00 2001 From: Damian Alvarez <swmanage@jwlogin03.juwels> Date: Wed, 30 Aug 2023 09:26:14 +0200 Subject: [PATCH] To make sure that managed buffers also get transferred via NVlink --- Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb | 3 +++ Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb | 3 +++ Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb | 3 +++ 3 files changed, 9 insertions(+) diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb index d335c8261..513c564e0 100644 --- a/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb +++ b/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb @@ -20,6 +20,9 @@ modextravars = { # It actually has the side effect of using Ethernet and IB ports on JUSUF, which end up saturating the ethernet # fabric and result in a slow down 'UCX_MAX_RNDV_RAILS': '1', + # To make sure that transfers involving buffers allocated with cudaMallocManaged are done by the GPU. Important + # mostly for nodes with multiple GPUs per node + 'UCX_RNDV_FRAG_MEM_TYPE': 'cuda', } moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb index f3d285521..d0be7425e 100644 --- a/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb +++ b/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb @@ -20,6 +20,9 @@ modextravars = { # It actually has the side effect of using Ethernet and IB ports on JUSUF, which end up saturating the ethernet # fabric and result in a slow down 'UCX_MAX_RNDV_RAILS': '1', + # To make sure that transfers involving buffers allocated with cudaMallocManaged are done by the GPU. 
Important + # mostly for nodes with multiple GPUs per node + 'UCX_RNDV_FRAG_MEM_TYPE': 'cuda', } moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb index a2d8d3732..e581a757e 100644 --- a/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb +++ b/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb @@ -20,6 +20,9 @@ modextravars = { # It actually has the side effect of using Ethernet and IB ports on JUSUF, which end up saturating the ethernet # fabric and result in a slow down 'UCX_MAX_RNDV_RAILS': '1', + # To make sure that transfers involving buffers allocated with cudaMallocManaged are done by the GPU. Important + # mostly for nodes with multiple GPUs per node + 'UCX_RNDV_FRAG_MEM_TYPE': 'cuda', } moduleclass = 'system' -- GitLab