diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb index d335c8261eb2457db6ff040f80958482ca89a53d..513c564e05224d748ce747a06d5ae13a010c698c 100644 --- a/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb +++ b/Golden_Repo/u/UCX-settings/UCX-settings-DC-CUDA.eb @@ -20,6 +20,9 @@ modextravars = { # It actually has the side effect of using Ethernet and IB ports on JUSUF, which end up saturating the ethernet # fabric and result in a slow down 'UCX_MAX_RNDV_RAILS': '1', + # To make sure that transfers involving buffers allocated with cudaMallocManaged are done by the GPU. Important + # mostly for nodes with multiple GPUs per node + 'UCX_RNDV_FRAG_MEM_TYPE': 'cuda', } moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb index f3d28552145c2e7576c6060e8af92ca48897a126..d0be7425efc43b30b06c309756c11f7c4359ab1a 100644 --- a/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb +++ b/Golden_Repo/u/UCX-settings/UCX-settings-RC-CUDA.eb @@ -20,6 +20,9 @@ modextravars = { # It actually has the side effect of using Ethernet and IB ports on JUSUF, which end up saturating the ethernet # fabric and result in a slow down 'UCX_MAX_RNDV_RAILS': '1', + # To make sure that transfers involving buffers allocated with cudaMallocManaged are done by the GPU. 
Important + # mostly for nodes with multiple GPUs per node + 'UCX_RNDV_FRAG_MEM_TYPE': 'cuda', } moduleclass = 'system' diff --git a/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb b/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb index a2d8d373290e1f96e9aa4d79bb968a0b54ed583a..e581a757effba63bef1bcccb87935503dedc840a 100644 --- a/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb +++ b/Golden_Repo/u/UCX-settings/UCX-settings-UD-CUDA.eb @@ -20,6 +20,9 @@ modextravars = { # It actually has the side effect of using Ethernet and IB ports on JUSUF, which end up saturating the ethernet # fabric and result in a slow down 'UCX_MAX_RNDV_RAILS': '1', + # To make sure that transfers involving buffers allocated with cudaMallocManaged are done by the GPU. Important + # mostly for nodes with multiple GPUs per node + 'UCX_RNDV_FRAG_MEM_TYPE': 'cuda', } moduleclass = 'system'