From f5d126570f236cb7d14003db2e165c965c5525d3 Mon Sep 17 00:00:00 2001 From: Ali Omar Abdelazim Mohammed <mohammal@hotlum-login.head.cm.hpcrb.rdlabs.ext.hpe.com> Date: Tue, 23 Aug 2022 07:04:44 -0500 Subject: [PATCH] add clarifications on FI_OFI_RXM_USE_SRX and FI_VERBS_PREFER_XRC for MPI apps with >=64 ranks --- maestro/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/maestro/core.c b/maestro/core.c index c22908f9..a40d60fa 100644 --- a/maestro/core.c +++ b/maestro/core.c @@ -610,6 +610,7 @@ mstro_core_init(const char *workflow_name, ERR("If your job terminates after this error please check\n" " ... that firewalling is not prohibiting libfabric communication\n" " ... that ulimit -n is high enough to keep a couple of FDs per network endpoints open (ulimit -n 1024 is a good start)\n" + " ... that FI_OFI_RXM_USE_SRX=1 and FI_VERBS_PREFER_XRC=0 with large MPI applications (>=64 ranks) when using verbs provider, especially with Cray MPICH\n" " ... that your workflow-name/component-name/component-index triple is correct\n"); goto BAILOUT; } -- GitLab