From ad07528c8a28f0f936c53c2e6e3c53645aaa1f7e Mon Sep 17 00:00:00 2001
From: Ali Mohammed <ali.mohammed@hpe.com>
Date: Mon, 19 Dec 2022 10:26:48 -0600
Subject: [PATCH] Print OpenMP and example thread pinning information

---
 examples/omp_consumer.c    | 11 +++++++----
 examples/omp_injector.c    |  8 ++++++++
 examples/run_core_bench.sh | 16 +++++++++-------
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/examples/omp_consumer.c b/examples/omp_consumer.c
index 1ca298ad..cc69ff47 100644
--- a/examples/omp_consumer.c
+++ b/examples/omp_consumer.c
@@ -139,12 +139,16 @@ mstro_status require_CDOs(mstro_cdo *cdos, size_t num_CDOs, int *injector_ids, i
   int CDOs_per_inj = num_CDOs / num_injectors ;
   size_t cdoidx, cdo_gid, i, j;
 
-  
+  const char *omp_env = getenv("OMP_NUM_THREADS"); /* may be unset */
+  int num = omp_env ? atoi(omp_env) : 1;
+  #pragma omp parallel for schedule(static,1)
+  for(int t = 0; t < num; t++)
+  {
+           INFO("Running on CPU %d \n", sched_getcpu());
+  }
 
   /*declare CDOs loop */
   #pragma omp parallel for private(cdoidx, cdo_gid, i, j) reduction(| :s)
   for(cdo_gid=0; cdo_gid < num_CDOs; cdo_gid++) {
-
     i = cdo_gid / CDOs_per_inj; /* injector id */
     j = cdo_gid % CDOs_per_inj; /* cdo id within this injector */
     
@@ -191,8 +195,7 @@ mstro_status demand_CDOs(mstro_cdo *cdos, size_t num_CDOs){
 
   #pragma omp parallel for reduction(| :s)
   for(size_t i=0; i < num_CDOs; i++){
-    mstro_status s3,s4;
-
+    mstro_status s3,s4;
     s3= mstro_cdo_demand(cdos[i]);
     DEBUG("Hey, I recieved %s \n", mstro_cdo_name(cdos[i]));
     s4= mstro_cdo_dispose(cdos[i]);
diff --git a/examples/omp_injector.c b/examples/omp_injector.c
index c2fa8657..0cf05f14 100644
--- a/examples/omp_injector.c
+++ b/examples/omp_injector.c
@@ -52,6 +52,14 @@ mstro_status inject_CDOs(int injector_id, mstro_cdo *cdos, size_t num_CDOs, size
 
   size_t cdoidx;
 
+  const char *omp_env = getenv("OMP_NUM_THREADS"); /* may be unset */
+  int num = omp_env ? atoi(omp_env) : 1;
+#pragma omp parallel for schedule(static,1)
+  for(int t = 0; t < num; t++)
+  {
+           INFO("Running on CPU %d \n", sched_getcpu());
+  }
+
   /* declare CDOs loop */
   #pragma omp parallel for private(cdoidx) reduction(| :s)
   for(size_t i=0; i < num_CDOs; i++) {
diff --git a/examples/run_core_bench.sh b/examples/run_core_bench.sh
index 84e58a7e..aab6da42 100755
--- a/examples/run_core_bench.sh
+++ b/examples/run_core_bench.sh
@@ -19,13 +19,16 @@
 # number of procduer and comsumer threads
 export OMP_NUM_THREADS=4
 #OpenMP thread pinning for application threads
-export OMP_PLACES=cores
+export OMP_PLACES="{0,1,2,3}"
 export OMP_PROC_BIND=close
+#debug omp pinning
+export CRAY_OMP_CHECK_AFFINITY=TRUE
 
 #Maestro thread pinning
-export MSTRO_BIND_PM_PC=10
-export MSTRO_BIND_TRANSPORT_THREAD=11
-export MSTRO_BIND_CQ_HANDLER="3-12"
+export MSTRO_BIND_PM_PC=8
+export MSTRO_BIND_TRANSPORT_THREAD=7
+export MSTRO_BIND_CQ_HANDLER="4-6"
+export MSTRO_LOG_LEVEL=0
 
 # FI provider, e.g. sockets, gni, verbs, cxi
 export FI_PROVIDER=cxi
@@ -47,8 +50,7 @@ export MSTRO_CONSUMER_MODE=MSTRO_CONSUMER_SINK_ALL
 # total number of ranks =  number of producer ranks + number of consumer ranks + 1 (pool manager rank)
 # number of procducers = total number of ranks - #consumers - 1 (pool manager)
 
-# srun <options> <core_bench> <#attributes> <attribute size> <#CDOs/thread> <#consumers> <CDO size in bytes>
-srun --exclusive --cpu-bind=v -c 250 --ntasks-per-node 1 -n 3 ./core_bench 0 0  20 1 671088640
-
+# srun <options> <core_bench> <#attributes> <attribute size> <#CDOs/thread> <#consumers> <CDO size in bytes>
+srun --exclusive -c 128  --cpu-bind=v --ntasks-per-node 1 -N 3 ./core_bench 0 0  20 1 671088640
 
 
-- 
GitLab