From ad07528c8a28f0f936c53c2e6e3c53645aaa1f7e Mon Sep 17 00:00:00 2001 From: Ali Mohammed <ali.mohammed@hpe.com> Date: Mon, 19 Dec 2022 10:26:48 -0600 Subject: [PATCH] print omp and example thread pinning --- examples/omp_consumer.c | 11 +++++++---- examples/omp_injector.c | 8 ++++++++ examples/run_core_bench.sh | 16 +++++++++------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/examples/omp_consumer.c b/examples/omp_consumer.c index 1ca298ad..cc69ff47 100644 --- a/examples/omp_consumer.c +++ b/examples/omp_consumer.c @@ -139,12 +139,16 @@ mstro_status require_CDOs(mstro_cdo *cdos, size_t num_CDOs, int *injector_ids, i int CDOs_per_inj = num_CDOs / num_injectors ; size_t cdoidx, cdo_gid, i, j; - + int num = atoi(getenv("OMP_NUM_THREADS")); + #pragma omp parallel for schedule(static,1) + for(int i =0; i<num;i++) + { + INFO("Running on CPU %d \n", sched_getcpu()); + } /*declare CDOs loop */ #pragma omp parallel for private(cdoidx, cdo_gid, i, j) reduction(| :s) for(cdo_gid=0; cdo_gid < num_CDOs; cdo_gid++) { - i = cdo_gid / CDOs_per_inj; /* injector id */ j = cdo_gid % CDOs_per_inj; /* cdo id within this injector */ @@ -191,8 +195,7 @@ mstro_status demand_CDOs(mstro_cdo *cdos, size_t num_CDOs){ #pragma omp parallel for reduction(| :s) for(size_t i=0; i < num_CDOs; i++){ - mstro_status s3,s4; - + mstro_status s3,s4; s3= mstro_cdo_demand(cdos[i]); DEBUG("Hey, I recieved %s \n", mstro_cdo_name(cdos[i])); s4= mstro_cdo_dispose(cdos[i]); diff --git a/examples/omp_injector.c b/examples/omp_injector.c index c2fa8657..0cf05f14 100644 --- a/examples/omp_injector.c +++ b/examples/omp_injector.c @@ -52,6 +52,14 @@ mstro_status inject_CDOs(int injector_id, mstro_cdo *cdos, size_t num_CDOs, size size_t cdoidx; + int num = atoi(getenv("OMP_NUM_THREADS")); +#pragma omp parallel for schedule(static,1) + for(int i =0; i<num;i++) + { + INFO("Running on CPU %d \n", sched_getcpu()); + + } + /* declare CDOs loop */ #pragma omp parallel for private(cdoidx) reduction(| 
:s) for(size_t i=0; i < num_CDOs; i++) { diff --git a/examples/run_core_bench.sh b/examples/run_core_bench.sh index 84e58a7e..aab6da42 100755 --- a/examples/run_core_bench.sh +++ b/examples/run_core_bench.sh @@ -19,13 +19,16 @@ # number of procduer and comsumer threads export OMP_NUM_THREADS=4 #OpenMP thread pinning for application threads -export OMP_PLACES=cores +export OMP_PLACES="{0,1,2,3}" export OMP_PROC_BIND=close +#debug omp pinning +export CRAY_OMP_CHECK_AFFINITY=TRUE #Maestro thread pinning -export MSTRO_BIND_PM_PC=10 -export MSTRO_BIND_TRANSPORT_THREAD=11 -export MSTRO_BIND_CQ_HANDLER="3-12" +export MSTRO_BIND_PM_PC=8 +export MSTRO_BIND_TRANSPORT_THREAD=7 +export MSTRO_BIND_CQ_HANDLER="4-6" +export MSTRO_LOG_LEVEL=0 # FI provider, e.g. sockets, gni, verbs, cxi export FI_PROVIDER=cxi @@ -47,8 +50,7 @@ export MSTRO_CONSUMER_MODE=MSTRO_CONSUMER_SINK_ALL # total number of ranks = number of producer ranks + number of consumer ranks + 1 (pool manager rank) # number of procducers = total number of ranks - #consumers - 1 (pool manager) -# srun <options> <core_bench> <#attributes> <attribute size> <#CDOs/thread> <#consumers> <CDO size in bytes> -srun --exclusive --cpu-bind=v -c 250 --ntasks-per-node 1 -n 3 ./core_bench 0 0 20 1 671088640 - +# srun <options> <core_bench> <#attributes> <attribute size> <#CDOs/thread> <#consumers> <CDO size in bytes> +srun --exclusive -c 128 --cpu-bind=v --ntasks-per-node 1 -N 3 ./core_bench 0 0 20 1 671088640 -- GitLab