Skip to content
Snippets Groups Projects
Select Git revision
  • master default protected
1 result

README.md

Blame
  • run_core_bench.sh 2.24 KiB
    #!/bin/sh

    #SBATCH --job-name=mstro_core_bench
    #SBATCH --output=job.core_bench.%j.out
    #SBATCH --error=job.core_bench.%j.err
    #SBATCH --time=00:10:00
    #SBATCH --nodes=3
    #SBATCH --ntasks=3
    #SBATCH --ntasks-per-node=1
    #SBATCH --exclusive
    #SBATCH --hint=nomultithread

    # Slurm batch script that launches the Maestro core_bench benchmark.
    #
    # core_bench is an MPI program: one rank acts as the pool manager, the other
    # ranks are producers and consumers. It measures the bandwidth of moving data
    # (core data objects, i.e. CDOs) between producers and consumers across nodes,
    # which is why one task per node is requested: producers and consumers must
    # end up on different nodes.
    # It can also run on a single node; in that case it uses the local pool
    # manager and measures data movement between producer and consumer threads.

    # ---------------------------------------------------------------------------
    # OpenMP: application (producer/consumer) threads
    # ---------------------------------------------------------------------------
    # Number of producer and consumer threads.
    export OMP_NUM_THREADS=4
    # Pinning of the application threads. The value below describes a single
    # OpenMP place made of CPUs 0-3.
    export OMP_PROC_BIND=close
    export OMP_PLACES='{0,1,2,3}'
    # Debug aid for the OpenMP pinning.
    export CRAY_OMP_CHECK_AFFINITY=TRUE

    # ---------------------------------------------------------------------------
    # Maestro: thread pinning, thread counts and logging
    # ---------------------------------------------------------------------------
    export MSTRO_BIND_PM_PC=8
    export MSTRO_BIND_TRANSPORT_THREAD=7
    export MSTRO_BIND_CQ_HANDLER=4
    export MSTRO_BIND_OP_THREAD=5-6
    # Number of completion queue handler threads.
    export MSTRO_OFI_CQ_NUM_THREADS=1
    export MSTRO_OPERATIONS_NUM_THREADS=1
    export MSTRO_LOG_LEVEL=0

    # ---------------------------------------------------------------------------
    # Transport
    # ---------------------------------------------------------------------------
    # Maestro transport method; the options are RDMA, GFS and MIO.
    export MSTRO_TRANSPORT_DEFAULT=RDMA
    # libfabric (FI) provider, e.g. sockets, gni, verbs, cxi.
    export FI_PROVIDER=cxi
    export MPICH_MAX_THREAD_SAFETY=multiple

    # ---------------------------------------------------------------------------
    # Consumer mode
    # ---------------------------------------------------------------------------
    # core_bench supports several consumer modes:
    #   MSTRO_CONSUMER_SINK_ALL  one consumer sinks all data from all producers
    #   MSTRO_CONSUMER_ONE2ONE   #consumers == #producers; each consumer is
    #                            assigned the CDOs of one producer
    #   MSTRO_CONSUMER_ONE2TEN   one consumer sinks data from 10 producers;
    #                            #producers = 10 * #consumers
    #   MSTRO_CONSUMER_ALL2ALL   every consumer sinks data from all producers
    export MSTRO_CONSUMER_MODE=MSTRO_CONSUMER_SINK_ALL

    # ---------------------------------------------------------------------------
    # Launch
    # ---------------------------------------------------------------------------
    # Rank accounting:
    #   total ranks = producer ranks + consumer ranks + 1 (pool manager rank)
    #   producers   = total ranks - consumers - 1 (pool manager)
    #
    # Usage:
    #   srun <options> <core_bench> <#attributes> <attribute size> <#CDOs/thread> <#consumers> <CDO size in bytes>
    attribute_count=0
    attribute_size=0
    cdos_per_thread=20
    consumer_count=1
    cdo_size_bytes=671088640   # 640 * 1024 * 1024

    srun --exclusive \
         --nodes=3 \
         --ntasks-per-node=1 \
         --cpus-per-task=128 \
         --cpu-bind=v \
         ./core_bench \
         "$attribute_count" "$attribute_size" "$cdos_per_thread" \
         "$consumer_count" "$cdo_size_bytes"