diff --git a/examples/core_benchmark.c b/examples/core_benchmark.c
index 55c1e034524b80a3a3f20f09b99842893835b290..87bdd4927acd11b4df1cee11610a827ab712bd76 100644
--- a/examples/core_benchmark.c
+++ b/examples/core_benchmark.c
@@ -270,12 +270,12 @@ int main(int argc, char *argv[]) {
       demand_time = (after - before) * 1000.0*1000.0; //time in us seconds
       fprintf(stdout, "Throughput (demand/dispose): %.5lf us\n", demand_time/(double) num_CDOs);
       fprintf(stdout, "[Consumer %d] demand-dispose time: %.5lf s\n", rank,after-before);
-      
+      fprintf(stdout, "Bandwidth: %.5f MB/s\n", (double) (num_CDOs*size_CDO)/(demand_time/1000000)/1024/1024); //bandwidth = bytes/s 
       fprintf(stdout, "[Consumer %d] declare-dispose time: %.5lf s\n", rank,consumer_end-consumer_start);
 
       /* finalize Maestro */
       status = mstro_finalize();
-      //assert(status == MSTRO_OK);
+      assert(status == MSTRO_OK);
 
       /*clean up*/
       if (producers_ids != NULL){
@@ -349,7 +349,7 @@ int main(int argc, char *argv[]) {
       /*report time in microseconds */
       declare_time = (after - before) * 1000.0*1000.0;
 
-      fprintf(stdout, "#CDOs: %zu, #Threads: %zu, #Attributes: %zu, Size of attributes: %zu \n", num_CDOs, num_threads, num_attributes, size_attributes);
+      fprintf(stdout, "#CDOs: %zu, CDO size: %"PRId64", #Threads: %zu, #Attributes: %zu, Size of attributes: %zu \n", num_CDOs, size_CDO, num_threads, num_attributes, size_attributes);
       fprintf(stdout, "Throughput (declare/offer): %.5lf us\n", declare_time/(double) num_CDOs);
       fprintf(stdout, "[Producer %d] declare-offer time: %.5lf s\n", rank, after-before);
       
@@ -382,7 +382,7 @@ int main(int argc, char *argv[]) {
           free(CDO_data[i]);
       }
 
-      //assert(status == MSTRO_OK);
+      assert(status == MSTRO_OK);
     }
 
   }
@@ -395,7 +395,7 @@ int main(int argc, char *argv[]) {
   if ((rank == 0) && (size > 1)) {
 
     status = mstro_pm_terminate();
-    /*
+    
     if(status!=MSTRO_OK) {
       ERR("Simple Maestro Pool Manager: Failed to shut down pool: %d (%s)\n",
         status, mstro_status_description(status));
@@ -403,7 +403,7 @@ int main(int argc, char *argv[]) {
     
       MPI_Abort(MPI_COMM_WORLD, -1);
     }
-    */
+   
     status = mstro_finalize();
    
   }
diff --git a/examples/run_core_bench.sh b/examples/run_core_bench.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2eae588532739471c722e5251c9147a4ad17eaa6
--- /dev/null
+++ b/examples/run_core_bench.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+#SBATCH -N 3
+#SBATCH -n 3
+#SBATCH --time=00:10:00
+#SBATCH --job-name=mstro_core_bench
+#SBATCH --output=job.core_bench.%j.out
+#SBATCH --error=job.core_bench.%j.err
+#SBATCH --hint=nomultithread
+#SBATCH --exclusive
+#SBATCH --ntasks-per-node 1
+
+# core_bench is an MPI program in which one rank acts as a pool manager and the other ranks are producers and consumers.
+# It tests the bandwidth of moving data (core data objects, i.e. CDOs) between producers and consumers across nodes,
+# hence --ntasks-per-node 1 is needed to make sure producers and consumers are on different nodes.
+# It can also run on a single node, where it will use the local pool manager and test data movement between producer
+# and consumer threads.
+
+# number of producer and consumer threads
+export OMP_NUM_THREADS=4
+# OpenMP thread pinning
+export OMP_PLACES=cores
+export OMP_PROC_BIND=close
+
+# FI provider, e.g. sockets, gni, verbs, cxi
+export FI_PROVIDER=cxi
+
+# maestro transport method, options are OFI, GFS, MIO
+export MSTRO_TRANSPORT_DEFAULT=OFI
+export MPICH_MAX_THREAD_SAFETY=multiple
+
+# number of completion queue handler threads
+export MSTRO_OFI_CQ_NUM_THREADS=1
+
+# core_bench supports multiple consumer modes:
+# MSTRO_CONSUMER_SINK_ALL >> One consumer sinks in all data from all producers
+# MSTRO_CONSUMER_ONE2ONE  >> #consumers == #producers. Each consumer is assigned CDOs from one producer
+# MSTRO_CONSUMER_ONE2TEN  >> One consumer sinks data from 10 producers. #producers = 10 * #consumers
+# MSTRO_CONSUMER_ALL2ALL  >> Each consumer sinks data from all producers
+export MSTRO_CONSUMER_MODE=MSTRO_CONSUMER_SINK_ALL
+
+# total number of ranks = number of producer ranks + number of consumer ranks + 1 (pool manager rank)
+# number of producers = total number of ranks - #consumers - 1 (pool manager)
+
+# srun <options> <core_bench> <#attributes> <attribute size> <#CDOs/thread> <#consumers> <CDO size in bytes>
+srun --exclusive --ntasks-per-node 1 -n 3 ./core_bench 0 0  20 1 671088640
+
+
+