diff --git a/README.md b/README.md
index c0f06ce9e9f49d7e534b65a161ad99c558ee1b19..5bf5f55e19850c7f162c550d3aff3fc103823078 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ module purge
 module load Stages/2023 
 module load GCC/11.3.0 CUDA/11.7 OpenMPI/4.1.4 NCCL/default-CUDA-11.7 Nsight-Systems/2023.2.1 MPI-settings/CUDA-UCC
 # NCCL Version 2.15.1
+# UCC Version 1.1.0
+# UCX Version 1.13.1
 
 
 # All variant have the following command line options
@@ -48,40 +50,39 @@ srun  ./jacobi  -niter 10 -nx ${NXNY} -ny ${NXNY} 2>&1 | tee -a debug_log.txt
 ## Error
 
 ```
-Single GPU jacobi relaxation: 100 iterations on 20480 x 20480 mesh with norm check every 1 iterations
+Single GPU jacobi relaxation: 10 iterations on 20480 x 20480 mesh with norm check every 1 iterations
     0, 35.776176
-[jwb0085:22560:0:22560] Caught signal 11 (Segmentation fault: invalid permissions for mapped object at address 0x147768000000)
-==== backtrace (tid:  22560) ====
- 0 0x000000000004eb80 killpg()  ???:0
+[jrc0438:2954 :0:2954] Caught signal 11 (Segmentation fault: invalid permissions for mapped object at address 0x14a63a000000)
+==== backtrace (tid:   2956) ====
+ 0 0x000000000004eb50 killpg()  ???:0
  1 0x0000000000221af5 cuEGLApiInit()  ???:0
  2 0x0000000000238d90 cuEGLApiInit()  ???:0
  3 0x0000000000238efd cuEGLApiInit()  ???:0
  4 0x000000000031deb5 cuMemMapArrayAsync()  ???:0
- 5 0x000000000001d115 ???()  /p/software/juwelsbooster/stages/2023/software/CUDA/11.7/lib/libcudart.so.11.0:0
+ 5 0x000000000001d115 ???()  /p/software/jurecadc/stages/2023/software/CUDA/11.7/lib/libcudart.so.11.0:0
  6 0x000000000005be34 cudaGraphAddKernelNode()  ???:0
- 7 0x0000000000405763 main()  /p/project/cexalab/john2/task_graph/graph_wo_streams.cu:381
+ 7 0x00000000004058db main()  /p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/graph_wo_streams.cu:414
  8 0x000000000003ad85 __libc_start_main()  ???:0
  9 0x000000000040360e _start()  ???:0
 =================================
-[jwb0085:22560] *** Process received signal ***
-[jwb0085:22560] Signal: Segmentation fault (11)
-[jwb0085:22560] Signal code:  (-6)
-[jwb0085:22560] Failing at address: 0x448100005820
-[jwb0085:22560] [ 0] /usr/lib64/libc.so.6(+0x4eb80)[0x1479e2792b80]
-[jwb0085:22560] [ 1] /p/software/juwelsbooster/stages/2023/software/nvidia-driver/default/lib/libcuda.so.1(+0x221af5)[0x1479da608af5]
-[jwb0085:22560] [ 2] /p/software/juwelsbooster/stages/2023/software/nvidia-driver/default/lib/libcuda.so.1(+0x238d90)[0x1479da61fd90]
-[jwb0085:22560] [ 3] /p/software/juwelsbooster/stages/2023/software/nvidia-driver/default/lib/libcuda.so.1(+0x238efd)[0x1479da61fefd]
-[jwb0085:22560] [ 4] /p/software/juwelsbooster/stages/2023/software/nvidia-driver/default/lib/libcuda.so.1(+0x31deb5)[0x1479da704eb5]
-[jwb0085:22560] [ 5] /p/software/juwelsbooster/stages/2023/software/CUDA/11.7/lib/libcudart.so.11.0(+0x1d115)[0x1479e5582115]
-[jwb0085:22560] [ 6] /p/software/juwelsbooster/stages/2023/software/CUDA/11.7/lib/libcudart.so.11.0(cudaGraphAddKernelNode+0x204)[0x1479e55c0e34]
-[jwb0085:22560] [ 7] ./jacobi[0x405763]
-[jwb0085:22560] [ 8] /usr/lib64/libc.so.6(__libc_start_main+0xe5)[0x1479e277ed85]
-[jwb0085:22560] [ 9] ./jacobi[0x40360e]
-[jwb0085:22560] *** End of error message ***
-srun: error: jwb0085: task 0: Segmentation fault
+[jrc0438:02956] *** Process received signal ***
+[jrc0438:02956] Signal: Segmentation fault (11)
+[jrc0438:02956] Signal code:  (-6)
+[jrc0438:02956] Failing at address: 0x448100000b8c
+[jrc0438:02956] [ 0] /usr/lib64/libc.so.6(+0x4eb50)[0x14d3bb145b50]
+[jrc0438:02956] [ 1] /usr/lib64/libcuda.so.1(+0x221af5)[0x14d3b2608af5]
+[jrc0438:02956] [ 2] /usr/lib64/libcuda.so.1(+0x238d90)[0x14d3b261fd90]
+[jrc0438:02956] [ 3] /usr/lib64/libcuda.so.1(+0x238efd)[0x14d3b261fefd]
+[jrc0438:02956] [ 4] /usr/lib64/libcuda.so.1(+0x31deb5)[0x14d3b2704eb5]
+[jrc0438:02956] [ 5] /p/software/jurecadc/stages/2023/software/CUDA/11.7/lib/libcudart.so.11.0(+0x1d115)[0x14d3bfe26115]
+[jrc0438:02956] [ 6] /p/software/jurecadc/stages/2023/software/CUDA/11.7/lib/libcudart.so.11.0(cudaGraphAddKernelNode+0x204)[0x14d3bfe64e34]
+[jrc0438:02956] [ 7] ./jacobi[0x4058db]
+[jrc0438:02956] [ 8] /usr/lib64/libc.so.6(__libc_start_main+0xe5)[0x14d3bb131d85]
+[jrc0438:02956] [ 9] ./jacobi[0x40360e]
+[jrc0438:02956] *** End of error message ***
 ```
 
-## Compute-sanitizer
+## Excerpt from the Compute Sanitizer log
 
 ```
 ========= COMPUTE-SANITIZER
@@ -111,10 +112,66 @@ srun: error: jwb0085: task 0: Segmentation fault
 =========                in /p/software/jurecadc/stages/2023/software/OpenMPI/4.1.4-GCC-11.3.0/lib/libmpi.so.40
 =========     Host Frame:MPI_Init [0x7af8e]
 =========                in /p/software/jurecadc/stages/2023/software/OpenMPI/4.1.4-GCC-11.3.0/lib/libmpi.so.40
-=========     Host Frame:/p/project/cexalab/john2/task_graph/graph_wo_streams.cu:177:main [0x3782]
-=========                in /p/project/cexalab/john2/task_graph/./jacobi
+=========     Host Frame:/p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/graph_wo_streams.cu:177:main [0x3782]
+=========                in /p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/./jacobi
 =========     Host Frame:__libc_start_main [0x3ad85]
 =========                in /usr/lib64/libc.so.6
 =========     Host Frame:_start [0x360e]
-=========                in /p/project/cexalab/john2/task_graph/./jacobi
+=========                in /p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/./jacobi
+
+[... output truncated ...]
+========= Invalid __global__ write of size 8 bytes
+=========     at 0x8e10 in ncclKernel_SendRecv_RING_SIMPLE_Sum_int8_t(ncclDevComm *, unsigned long, ncclWork *)
+=========     by thread (240,0,0) in block (0,0,0)
+=========     Address 0x154f5fa00000 is out of bounds
+=========     and is 50,331,648 bytes before the nearest allocation at 0x154f62a00000 of size 6,291,456 bytes
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame: [0x319c12]
+=========                in /usr/lib64/libcuda.so.1
+=========     Host Frame:__cudart808 [0xdea9b]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:cudaLaunchKernel [0x13a238]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/enqueue.cc:1068:ncclLaunchKernel(ncclComm*, ncclKernelPlan*) [0x5f27d]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/group.cc:340:groupLaunch(ncclAsyncJob*) [0x63f8f]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/group.cc:376:ncclGroupEndInternal() [0x64ae8]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/group.cc:106:ncclGroupEnd [0x65179]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/graph_wo_streams.cu:310:main [0x50f3]
+=========                in /p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/./jacobi
+=========     Host Frame:__libc_start_main [0x3ad85]
+=========                in /usr/lib64/libc.so.6
+=========     Host Frame:_start [0x360e]
+=========                in /p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/./jacobi
+========= 
+========= Invalid __global__ write of size 16 bytes
+=========     at 0x70f0 in ncclKernel_SendRecv_RING_SIMPLE_Sum_int8_t(ncclDevComm *, unsigned long, ncclWork *)
+=========     by thread (320,0,0) in block (1,0,0)
+=========     Address 0x154f5d001000 is out of bounds
+=========     and is 6,295,553 bytes after the nearest allocation at 0x154f5c400000 of size 6,291,456 bytes
+=========     Saved host backtrace up to driver entry point at kernel launch time
+=========     Host Frame: [0x319c12]
+=========                in /usr/lib64/libcuda.so.1
+=========     Host Frame:__cudart808 [0xdea9b]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:cudaLaunchKernel [0x13a238]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/enqueue.cc:1068:ncclLaunchKernel(ncclComm*, ncclKernelPlan*) [0x5f27d]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/group.cc:340:groupLaunch(ncclAsyncJob*) [0x63f8f]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/group.cc:376:ncclGroupEndInternal() [0x64ae8]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/dev/shm/swmanage/jurecadc/NCCL/default/GCCcore-11.3.0-CUDA-11.7/nccl/src/group.cc:106:ncclGroupEnd [0x65179]
+=========                in /p/software/jurecadc/stages/2023/software/NCCL/default-GCCcore-11.3.0-CUDA-11.7/lib/libnccl.so.2
+=========     Host Frame:/p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/graph_wo_streams.cu:310:main [0x50f3]
+=========                in /p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/./jacobi
+=========     Host Frame:__libc_start_main [0x3ad85]
+=========                in /usr/lib64/libc.so.6
+=========     Host Frame:_start [0x360e]
+=========                in /p/project/cexalab/john2/task_graph/cuda-nccl-taskgraph/./jacobi
+========= 
 ```