+ initial commit

91a231a2 · Jayesh Badwaik · 91a231a2 · 91a231a2 · 91a231a2
Unverified Commit 91a231a2 authored May 15, 2023 by Jayesh Badwaik
--- a/main.cu
+++ b/main.cu
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <iostream>
+#include <vector>
+
+// Kernel: every thread whose flat global index is below `size` atomically
+// adds 3 to the element at that index. `lhs` must be convertible to `int*`.
+// Threads with an index at or beyond `size` do nothing.
+template<class It1>
+__global__
+void async_pluseq_impl(int size, It1 lhs)
+{
+  const int increment = 3;
+  int* const data = lhs;
+  const int idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+  // Guard the tail of the grid: the launch may contain more threads than elements.
+  if (idx >= size) return;
+
+  atomicAdd(data + idx, increment);
+}
+
+
+
+// Uploads {1, 2, 3} to the device, launches async_pluseq_impl over it, copies
+// the result back and prints one element per line on stdout.
+// Returns 0 when the kernel launch was accepted, 1 when the launch was rejected.
+int main(){
+
+  thrust::device_vector<int> v = std::vector<int>({1, 2, 3});
+
+  int size = static_cast<int>(v.size());
+
+  // One thread per element, rounded up to whole blocks.
+  int blockSize = 256;
+  int gridSize = (size + blockSize - 1) / blockSize;
+
+  async_pluseq_impl<<<gridSize, blockSize>>>( size, thrust::raw_pointer_cast(v.data()));
+
+  // A kernel launch does not report failure by itself; the status has to be
+  // queried explicitly. Without this check a rejected launch is invisible and
+  // the program simply prints the unmodified input.
+  cudaError_t launchStatus = cudaGetLastError();
+  bool launched = (launchStatus == cudaSuccess);
+  if (!launched) {
+    std::cerr << "kernel launch failed: " << cudaGetErrorString(launchStatus) << std::endl;
+  }
+
+  // The values are still copied back and printed after a failed launch so that
+  // stdout keeps showing what the device buffer contains.
+  thrust::host_vector<int> vv(v.begin(), v.end());
+
+  for (auto e : vv) std::cout << e << std::endl;
+  return launched ? 0 : 1;
+}
--- a/readme.md
+++ b/readme.md
+# atomicAdd is not working on nvc++
+
+The example takes the array [1, 2, 3] and atomically adds 3 to every element,
+so the correct output is 4, 5, 6. When NVHPC is configured against native
+CUDA 11.7 instead of CUDA 12, the program prints a wrong result. The reproduce
+script builds and runs the example twice, first configured against CUDA 11.7
+and then against CUDA 12.0, so both outputs can be compared.
+
+To reproduce the bug, run `bash reproduce.sh` on any JSC A100 machine.
--- a/reproduce.sh
+++ b/reproduce.sh
+#!/bin/bash
+#
+# Builds and runs main.cu twice with NVHPC 23.1: once with nvc++ configured
+# against CUDA/11.7 and once against CUDA/.12.0. Each configuration is
+# generated with makelocalrc and installed as the user's nvc++ rc file.
+# The rc file that was present before the script started is put back on exit;
+# if there was none, the generated one is removed again.
+
+NVCPPRC="$HOME/.mynvc++rc"
+NVCPPRC_BACKUP="$NVCPPRC.bak"
+
+# Put the user's rc file back into its original state.
+restore_nvcpprc() {
+  if [ -f "$NVCPPRC_BACKUP" ]; then
+    mv "$NVCPPRC_BACKUP" "$NVCPPRC"
+  else
+    # No backup means there was no rc file to begin with: do not leave the
+    # generated one behind, it would silently change later nvc++ runs.
+    rm -f "$NVCPPRC"
+  fi
+}
+
+# Load the given CUDA module, regenerate the nvc++ configuration against it,
+# install it as the user's rc file, then build and run the example.
+#   $1: name of the CUDA module to load
+run_with_cuda() {
+  module load "$1"
+  makelocalrc -x "$NVHPC/Linux_x86_64/23.1/compilers/bin/" -d .
+  cp localrc "$NVCPPRC"
+  nvc++ main.cu  && ./a.out
+}
+
+if [ -f "$NVCPPRC" ]; then
+  # Refuse to continue without a backup; the rc file is overwritten below.
+  cp "$NVCPPRC" "$NVCPPRC_BACKUP" || exit 1
+fi
+
+# Installed only after the backup exists, so the cleanup can never delete an
+# rc file that has not been saved.
+trap restore_nvcpprc EXIT
+
+module load NVHPC/23.1
+run_with_cuda CUDA/11.7
+run_with_cuda CUDA/.12.0
+
+