Skip to content
Snippets Groups Projects
Unverified Commit 91a231a2 authored by Jayesh Badwaik's avatar Jayesh Badwaik
Browse files

+ initial commit

parents
Branches
No related tags found
No related merge requests found
main.cu 0 → 100644
#include <iostream>
#include <vector>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
/**
 * Kernel: atomically adds the constant 3 to each of the first `size` ints
 * reachable through `lhs`.
 *
 * Expects a 1D launch; each thread handles the element at its flat global
 * index, and threads whose index is >= `size` do nothing.
 *
 * @tparam It1  type of `lhs`; it must convert implicitly to `int*`.
 * @param size  number of elements to update.
 * @param lhs   start of the int array to update in place.
 */
template<class It1>
__global__
void async_pluseq_impl(int size, It1 lhs)
{
    const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);

    // Threads past the end of the array have no element to update.
    if (idx >= size) {
        return;
    }

    int* const data = lhs;
    const int increment = 3;
    atomicAdd(&data[idx], increment);
}
/**
 * Host driver: uploads {1, 2, 3} to the device, launches async_pluseq_impl
 * over it, copies the result back and prints one value per line.
 *
 * A kernel launch does not report errors by itself, so the launch status and
 * the execution status are queried explicitly and reported on stderr. The
 * values are still printed afterwards so the observed output can be compared
 * with the expected one.
 *
 * @return 0 when the launch and execution succeeded, 1 otherwise.
 */
int main(){
    thrust::device_vector<int> v = std::vector<int>({1, 2, 3});
    const int size = static_cast<int>(v.size());

    // Ceil-division so that every element is covered by some thread.
    const int blockSize = 256;
    const int gridSize = (size + blockSize - 1) / blockSize;

    int status = 0;

    async_pluseq_impl<<<gridSize, blockSize>>>(size, thrust::raw_pointer_cast(v.data()));

    // Launch-time failures are only visible through cudaGetLastError().
    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        std::cerr << "kernel launch failed: " << cudaGetErrorString(launchStatus) << std::endl;
        status = 1;
    }

    // Failures during kernel execution surface at the next synchronizing call.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    if (syncStatus != cudaSuccess) {
        std::cerr << "kernel execution failed: " << cudaGetErrorString(syncStatus) << std::endl;
        status = 1;
    }

    // Copy the device data into a host vector and print it.
    thrust::host_vector<int> vv(v.begin(), v.end());
    for (auto e : vv) std::cout << e << std::endl;

    return status;
}
# atomicAdd is not working on nvc++
The example takes the array [1, 2, 3] and atomically adds 3 to every element,
so the correct output is 4, 5, 6. However, when NVHPC is configured against the
native CUDA 11.7 installation instead of CUDA 12, the computation produces an
incorrect result. The bug is therefore caused by this misconfiguration.
To reproduce the bug, run `./reproduce` on any JSC A100 machine.
#!/bin/bash
# Builds and runs main.cu twice with nvc++: first with a local nvc++
# configuration generated while CUDA/11.7 is loaded, then with one generated
# while CUDA/.12.0 is loaded, so the two outputs can be compared.
#
# The user's existing nvc++ rc file is backed up before it is overwritten and
# is put back when the script exits.

NVCPPRC="$HOME/.mynvc++rc"

# Put the user's rc file back. If there is no backup to restore, remove the rc
# file generated by this script so it does not stay behind in $HOME.
restore_rc() {
  if [ -f "$NVCPPRC.bak" ]; then
    mv "$NVCPPRC.bak" "$NVCPPRC"
  else
    rm -f "$NVCPPRC"
  fi
}

# Load the given CUDA module, regenerate the nvc++ local configuration,
# install it as the user rc file, then compile and run the example.
#   $1: name of the CUDA module to load
build_and_run() {
  module load "$1"
  makelocalrc -x "$NVHPC/Linux_x86_64/23.1/compilers/bin/" -d .
  cp localrc "$NVCPPRC"
  nvc++ main.cu && ./a.out
}

if [ -f "$NVCPPRC" ]; then
  cp "$NVCPPRC" "$NVCPPRC.bak"
fi

# Restore on exit, so a failure part-way through does not leave the generated
# rc file in place of the user's own.
trap restore_rc EXIT

module load NVHPC/23.1

build_and_run CUDA/11.7
build_and_run CUDA/.12.0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment