# MSA CPU-GPU Ping Pong
Extending the previous simple examples, we now use Slurm's heterogeneous job features to send _ping pong_ messages of increasing size between the CPU memory of a node in one job component and the GPU memory of a node in the other.
TODOs in `ping-pong.cu` mark the points where pointers to GPU memory should be used instead of pointers to CPU memory. After working on the TODOs, execute the following on JUWELS Booster to compile `ping-pong.cu` with the right modules:
```bash
bash compile.sh
```
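For orientation, the GPU-side compilation might look roughly like the sketch below. The module names, the `nvcc` flags, and the output name are assumptions for illustration; the provided `compile.sh` is authoritative.

```bash
# Hypothetical sketch of a Booster-side compile step; module names and flags
# are assumptions. Use the compile.sh shipped with the exercise.
module load GCC ParaStationMPI MPI-settings/CUDA CUDA   # CUDA-aware MPI stack (assumed)
nvcc -arch=sm_80 -ccbin mpicxx ping-pong.cu -o ping-pong.gpu.out
```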
Execute the following on JUWELS Cluster to compile the CPU part of the application and submit a batch job (a sketch of such a heterogeneous job script is shown below):
```bash
bash compile.sh
```
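The batch script itself is not shown in this excerpt, but a heterogeneous Slurm job coupling a Cluster component and a Booster component could look roughly like the following sketch. The account placeholder, the partition names `batch` and `booster`, the `--gres` request, and the executable names are assumptions for illustration; the batch script shipped with the exercise is authoritative.

```bash
#!/bin/bash
# Hypothetical sketch of a heterogeneous Slurm job; partitions, account, and
# executable names are assumptions, not the exercise's actual batch script.
#SBATCH --account=<budget>          # your compute budget
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=batch           # CPU component (JUWELS Cluster)
#SBATCH hetjob
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=booster         # GPU component (JUWELS Booster)
#SBATCH --gres=gpu:1

# One MPI rank per component; with an MSA-aware MPI both ranks end up in a
# single MPI_COMM_WORLD, so rank 0 (CPU) and rank 1 (GPU) can ping-pong.
srun ./ping-pong.cpu.out : ./ping-pong.gpu.out
```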
Diff of `ping-pong.cu`:

@@ -52,24 +52,38 @@ int main(int argc, char *argv[])
        A[i] = 0.0;
    }

    // TODO: Create an empty double pointer, d_A; allocate d_A on the GPU; copy the content of A to d_A
#ifdef SOLUTION
    double *d_A;
    cudaErrorCheck( cudaMalloc(&d_A, N*sizeof(double)) );
    cudaErrorCheck( cudaMemcpy(d_A, A, N*sizeof(double), cudaMemcpyHostToDevice) );
#endif
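    // Note: cudaErrorCheck is assumed to be an error-checking macro defined earlier in
    // ping-pong.cu (this excerpt starts mid-file). d_A points to GPU memory; passing it
    // directly to MPI_Send/MPI_Recv below requires a CUDA-aware MPI implementation,
    // which is assumed to be provided by the modules loaded via compile.sh.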
    int tag1 = 10;
    int tag2 = 20;
    int loop_count = 50;

    // TODO: Use the GPU pointer d_A in the following MPI calls instead of A

    // Warm-up loop
    for(int i=1; i<=5; i++){
        if(rank == 0){
#ifdef SOLUTION
            MPI_Send(d_A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(d_A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#else
            MPI_Send(A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#endif
        }
        else if(rank == 1){
#ifdef SOLUTION
            MPI_Recv(d_A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(d_A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#else
            MPI_Recv(A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#endif
        }
    }
@@ -77,14 +91,25 @@ int main(int argc, char *argv[])
    double start_time, stop_time, elapsed_time;
    start_time = MPI_Wtime();

    // TODO: Use the GPU pointer d_A in the following MPI calls instead of A

    for(int i=1; i<=loop_count; i++){
        if(rank == 0){
#ifdef SOLUTION
            MPI_Send(d_A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(d_A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#else
            MPI_Send(A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#endif
        }
        else if(rank == 1){
#ifdef SOLUTION
            MPI_Recv(d_A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(d_A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#else
            MPI_Recv(A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#endif
        }
    }
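    // Presumably (not shown in this excerpt) the timing is closed right after the loop with
    // stop_time = MPI_Wtime() and elapsed_time = stop_time - start_time, giving the round-trip
    // time of loop_count ping-pong exchanges for the current message size N.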