# MSA CPU-GPU Ping Pong
Extending the previous simple examples, we now use Slurm's heterogeneous job features to send _ping pong_ messages of increasing size between the CPU memory of a node in one job component and the GPU memory of a node in the other.
TODOs in `ping-pong.cu` mark the points where pointers to GPU memory should be used instead of pointers to CPU memory. After working on the TODOs, execute the following on JUWELS Booster to compile `ping-pong.cu` with the right modules:
```bash
bash compile.sh
```
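For orientation, the GPU-side compilation might look roughly like the sketch below. The module names, the `nvcc` flags, and the output name are assumptions for illustration; the provided `compile.sh` is authoritative.

```bash
# Hypothetical sketch of a Booster-side compile step; module names and flags
# are assumptions. Use the compile.sh shipped with the exercise.
module load GCC ParaStationMPI MPI-settings/CUDA CUDA   # CUDA-aware MPI stack (assumed)
nvcc -arch=sm_80 -ccbin mpicxx ping-pong.cu -o ping-pong.gpu.out
```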
Execute the following on JUWELS Cluster to compile the CPU part of the application and submit a batch job (a sketch of such a heterogeneous job script is shown below):
```bash
bash compile.sh
```
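The batch script itself is not shown in this excerpt, but a heterogeneous Slurm job coupling a Cluster component and a Booster component could look roughly like the following sketch. The account placeholder, the partition names `batch` and `booster`, the `--gres` request, and the executable names are assumptions for illustration; the batch script shipped with the exercise is authoritative.

```bash
#!/bin/bash
# Hypothetical sketch of a heterogeneous Slurm job; partitions, account, and
# executable names are assumptions, not the exercise's actual batch script.
#SBATCH --account=<budget>          # your compute budget
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=batch           # CPU component (JUWELS Cluster)
#SBATCH hetjob
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --partition=booster         # GPU component (JUWELS Booster)
#SBATCH --gres=gpu:1

# One MPI rank per component; with an MSA-aware MPI both ranks end up in a
# single MPI_COMM_WORLD, so rank 0 (CPU) and rank 1 (GPU) can ping-pong.
srun ./ping-pong.cpu.out : ./ping-pong.gpu.out
```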
Diff of `ping-pong.cu`:

@@ -52,24 +52,38 @@ int main(int argc, char *argv[])
        A[i] = 0.0;
    }

    // TODO: Create an empty double pointer, d_A; allocate d_A on the GPU; copy the content of A to d_A
#ifdef SOLUTION
    double *d_A;
    cudaErrorCheck( cudaMalloc(&d_A, N*sizeof(double)) );
    cudaErrorCheck( cudaMemcpy(d_A, A, N*sizeof(double), cudaMemcpyHostToDevice) );
#endif
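    // Note: cudaErrorCheck is assumed to be an error-checking macro defined earlier in
    // ping-pong.cu (this excerpt starts mid-file). d_A points to GPU memory; passing it
    // directly to MPI_Send/MPI_Recv below requires a CUDA-aware MPI implementation,
    // which is assumed to be provided by the modules loaded via compile.sh.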
    int tag1 = 10;
    int tag2 = 20;
    int loop_count = 50;

    // TODO: Use the GPU pointer d_A in the following MPI calls instead of A

    // Warm-up loop
    for(int i=1; i<=5; i++){
        if(rank == 0){
#ifdef SOLUTION
            MPI_Send(d_A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(d_A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#else
            MPI_Send(A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#endif
        }
        else if(rank == 1){
#ifdef SOLUTION
            MPI_Recv(d_A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(d_A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#else
            MPI_Recv(A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#endif
        }
    }
@@ -77,14 +91,25 @@ int main(int argc, char *argv[])
    double start_time, stop_time, elapsed_time;
    start_time = MPI_Wtime();

    // TODO: Use the GPU pointer d_A in the following MPI calls instead of A

    for(int i=1; i<=loop_count; i++){
        if(rank == 0){
#ifdef SOLUTION
            MPI_Send(d_A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(d_A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#else
            MPI_Send(A, N, MPI_DOUBLE, 1, tag1, MPI_COMM_WORLD);
            MPI_Recv(A, N, MPI_DOUBLE, 1, tag2, MPI_COMM_WORLD, &stat);
#endif
        }
        else if(rank == 1){
#ifdef SOLUTION
            MPI_Recv(d_A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(d_A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#else
            MPI_Recv(A, N, MPI_DOUBLE, 0, tag1, MPI_COMM_WORLD, &stat);
            MPI_Send(A, N, MPI_DOUBLE, 0, tag2, MPI_COMM_WORLD);
#endif
        }
    }
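    // Presumably (not shown in this excerpt) the timing is closed right after the loop with
    // stop_time = MPI_Wtime() and elapsed_time = stop_time - start_time, giving the round-trip
    // time of loop_count ping-pong exchanges for the current message size N.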