diff --git a/3_affinity_query/3_affinity_query.cpp b/3_affinity_query/3_affinity_query.cpp index 29dfa96264b1dc1f4aa9821178c9fdb9551d07b7..e0ead30aad1b7433c0eb8fca20333ca177fdd5b0 100644 --- a/3_affinity_query/3_affinity_query.cpp +++ b/3_affinity_query/3_affinity_query.cpp @@ -35,6 +35,10 @@ void numa_in_operations(int socket_num){ int main() { +/* + * output: Size, Sum, serial time, NUMA domain time, node time, NUMA-aware time + * +*/ int n_sockets, socket_num; int n_procs; @@ -44,10 +48,11 @@ int main() n_sockets = omp_get_num_places(); - printf("number of sockets = %d \n", n_sockets); +// printf("number of sockets = %d \n", n_sockets); - int size = 1000000000; + int size = 100000000; +/*Serial Sum*/ double *a = new double[size]; for(int i = 0; i < size; i++){ @@ -57,37 +62,105 @@ int main() double sum = 0; auto t1 = high_resolution_clock::now(); + for(int i = 0; i < size; i++){ sum += a[i]; } + auto t2 = high_resolution_clock::now(); + auto t = duration_cast<duration<double>>(t2 - t1); - printf("Sum of array is : %f in %f seconds\n", sum, t.count()); + printf("%d,%f,%f,", size, sum, t.count()); + + delete []a; + +/*Numa domain Sum*/ + double *b = new double[size]; - double total = 0; - auto start = high_resolution_clock::now(); + for(int i = 0; i < size; i++){ + b[i] = i + 1; + } + + sum = 0; - #pragma omp parallel num_threads(n_sockets) shared(total) private(socket_num, n_procs) proc_bind(spread) + t1 = high_resolution_clock::now(); + + #pragma omp parallel num_threads(n_sockets) shared(sum) private(socket_num, n_procs) proc_bind(spread) { socket_num = omp_get_place_num(); n_procs = omp_get_place_num_procs(socket_num); if(socket_num == 0){ - #pragma omp parallel for reduction(+:total) num_threads(n_procs) + #pragma omp parallel for reduction(+:sum) num_threads(n_procs) for(int i = 0; i < size; i++){ - total += a[i]; + sum += b[i]; } }else{ +/* printf("The other sockets do nothing\n"); - } +*/ + } } - auto end = high_resolution_clock::now(); + t2 = high_resolution_clock::now(); + + t = duration_cast<duration<double>>(t2 - t1); + + printf("%f,", t.count()); + + delete [] b; + +/*Node*/ + double *c = new double[size]; + + for(int i = 0; i < size; i++){ + c[i] = i + 1; + } + + sum = 0; + + t1 = high_resolution_clock::now(); + + #pragma omp parallel for reduction(+:sum) + for(int i = 0; i < size; i++){ + sum += c[i]; + } + + t2 = high_resolution_clock::now(); + + t = duration_cast<duration<double>>(t2 - t1); + + printf("%f,", t.count()); + +// printf("Node: Sum of array is : %f in %f seconds\n", sum, t.count()); + + delete [] c; + +/*Node with NUMA-Aware*/ + double *d = new double[size]; + + #pragma omp parallel for + for(int i = 0; i < size; i++){ + d[i] = i + 1; + } + + sum = 0; + + t1 = high_resolution_clock::now(); + + #pragma omp parallel for reduction(+:sum) + for(int i = 0; i < size; i++){ + sum += d[i]; + } + + t2 = high_resolution_clock::now(); + + t = duration_cast<duration<double>>(t2 - t1); - auto time_span = duration_cast<duration<double>>(end - start); + printf("%f\n", t.count()); - printf("Total of array is : %f in %f seconds\n", total, time_span.count()); + delete [] d; return 0; }