diff --git a/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-solution.ipynb b/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-solution.ipynb index 0fcb248f49897236cd632ed8c94abc5e6a171045..b90620320230340b5ccd9ad9e6805a96ef0930ab 100644 --- a/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-solution.ipynb +++ b/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-solution.ipynb @@ -1100,7 +1100,7 @@ "**TASK**: Look at the Makefile and work on the TODOs. \n", "\n", "- First generate a `-Ofast`-optimised binary and note down the performance in terms of cycles, seconds, and L3 misses. This is our baseline!\n", - "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`). Compare performance of `-Ofast` with and without software prefetching" + "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`) \u2013 have a look at the TODO close to `CFLAGS_GCC`. Compare performance of `-Ofast` with and without software prefetching" ] }, { @@ -1621,7 +1621,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -1645,14 +1645,14 @@ "**Task**: Please add the correct OpenMP directives to poisson2d.c and compilations flags in the Makefile to enable OpenMP with GCC and XL compilers.\n", "\n", "* **Directives**: Look at the TODOs in [`poisson2d.c`](poisson2d.c) to add OpenMP parallelism. The pragmas in question are `#pragma omp parallel for` (and once it's `#pragma omp parallel for reduction(max:error)` \u2013\u00a0can you guess where?)\n", - "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`. For XL, we need to add `-qsmp=omp` to the list of compilation flags. \n", + "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`; add both to `CFLAGS_GCC`. For XL, we need to add `-qsmp=omp` to the list of compilation flags (see `CFLAGS_XL`).\n", "\n", "Afterwards, compile and run the application with the following commands." ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1677,14 +1677,14 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Job <24951> is submitted to default queue <batch>.\n", + "Job <25694> is submitted to default queue <batch>.\n", "<<Waiting for dispatch ...>>\n", "<<Starting on login1>>\n", "Jacobi relaxation calculation: max 1000 iterations on 1000 x 1000 mesh\n", @@ -1710,7 +1710,7 @@ " 700, 0.243173\n", " 800, 0.242228\n", " 900, 0.241291\n", - "1000x1000: Ref: 4.7430 s, This: 3.9363 s, speedup: 1.20\n" + "1000x1000: Ref: 4.6916 s, This: 3.9363 s, speedup: 1.20" ] } ], diff --git a/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-task.ipynb b/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-task.ipynb index 93d1c1b8b4690557cd242ddec45e1b5856b33d32..8450e3eb9625fa3470948e069bc028af204ef5a8 100644 --- a/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-task.ipynb +++ b/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization-task.ipynb @@ -1091,7 +1091,7 @@ "**TASK**: Look at the Makefile and work on the TODOs. \n", "\n", "- First generate a `-Ofast`-optimised binary and note down the performance in terms of cycles, seconds, and L3 misses. This is our baseline!\n", - "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`). Compare performance of `-Ofast` with and without software prefetching" + "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`) \u2013 have a look at the TODO close to `CFLAGS_GCC`. Compare performance of `-Ofast` with and without software prefetching" ] }, { @@ -1593,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -1617,14 +1617,14 @@ "**Task**: Please add the correct OpenMP directives to poisson2d.c and compilations flags in the Makefile to enable OpenMP with GCC and XL compilers.\n", "\n", "* **Directives**: Look at the TODOs in [`poisson2d.c`](poisson2d.c) to add OpenMP parallelism. The pragmas in question are `#pragma omp parallel for` (and once it's `#pragma omp parallel for reduction(max:error)` \u2013\u00a0can you guess where?)\n", - "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`. For XL, we need to add `-qsmp=omp` to the list of compilation flags. \n", + "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`; add both to `CFLAGS_GCC`. For XL, we need to add `-qsmp=omp` to the list of compilation flags (see `CFLAGS_XL`).\n", "\n", "Afterwards, compile and run the application with the following commands." ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1649,14 +1649,14 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Job <24951> is submitted to default queue <batch>.\n", + "Job <25694> is submitted to default queue <batch>.\n", "<<Waiting for dispatch ...>>\n", "<<Starting on login1>>\n", "Jacobi relaxation calculation: max 1000 iterations on 1000 x 1000 mesh\n", @@ -1682,7 +1682,7 @@ " 700, 0.243173\n", " 800, 0.242228\n", " 900, 0.241291\n", - "1000x1000: Ref: 4.7430 s, This: 3.9363 s, speedup: 1.20\n" + "1000x1000: Ref: 4.6916 s, This: 3.9363 s, speedup: 1.20" ] } ], @@ -1846,7 +1846,8 @@ } ], "source": [ - "!eval OMP_DISPLAY_ENV=true OMP_PLACES=\"{X},{Y},{Z},{A}\" OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" + "aff=\"{X},{Y},{Z},{A}\"\n", + "!eval OMP_DISPLAY_ENV=true OMP_PLACES=$aff OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" ] }, { @@ -1866,7 +1867,8 @@ } ], "source": [ - "!eval OMP_DISPLAY_ENV=true GOMP_CPU_AFFINITY=\"X,Y,Z,A\" OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" + "aff=\"X,Y,Z,A\"\n", + "!eval OMP_DISPLAY_ENV=true GOMP_CPU_AFFINITY=$aff OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" ] }, { diff --git a/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization.ipynb b/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization.ipynb index 95c240a71955474804ce25ba41bbfe6390a72e7d..2e3f577da788e3443fa257088ab6912924d0b1f8 100644 --- a/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization.ipynb +++ b/3-Optimizing_POWER/Handson/.master/HandsOnPerformanceOptimization.ipynb @@ -1100,7 +1100,7 @@ "**TASK**: Look at the Makefile and work on the TODOs. \n", "\n", "- First generate a `-Ofast`-optimised binary and note down the performance in terms of cycles, seconds, and L3 misses. This is our baseline!\n", - "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`). Compare performance of `-Ofast` with and without software prefetching" + "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`) – have a look at the TODO close to `CFLAGS_GCC`. Compare performance of `-Ofast` with and without software prefetching" ] }, { @@ -1621,7 +1621,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -1645,14 +1645,14 @@ "**Task**: Please add the correct OpenMP directives to poisson2d.c and compilations flags in the Makefile to enable OpenMP with GCC and XL compilers.\n", "\n", "* **Directives**: Look at the TODOs in [`poisson2d.c`](poisson2d.c) to add OpenMP parallelism. The pragmas in question are `#pragma omp parallel for` (and once it's `#pragma omp parallel for reduction(max:error)` – can you guess where?)\n", - "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`. For XL, we need to add `-qsmp=omp` to the list of compilation flags. \n", + "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`; add both to `CFLAGS_GCC`. For XL, we need to add `-qsmp=omp` to the list of compilation flags (see `CFLAGS_XL`).\n", "\n", "Afterwards, compile and run the application with the following commands." ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1677,14 +1677,14 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Job <24951> is submitted to default queue <batch>.\n", + "Job <25694> is submitted to default queue <batch>.\n", "<<Waiting for dispatch ...>>\n", "<<Starting on login1>>\n", "Jacobi relaxation calculation: max 1000 iterations on 1000 x 1000 mesh\n", @@ -1710,7 +1710,7 @@ " 700, 0.243173\n", " 800, 0.242228\n", " 900, 0.241291\n", - "1000x1000: Ref: 4.7430 s, This: 3.9363 s, speedup: 1.20\n" + "1000x1000: Ref: 4.6916 s, This: 3.9363 s, speedup: 1.20" ] } ], @@ -2042,7 +2042,8 @@ } ], "source": [ - "!eval OMP_DISPLAY_ENV=true OMP_PLACES=\"{X},{Y},{Z},{A}\" OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" + "aff=\"{X},{Y},{Z},{A}\"\n", + "!eval OMP_DISPLAY_ENV=true OMP_PLACES=$aff OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" ] }, { @@ -2062,7 +2063,8 @@ } ], "source": [ - "!eval OMP_DISPLAY_ENV=true GOMP_CPU_AFFINITY=\"X,Y,Z,A\" OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" + "aff=\"X,Y,Z,A\"\n", + "!eval OMP_DISPLAY_ENV=true GOMP_CPU_AFFINITY=$aff OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" ] }, { diff --git a/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.html b/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.html index 53ef1e0297ef9b6c833894d108f5ce480ef23bec..43cea080bc72f3e50468d5d197c1aef210c2d2c9 100644 --- a/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.html +++ b/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.html @@ -13552,7 +13552,7 @@ Adapt the <code>CFLAGS</code> of <code>poisson2d_ref_info</code> to include <cod <p><strong>TASK</strong>: Look at the Makefile and work on the TODOs.</p> <ul> <li>First generate a <code>-Ofast</code>-optimised binary and note down the performance in terms of cycles, seconds, and L3 misses. This is our baseline!</li> -<li>Modify the <code>Makefile</code> to add the option for software prefetching (<code>-fprefetch-loop-arrays</code>). Compare performance of <code>-Ofast</code> with and without software prefetching</li> +<li>Modify the <code>Makefile</code> to add the option for software prefetching (<code>-fprefetch-loop-arrays</code>) – have a look at the TODO close to <code>CFLAGS_GCC</code>. Compare performance of <code>-Ofast</code> with and without software prefetching</li> </ul> </div> @@ -13812,7 +13812,7 @@ First, we need to change directory to that of Task3. For Task 3 we modify poisso <h3 id="Part-A:-Implement-OpenMP-Pragmas;-Compilation">Part A: Implement OpenMP Pragmas; Compilation<a class="anchor-link" href="#Part-A:-Implement-OpenMP-Pragmas;-Compilation">¶</a></h3><p><strong>Task</strong>: Please add the correct OpenMP directives to poisson2d.c and compilations flags in the Makefile to enable OpenMP with GCC and XL compilers.</p> <ul> <li><strong>Directives</strong>: Look at the TODOs in <a href="poisson2d.c"><code>poisson2d.c</code></a> to add OpenMP parallelism. The pragmas in question are <code>#pragma omp parallel for</code> (and once it's <code>#pragma omp parallel for reduction(max:error)</code> – can you guess where?)</li> -<li><strong>Compilation</strong>: Please add compilation flags enabling OpenMP in GCC and XL to the <code>Makefile</code>. For GCC, we need to add <code>-fopenmp</code> and the application needs to be linked with <code>-lgomp</code>. For XL, we need to add <code>-qsmp=omp</code> to the list of compilation flags. </li> +<li><strong>Compilation</strong>: Please add compilation flags enabling OpenMP in GCC and XL to the <code>Makefile</code>. For GCC, we need to add <code>-fopenmp</code> and the application needs to be linked with <code>-lgomp</code>; add both to <code>CFLAGS_GCC</code>. For XL, we need to add <code>-qsmp=omp</code> to the list of compilation flags (see <code>CFLAGS_XL</code>).</li> </ul> <p>Afterwards, compile and run the application with the following commands.</p> @@ -13945,7 +13945,8 @@ We added <code>--bind none</code> to prevent <code>jsrun</code>, the scheduler o <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span><span class="nb">eval</span> <span class="nv">OMP_DISPLAY_ENV</span><span class="o">=</span><span class="nb">true</span> <span class="nv">OMP_PLACES</span><span class="o">=</span><span class="s2">"{X},{Y},{Z},{A}"</span> <span class="nv">OMP_NUM_THREADS</span><span class="o">=</span><span class="m">4</span> <span class="nv">$$</span>SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d <span class="m">1000</span> <span class="m">1000</span> <span class="m">100</span> <span class="p">|</span> grep <span class="s2">"OMP_PLACES\|speedup"</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">aff</span><span class="o">=</span><span class="s2">"</span><span class="si">{X}</span><span class="s2">,</span><span class="si">{Y}</span><span class="s2">,</span><span class="si">{Z}</span><span class="s2">,</span><span class="si">{A}</span><span class="s2">"</span> +<span class="o">!</span><span class="nb">eval</span> <span class="nv">OMP_DISPLAY_ENV</span><span class="o">=</span><span class="nb">true</span> <span class="nv">OMP_PLACES</span><span class="o">=</span><span class="nv">$aff</span> <span class="nv">OMP_NUM_THREADS</span><span class="o">=</span><span class="m">4</span> <span class="nv">$$</span>SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d <span class="m">1000</span> <span class="m">1000</span> <span class="m">100</span> <span class="p">|</span> grep <span class="s2">"OMP_PLACES\|speedup"</span> </pre></div> </div> @@ -13958,7 +13959,8 @@ We added <code>--bind none</code> to prevent <code>jsrun</code>, the scheduler o <div class="prompt input_prompt">In [ ]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span><span class="nb">eval</span> <span class="nv">OMP_DISPLAY_ENV</span><span class="o">=</span><span class="nb">true</span> <span class="nv">GOMP_CPU_AFFINITY</span><span class="o">=</span><span class="s2">"X,Y,Z,A"</span> <span class="nv">OMP_NUM_THREADS</span><span class="o">=</span><span class="m">4</span> <span class="nv">$$</span>SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d <span class="m">1000</span> <span class="m">1000</span> <span class="m">100</span> <span class="p">|</span> grep <span class="s2">"OMP_PLACES\|speedup"</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">aff</span><span class="o">=</span><span class="s2">"X,Y,Z,A"</span> +<span class="o">!</span><span class="nb">eval</span> <span class="nv">OMP_DISPLAY_ENV</span><span class="o">=</span><span class="nb">true</span> <span class="nv">GOMP_CPU_AFFINITY</span><span class="o">=</span><span class="nv">$aff</span> <span class="nv">OMP_NUM_THREADS</span><span class="o">=</span><span class="m">4</span> <span class="nv">$$</span>SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d <span class="m">1000</span> <span class="m">1000</span> <span class="m">100</span> <span class="p">|</span> grep <span class="s2">"OMP_PLACES\|speedup"</span> </pre></div> </div> diff --git a/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.ipynb b/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.ipynb index 23c64feef54d35c745cc31ab5fee6ae1fa515df8..73e6c052c436f7ab8ca5c14afed8eade4b3416a4 100644 --- a/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.ipynb +++ b/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.ipynb @@ -449,7 +449,7 @@ "**TASK**: Look at the Makefile and work on the TODOs. \n", "\n", "- First generate a `-Ofast`-optimised binary and note down the performance in terms of cycles, seconds, and L3 misses. This is our baseline!\n", - "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`). Compare performance of `-Ofast` with and without software prefetching" + "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`) – have a look at the TODO close to `CFLAGS_GCC`. Compare performance of `-Ofast` with and without software prefetching" ] }, { @@ -676,7 +676,7 @@ "**Task**: Please add the correct OpenMP directives to poisson2d.c and compilations flags in the Makefile to enable OpenMP with GCC and XL compilers.\n", "\n", "* **Directives**: Look at the TODOs in [`poisson2d.c`](poisson2d.c) to add OpenMP parallelism. The pragmas in question are `#pragma omp parallel for` (and once it's `#pragma omp parallel for reduction(max:error)` – can you guess where?)\n", - "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`. For XL, we need to add `-qsmp=omp` to the list of compilation flags. \n", + "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`; add both to `CFLAGS_GCC`. For XL, we need to add `-qsmp=omp` to the list of compilation flags (see `CFLAGS_XL`).\n", "\n", "Afterwards, compile and run the application with the following commands." ] @@ -792,7 +792,8 @@ }, "outputs": [], "source": [ - "!eval OMP_DISPLAY_ENV=true OMP_PLACES=\"{X},{Y},{Z},{A}\" OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" + "aff=\"{X},{Y},{Z},{A}\"\n", + "!eval OMP_DISPLAY_ENV=true OMP_PLACES=$aff OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" ] }, { @@ -803,7 +804,8 @@ }, "outputs": [], "source": [ - "!eval OMP_DISPLAY_ENV=true GOMP_CPU_AFFINITY=\"X,Y,Z,A\" OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" + "aff=\"X,Y,Z,A\"\n", + "!eval OMP_DISPLAY_ENV=true GOMP_CPU_AFFINITY=$aff OMP_NUM_THREADS=4 $$SC19_SUBMIT_CMD -c ALL_CPUS --bind none ./poisson2d 1000 1000 100 | grep \"OMP_PLACES\\|speedup\"" ] }, { diff --git a/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.pdf b/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.pdf index fb3a0d0f3565c974da147b87d3fcd24de583590a..b985f1a32ee25f6b774c73fdde89f4345f627b14 100644 Binary files a/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.pdf and b/3-Optimizing_POWER/Handson/HandsOnPerformanceOptimization.pdf differ diff --git a/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.html b/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.html index 639b1d1cdbea3815b2dd7690a5f6d6ac6ff35bf7..3ac2013d140cf62a33e745d13d149dd465df12b0 100644 --- a/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.html +++ b/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.html @@ -14350,7 +14350,7 @@ poisson2d.c:72:5: note: considering unrolling loop 1 at BB 14 <p><strong>TASK</strong>: Look at the Makefile and work on the TODOs.</p> <ul> <li>First generate a <code>-Ofast</code>-optimised binary and note down the performance in terms of cycles, seconds, and L3 misses. This is our baseline!</li> -<li>Modify the <code>Makefile</code> to add the option for software prefetching (<code>-fprefetch-loop-arrays</code>). Compare performance of <code>-Ofast</code> with and without software prefetching</li> +<li>Modify the <code>Makefile</code> to add the option for software prefetching (<code>-fprefetch-loop-arrays</code>) – have a look at the TODO close to <code>CFLAGS_GCC</code>. Compare performance of <code>-Ofast</code> with and without software prefetching</li> </ul> </div> @@ -14991,7 +14991,7 @@ First, we need to change directory to that of Task3. For Task 3 we modify poisso </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> -<div class="prompt input_prompt">In [10]:</div> +<div class="prompt input_prompt">In [1]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span></span><span class="o">%</span><span class="k">cd</span> ../Task3 @@ -15026,7 +15026,7 @@ First, we need to change directory to that of Task3. For Task 3 we modify poisso <h3 id="Part-A:-Implement-OpenMP-Pragmas;-Compilation">Part A: Implement OpenMP Pragmas; Compilation<a class="anchor-link" href="#Part-A:-Implement-OpenMP-Pragmas;-Compilation">¶</a></h3><p><strong>Task</strong>: Please add the correct OpenMP directives to poisson2d.c and compilations flags in the Makefile to enable OpenMP with GCC and XL compilers.</p> <ul> <li><strong>Directives</strong>: Look at the TODOs in <a href="poisson2d.c"><code>poisson2d.c</code></a> to add OpenMP parallelism. The pragmas in question are <code>#pragma omp parallel for</code> (and once it's <code>#pragma omp parallel for reduction(max:error)</code> – can you guess where?)</li> -<li><strong>Compilation</strong>: Please add compilation flags enabling OpenMP in GCC and XL to the <code>Makefile</code>. For GCC, we need to add <code>-fopenmp</code> and the application needs to be linked with <code>-lgomp</code>. For XL, we need to add <code>-qsmp=omp</code> to the list of compilation flags. </li> +<li><strong>Compilation</strong>: Please add compilation flags enabling OpenMP in GCC and XL to the <code>Makefile</code>. For GCC, we need to add <code>-fopenmp</code> and the application needs to be linked with <code>-lgomp</code>; add both to <code>CFLAGS_GCC</code>. For XL, we need to add <code>-qsmp=omp</code> to the list of compilation flags (see <code>CFLAGS_XL</code>).</li> </ul> <p>Afterwards, compile and run the application with the following commands.</p> @@ -15035,7 +15035,7 @@ First, we need to change directory to that of Task3. For Task 3 we modify poisso </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> -<div class="prompt input_prompt">In [39]:</div> +<div class="prompt input_prompt">In [28]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span>make poisson2d <span class="nv">CC</span><span class="o">=</span>gcc @@ -15075,7 +15075,7 @@ gcc -std=c99 -DUSE_DOUBLE -O3 -mcpu=power9 -mvsx -maltivec -fopenmp -lgomp p </div> <div class="cell border-box-sizing code_cell rendered"> <div class="input"> -<div class="prompt input_prompt">In [40]:</div> +<div class="prompt input_prompt">In [4]:</div> <div class="inner_cell"> <div class="input_area"> <div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span><span class="nb">eval</span> <span class="nv">$SC19_SUBMIT_CMD</span> ./poisson2d <span class="m">1000</span> <span class="m">1000</span> <span class="m">1000</span> @@ -15095,7 +15095,7 @@ gcc -std=c99 -DUSE_DOUBLE -O3 -mcpu=power9 -mvsx -maltivec -fopenmp -lgomp p <div class="output_subarea output_stream output_stdout output_text"> -<pre>Job <24951> is submitted to default queue <batch>. +<pre>Job <25694> is submitted to default queue <batch>. <<Waiting for dispatch ...>> <<Starting on login1>> Jacobi relaxation calculation: max 1000 iterations on 1000 x 1000 mesh @@ -15121,8 +15121,7 @@ Calculate current execution. 700, 0.243173 800, 0.242228 900, 0.241291 -1000x1000: Ref: 4.7430 s, This: 3.9363 s, speedup: 1.20 -</pre> +1000x1000: Ref: 4.6916 s, This: 3.9363 s, speedup: 1.20</pre> </div> </div> diff --git a/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.ipynb b/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.ipynb index 0fcb248f49897236cd632ed8c94abc5e6a171045..b90620320230340b5ccd9ad9e6805a96ef0930ab 100644 --- a/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.ipynb +++ b/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.ipynb @@ -1100,7 +1100,7 @@ "**TASK**: Look at the Makefile and work on the TODOs. \n", "\n", "- First generate a `-Ofast`-optimised binary and note down the performance in terms of cycles, seconds, and L3 misses. This is our baseline!\n", - "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`). Compare performance of `-Ofast` with and without software prefetching" + "- Modify the `Makefile` to add the option for software prefetching (`-fprefetch-loop-arrays`) \u2013 have a look at the TODO close to `CFLAGS_GCC`. Compare performance of `-Ofast` with and without software prefetching" ] }, { @@ -1621,7 +1621,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -1645,14 +1645,14 @@ "**Task**: Please add the correct OpenMP directives to poisson2d.c and compilations flags in the Makefile to enable OpenMP with GCC and XL compilers.\n", "\n", "* **Directives**: Look at the TODOs in [`poisson2d.c`](poisson2d.c) to add OpenMP parallelism. The pragmas in question are `#pragma omp parallel for` (and once it's `#pragma omp parallel for reduction(max:error)` \u2013\u00a0can you guess where?)\n", - "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`. For XL, we need to add `-qsmp=omp` to the list of compilation flags. \n", + "* **Compilation**: Please add compilation flags enabling OpenMP in GCC and XL to the `Makefile`. For GCC, we need to add `-fopenmp` and the application needs to be linked with `-lgomp`; add both to `CFLAGS_GCC`. For XL, we need to add `-qsmp=omp` to the list of compilation flags (see `CFLAGS_XL`).\n", "\n", "Afterwards, compile and run the application with the following commands." ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1677,14 +1677,14 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Job <24951> is submitted to default queue <batch>.\n", + "Job <25694> is submitted to default queue <batch>.\n", "<<Waiting for dispatch ...>>\n", "<<Starting on login1>>\n", "Jacobi relaxation calculation: max 1000 iterations on 1000 x 1000 mesh\n", @@ -1710,7 +1710,7 @@ " 700, 0.243173\n", " 800, 0.242228\n", " 900, 0.241291\n", - "1000x1000: Ref: 4.7430 s, This: 3.9363 s, speedup: 1.20\n" + "1000x1000: Ref: 4.6916 s, This: 3.9363 s, speedup: 1.20" ] } ], diff --git a/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.pdf b/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.pdf index f5f690fbae5337e146bfcaaabe6c8670f49e31c0..6c0303d2c9cf446d87798bc9fda314115b578735 100644 Binary files a/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.pdf and b/3-Optimizing_POWER/Handson/Solution-Notebook/HandsOnPerformanceOptimization.pdf differ diff --git a/3-Optimizing_POWER/Handson/Task2/Makefile b/3-Optimizing_POWER/Handson/Task2/Makefile index c75085260c0916a702375172ec86a54b71a1597e..c5d3d0213a3419a9f1b259aa675da49e283c4553 100644 --- a/3-Optimizing_POWER/Handson/Task2/Makefile +++ b/3-Optimizing_POWER/Handson/Task2/Makefile @@ -21,8 +21,8 @@ CFLAGS_GCC = $(CFLAGS) -mcpu=power9 -mvsx -maltivec CFLAGS_XL = $(CFLAGS) -qarch=pwr9 -qtune=pwr9 -DINLINE_LIBS -poisson2d_pref: CFLAGS_GCC+=-fprefetch-loop-arrays -poisson2d_pref: CFLAGS_XL+=-qprefetch=aggressive +poisson2d_pref: CFLAGS_GCC+= # TODO: Add flag for prefetching +poisson2d_pref: CFLAGS_XL+="" poisson2d_dscr: CFLAGS_GCC+="" poisson2d_dscr: CFLAGS_XL+= # TODO: Add flag for dscr prefetches @@ -37,9 +37,9 @@ endif poisson2d poisson2d_pref poisson2d_dscr: poisson2d.c common.h Makefile ifeq ($(CC),xlc_r) - $(CC) $(CFLAGS_XL) poisson2d.c -o $@ -lm + $(CC) $(CFLAGS_XL) poisson2d.c -o poisson2d -lm else - $(CC) $(CFLAGS_GCC) poisson2d.c -o $@ -lm + $(CC) $(CFLAGS_GCC) poisson2d.c -o poisson2d -lm endif .PHONY: clean run runstats l3missstats perfgenerate perfview clean diff --git a/3-Optimizing_POWER/Handson/Task2/Solution/Makefile b/3-Optimizing_POWER/Handson/Task2/Solution/Makefile index 3b7defb6a22f5f3697742383b06d0047b74c0bb5..2a47fb8cd785a8182649ae1875679ec452f7ee09 100644 --- a/3-Optimizing_POWER/Handson/Task2/Solution/Makefile +++ b/3-Optimizing_POWER/Handson/Task2/Solution/Makefile @@ -37,9 +37,9 @@ endif poisson2d poisson2d_pref poisson2d_dscr: poisson2d.c common.h Makefile ifeq ($(CC),xlc_r) - $(CC) $(CFLAGS_XL) poisson2d.c -o $@ -lm + $(CC) $(CFLAGS_XL) poisson2d.c -o poisson2d -lm else - $(CC) $(CFLAGS_GCC) poisson2d.c -o $@ -lm + $(CC) $(CFLAGS_GCC) poisson2d.c -o poisson2d -lm endif .PHONY: clean run runstats l3missstats perfgenerat perfview clean