From c8c5828f90f8e026e5e982e33cf7578d01adbc2b Mon Sep 17 00:00:00 2001 From: "Jan H. Meinke" <j.meinke@fz-juelich.de> Date: Wed, 6 Dec 2023 12:00:35 +0100 Subject: [PATCH] Update of material for High-Performance Computing with Python @ JSC 2023. --- 00_Introduction to IPython.ipynb | 61 +-- 01_Bottlenecks.ipynb | 21 +- 02_NumPy_concepts.ipynb | 10 +- 03_ThinkVector.ipynb | 20 +- 04_Particle Dynamics.ipynb | 433 +++++++++++++--- 05_Profiling a simple md code.ipynb | 15 +- 06_LocalParallel.ipynb | 167 +++--- 07_LocalTaskParallel.ipynb | 66 ++- 08_Numba vectorize.ipynb | 8 +- 09_NumbaIntro.ipynb | 132 +++-- 10_Speeding up your code with Cython.ipynb | 89 ++-- 11_Writing your own Python bindings.ipynb | 16 +- 12_Introduction to MPI.ipynb | 41 +- 13_Introduction to CuPy.ipynb | 32 +- 14_CUDA for Python.ipynb | 24 +- 15_CUDA and MPI.ipynb | 30 +- 16_Introduction to Dask.ipynb | 43 +- 17_Debugging.ipynb | 295 ++++++----- build.sh | 2 +- hpcpy22 | 19 - hpcpy23 | 26 + solutions/00_Introduction to IPython.ipynb | 61 +-- solutions/01_Bottlenecks.ipynb | 21 +- solutions/02_NumPy_concepts.ipynb | 10 +- solutions/03_ThinkVector.ipynb | 24 +- solutions/04_Particle Dynamics.ipynb | 482 +++++++++++++++--- solutions/05_Profiling a simple md code.ipynb | 17 +- solutions/06_LocalParallel.ipynb | 167 +++--- solutions/07_LocalTaskParallel.ipynb | 66 ++- solutions/08_Numba vectorize.ipynb | 8 +- solutions/09_NumbaIntro.ipynb | 140 +++-- ...10_Speeding up your code with Cython.ipynb | 89 ++-- .../11_Writing your own Python bindings.ipynb | 16 +- solutions/12_Introduction to MPI.ipynb | 65 ++- solutions/13_Introduction to CuPy.ipynb | 32 +- solutions/14_CUDA for Python.ipynb | 24 +- solutions/15_CUDA and MPI.ipynb | 126 +++-- solutions/16_Introduction to Dask.ipynb | 56 +- solutions/17_Debugging.ipynb | 295 ++++++----- solutions/build.sh | 2 +- solutions/code | 2 +- solutions/data | 2 +- solutions/hpcpy23 | 26 + 43 files changed, 2176 insertions(+), 1105 deletions(-) delete mode 100644 
hpcpy22 create mode 100755 hpcpy23 create mode 100755 solutions/hpcpy23 diff --git a/00_Introduction to IPython.ipynb b/00_Introduction to IPython.ipynb index 7d3b2c3..f981775 100644 --- a/00_Introduction to IPython.ipynb +++ b/00_Introduction to IPython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -169,9 +169,7 @@ "tags": [] }, "outputs": [], - "source": [ - "import random" - ] + "source": [] }, { "cell_type": "markdown", @@ -549,47 +547,35 @@ }, "outputs": [], "source": [ - "# %load http://matplotlib.org/mpl_examples/mplot3d/surface3d_demo.py\n", - "'''\n", - "======================\n", - "3D surface (color map)\n", - "======================\n", + "# %load https://matplotlib.org/stable/_downloads/0c69e8950c767c2d95108979a24ace2f/surface3d_simple.py\n", "\n", - "Demonstrates plotting a 3D surface colored with the coolwarm color map.\n", - "The surface is made opaque by using antialiased=False.\n", + "\"\"\"\n", + "=====================\n", + "3D surface\n", + "=====================\n", "\n", - "Also demonstrates using the LinearLocator and custom formatting for the\n", - "z axis tick labels.\n", - "'''\n", - "\n", - "from mpl_toolkits.mplot3d import Axes3D\n", + "See `~mpl_toolkits.mplot3d.axes3d.Axes3D.plot_surface`.\n", + "\"\"\"\n", "import matplotlib.pyplot as plt\n", "from matplotlib import cm\n", - "from matplotlib.ticker import LinearLocator, FormatStrFormatter\n", "import numpy as np\n", "\n", + "# plt.style.use('_mpl-gallery')\n", "\n", - "fig = plt.figure()\n", - "ax = fig.gca(projection='3d')\n", - "\n", - "# Make data.\n", + "# Make data\n", "X = np.arange(-5, 5, 0.25)\n", "Y = np.arange(-5, 5, 0.25)\n", "X, Y = np.meshgrid(X, Y)\n", "R = np.sqrt(X**2 + Y**2)\n", "Z = np.sin(R)\n", "\n", - "# Plot the surface.\n", - "surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,\n", - " linewidth=0, antialiased=False)\n", - "\n", - "# Customize the z 
axis.\n", - "ax.set_zlim(-1.01, 1.01)\n", - "ax.zaxis.set_major_locator(LinearLocator(10))\n", - "ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))\n", + "# Plot the surface\n", + "fig, ax = plt.subplots(subplot_kw={\"projection\": \"3d\"})\n", + "ax.plot_surface(X, Y, Z, vmin=Z.min() * 2, cmap=cm.Blues)\n", "\n", - "# Add a color bar which maps values to colors.\n", - "fig.colorbar(surf, shrink=0.5, aspect=5)\n", + "ax.set(xticklabels=[],\n", + " yticklabels=[],\n", + " zticklabels=[])\n", "\n", "plt.show()\n" ] @@ -625,9 +611,9 @@ } }, "source": [ - "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory and ``%cd --foo`` switches to the next directory in the history than contains ``foo``. You can also set ``%bookmark``s and use them with ``%cd``.\n", + "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory. You can also set ``%bookmark``s and use them with ``%cd``.\n", "\n", - "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. Go two levels up using ``%cd ..`` twice. Look at the history using ``%dhist``. Change into mandelbrot using ``%cd --brot``. Finally use ``%cd -0`` to get back to where you started from." + "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. 
Go one level up using ``%cd ..``. Look at the history using ``%dhist``. Finally use ``%cd -0`` to get back to where you started from." ] }, { @@ -772,7 +758,8 @@ }, "outputs": [], "source": [ - "a = Out[13] # Assign Out[?] to a (replace with index from two cells above)" + "a = Out[13] # Assign Out[?] to a (replace with index from two cells above)\n", + "a" ] }, { @@ -916,9 +903,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -930,7 +917,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/01_Bottlenecks.ipynb b/01_Bottlenecks.ipynb index d8ebdc8..742c473 100644 --- a/01_Bottlenecks.ipynb +++ b/01_Bottlenecks.ipynb @@ -11,7 +11,7 @@ "# Bottlenecks\n", "\n", "<div class=\"dateauthor\">\n", - "20 Jun 2022 | Jan H. Meinke\n", + "12 Jun 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -443,7 +443,7 @@ }, "outputs": [], "source": [ - "from numba import jit\n", + "from numba import njit as jit\n", "jdot = jit(dot)" ] }, @@ -511,7 +511,7 @@ }, "outputs": [], "source": [ - "import numpy; from numba import jit\n", + "import numpy; from numba import njit as jit\n", "\n", "@jit\n", "def dot2(a, b):\n", @@ -545,7 +545,7 @@ } }, "source": [ - "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s)." + "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s) making this compute bound on a single core." 
] }, { @@ -716,6 +716,13 @@ "print(2e-9 * n**3 / t_numpy_single.best, \"GFLOP/s (single core).\") " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The maximum clock frequency of the processor is 3.4 GHz, which corresponds to a peak performance of about 54 GFLOP/s. This is pretty close." + ] + }, { "cell_type": "code", "execution_count": null, @@ -871,9 +878,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -885,7 +892,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/02_NumPy_concepts.ipynb b/02_NumPy_concepts.ipynb index 0a0138d..743b74b 100644 --- a/02_NumPy_concepts.ipynb +++ b/02_NumPy_concepts.ipynb @@ -11,7 +11,7 @@ "# NumPy - an HPC perspective\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Olav Zimmermann\n", + "12 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -320,7 +320,7 @@ " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask array: only subset of ndarray functionality</td><td>tiled ndarrays larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask dataframe: only subset of pandas dataframe functionality</td><td>tiled dataframes larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://docs.rapids.ai/api/cudf/nightly/user_guide/10min.html\">dask-cuDF</a></b></code></td><td>cuDF dataframe: subset of pandas dataframe functionality</td><td>tiled dataframes on multiple GPUs and multiple nodes</td></tr>\n", - " <tr><td><code><b><a href=\"https://sparse.pydata.org/en/0.13.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse 
arrays (COO layout)</td><td></td></tr>\n", + " <tr><td><code><b><a href=\"https://sparse.pydata.org/en/0.14.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse arrays (COO layout)</td><td></td></tr>\n", " <tr><td><code><b><a href=\"https://docs.scipy.org/doc/scipy/reference/sparse.html\">SciPy.sparse</a></b></code></td><td>ndarray functionality on sparse arrays (all layouts)</td><td></td></tr>\n", " </table>" ] @@ -355,9 +355,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -369,7 +369,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/03_ThinkVector.ipynb b/03_ThinkVector.ipynb index 06b96d5..1c5fc81 100644 --- a/03_ThinkVector.ipynb +++ b/03_ThinkVector.ipynb @@ -5,13 +5,14 @@ "metadata": { "slideshow": { "slide_type": "slide" - } + }, + "tags": [] }, "source": [ "# Think Vector\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -374,7 +375,7 @@ } }, "source": [ - "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. Functions, like ``dot`` that have a different output shape than input shape are called generalized ufuncs." + "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. Functions, like ``dot`` that have a different output shape than input shape are called ``generalized ufuncs``." 
] }, { @@ -530,7 +531,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))" ] }, @@ -636,7 +637,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))\n" ] }, @@ -758,9 +759,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, "outputs": [], @@ -981,9 +979,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -995,7 +993,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/04_Particle Dynamics.ipynb b/04_Particle Dynamics.ipynb index 6ef39a8..2d6c356 100644 --- a/04_Particle Dynamics.ipynb +++ b/04_Particle Dynamics.ipynb @@ -3,11 +3,16 @@ { "cell_type": "markdown", "id": "5451ef11-f683-4995-bda8-c9d87abaec49", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "# Particle Dynamics with Python\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -15,7 +20,12 @@ "cell_type": "code", "execution_count": null, "id": "5822f3b3-bc03-4e2f-85f1-57cb246e3a05", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "import math\n", @@ -27,7 +37,12 @@ "cell_type": "code", "execution_count": null, "id": "f7d1939b-7d73-4c0c-9d8a-d6ea39d48b49", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "%matplotlib inline" @@ -35,59 +50,183 @@ }, { "cell_type": "markdown", - "id": "19819e70-b42c-405a-958f-70c05a972ee6", - "metadata": {}, + "id": "b6798959-bbef-4f71-b696-e1069554c403", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ "Particle dynamics simulations are common in various scientific fields. They are used to simulate \n", "the formation of galaxies and the movements of molecules in a cell. Particles can have different\n", - "properties such as mass and charge and interact in different ways.\n", - "\n", + "properties such as mass and charge and interact in different ways." + ] + }, + { + "cell_type": "markdown", + "id": "9f9b8f9d-c834-4b86-9ef1-e385694d4b8c", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Equations of motion\n", "A classical particle dynamics code solves Newton's equation of motion:\n", "\n", - "$$\\mathbf F = m \\mathbf a,$$\n", - "\n", + "$$\\mathbf F = m \\mathbf a \\ \\ \\ \\ [\\mathtt{1}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "2c250750-32b7-4a74-8c3e-5c3eb6c4a13d", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F$ is the force, $m$ the mass, and $\\mathbf a$ the acceleration. $\\mathbf F$ and \n", "$\\mathbf a$ are vectors.\n", "\n", - "In general, this problem is solvable analytically for two particles only . 
If there are more \n", + "In general, this problem is solvable analytically for two particles only. If there are more \n", "particles, we have to look for a numerical solution.\n", "\n", - "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as\n", - "\n", - "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt$$\n", - "\n", - "and the position $\\mathbf r$ as\n", - "\n", - "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2.$$\n", - "\n", + "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as" + ] + }, + { + "cell_type": "markdown", + "id": "00ee5853-283f-4786-bd4c-81ca9ab7b3b2", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt \\ \\ \\ \\ [\\mathtt{2}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "a6e75808-f266-4a57-9837-5b9aa69ee436", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "and the position $\\mathbf r$ as" + ] + }, + { + "cell_type": "markdown", + "id": "27adecd9-7499-4a86-bb62-15dd40377c72", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2 \\ \\ \\ \\ [\\mathtt{3}].$$" + ] + }, + { + "cell_type": "markdown", + "id": "35260044-1b70-46c5-8bfd-8475566037b4", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "If we know all the positions, velocities and masses at time $t$ and can calculate the forces, we \n", "can follow the motion of the particles over time." 
] }, { "cell_type": "markdown", - "id": "50ad1731-c5b0-4922-adc8-14e507a7b6b8", - "metadata": {}, + "id": "0167c3d7-4abc-4635-b53d-aa38072ff922", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Gravitational force" + ] + }, + { + "cell_type": "markdown", + "id": "96292513-eaee-4617-bacd-4d13a1f6f8ab", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "## Gravitational force\n", "Let's assume our particles only interact via gravity. Then the force between two particles is given \n", - "by\n", - "\n", - "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t),$$\n", - "\n", + "by" + ] + }, + { + "cell_type": "markdown", + "id": "cbab8258-28f9-41db-9dda-7f4a5be57603", + "metadata": { + "tags": [] + }, + "source": [ + "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t) \\ \\ \\ \\ [\\mathtt{4}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "c55acb8e-6cb4-459c-9241-9e42eb364b72", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F_{ij}(t)$ is the force on particle $i$ due to particle $j$. 
$r_{ij}(t)$ is the \n", "distance between particles $i$ and $j$, and $\\mathbf {\\hat r}_{ij}(t)$ is the unit vector pointing\n", "from $i$ to $j$.\n", "\n", - "To get the total force on particle $i$, we need to sum over all $j \\neq i$:\n", - "\n", - "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t).$$" + "To get the total force on particle $i$, we need to sum over all $j \\neq i$:" + ] + }, + { + "cell_type": "markdown", + "id": "d36faa34-7345-4e94-b19b-62e4419417e0", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t) \\ \\ \\ \\ [\\mathtt{5}].$$" ] }, { "cell_type": "markdown", "id": "32f7c975-ed21-4c70-9168-5b7bfa5ca276", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## The algorithm" ] @@ -107,8 +246,13 @@ }, { "cell_type": "markdown", - "id": "539c2d60-df7b-471b-a438-d9b4efb51781", - "metadata": {}, + "id": "efba7cbf-301a-4e5c-81d4-1394c5ec3c9f", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## (Parallel) Patterns\n", "In Think Vector, we got to know some patterns. 
Let's see how we can apply them here:\n", @@ -124,7 +268,19 @@ " \n", "Calculate the new position:\n", " This is a map, too.\n", - " \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "76d2db76-3bac-4465-9512-babcef5e721b", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "Now, let's try to express this in code.\n", " " ] @@ -133,7 +289,12 @@ "cell_type": "code", "execution_count": null, "id": "b4525c8a-378a-45b7-b1e2-b67f5f07d397", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "# Initialize positions and velocities\n", @@ -143,9 +304,11 @@ "dt = 0.1 # time step\n", "G = 1 # For simplicity we set the universal graviational constant to 1\n", "m = 1 # This corresponds to 150 x 10^9 kg\n", + "# random initial positions\n", "x = [random.uniform(-L2, L2) for i in range(N)]\n", "y = [random.uniform(-L2, L2) for i in range(N)]\n", "z = [random.uniform(-L2, L2) for i in range(N)]\n", + "# zero initial velocities\n", "vx = [0 for i in range(N)]\n", "vy = [0 for i in range(N)]\n", "vz = [0 for i in range(N)]" @@ -154,24 +317,62 @@ { "cell_type": "markdown", "id": "8fd053d2-8c88-4666-82ed-0316fe21ac34", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "### Calculating forces" ] }, { "cell_type": "markdown", - "id": "41861767-e08d-45b8-802a-28b269e3f7ee", - "metadata": {}, + "id": "ac5e70be-cafd-41cd-b866-5b98ee28fb0a", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "To calculate the forces (see eq. 4), we need a distance matrix, i.e. the distance $d_{ij}$ between each pair of particles ($r_{ij}$ in eq. 4). First we calculate the distance vector for each component, x, y, and z separately, this requires 3 map operations. 
Then we calculate from these vectors the pairwise distances in three dimensional space:" + ] + }, + { + "cell_type": "markdown", + "id": "c1d0d68d-23a4-45e1-a431-91e575056e21", + "metadata": { + "tags": [] + }, + "source": [ + "$$d=\\sqrt{dx^2+dy^2+dz^2} \\ \\ \\ \\ [\\mathtt{6}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "0b29d4d1-b6ef-4615-ab11-0bed26267252", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "To calculate the force, we need the distance vector first. These are actually 3 maps (one for each component). The result is a distance matrix. As mentioned before maps are expressed as list generators:" + "(another map operation). As mentioned before maps can be expressed as list comprehensions:" ] }, { "cell_type": "code", "execution_count": null, "id": "338142b6-f973-4f7a-b5a4-77e76f3b758f", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "Dxx = [(i - j) for j in x for i in x]\n", @@ -183,21 +384,31 @@ { "cell_type": "markdown", "id": "d0156a2d-13ae-46dd-b3a8-cb7eb1aca0bf", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "Now that we have the vector components and the magnitude of the vector, we can calculate the forces." + "Now that we have the vector components and the magnitudes of the vectors, we can calculate the forces (see eq. 4). We then sum all the forces acting on one particle for each particle (see eq. 5). Note that we also calculate the forces separately for each component." 
] }, { "cell_type": "code", "execution_count": null, "id": "e841a076-504d-445b-b006-b931e3cb0bc2", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ - "Fxx = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dxx, D)] # epsilon prevents a zero in the dominator.\n", - "Fyy = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dyy, D)]\n", - "Fzz = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dzz, D)]\n", + "Fxx = [G * m * m * dxx / (d * d * d + epsilon) for dxx, d in zip(Dxx, D)] # epsilon prevents a zero in the denominator.\n", + "Fyy = [G * m * m * dyy / (d * d * d + epsilon) for dyy, d in zip(Dyy, D)]\n", + "Fzz = [G * m * m * dzz / (d * d * d + epsilon) for dzz, d in zip(Dzz, D)]\n", "Fx = [sum(Fxx[i * N: (i + 1) * N]) for i in range(N)]\n", "Fy = [sum(Fyy[i * N: (i + 1) * N]) for i in range(N)]\n", "Fz = [sum(Fzz[i * N: (i + 1) * N]) for i in range(N)]" @@ -206,19 +417,39 @@ { "cell_type": "markdown", "id": "3de052ac-7591-4477-8285-cc15c0019a7a", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's visualize the forces on the particles:" ] }, + { + "cell_type": "markdown", + "id": "235e1971-24e0-4cf8-ac27-779e5ae37684", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize forces" + ] + }, { "cell_type": "code", "execution_count": null, "id": "1133b4bb-111b-4aca-9326-22a7c29c8522", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, Fx, Fy, Fz)" ] @@ -226,7 +457,12 @@ { "cell_type": "markdown", "id": "ccea23e5-4f4b-4ff6-b379-8d45e3fe15f4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": 
[ "### Integrating the equation of motion" ] @@ -234,9 +470,14 @@ { "cell_type": "markdown", "id": "dba27f9b-350e-4e65-9f42-e3615ee30a84", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "We are ready to update the positions and velocities of our particles:" + "We are ready to update the positions and velocities of our particles. First we use eq. 3 to calculate the new positions. Note that we substituted $\\bf{a}$ in eq. 3 by $\\frac{\\mathbf{F}}{m}$ using eq. 1. " ] }, { @@ -251,11 +492,29 @@ "z = [i + v * dt + 0.5 * f / m * dt * dt for i, v, f in zip(z, vz, Fz)]" ] }, + { + "cell_type": "markdown", + "id": "52959ed7-d454-40fb-98f1-9df161873c87", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "Using the same expression for $\\bf{a}$ as above we now use eq. 2 to calculate the new velocities:" + ] + }, { "cell_type": "code", "execution_count": null, "id": "2266d4e8-8f67-4979-ae47-abf8508673a4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "vx = [v + f / m * dt for v, f in zip(vx, Fx)]\n", @@ -266,11 +525,29 @@ { "cell_type": "markdown", "id": "e4cff076-759c-477c-9758-41bb730cd606", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's take a look at the particle positions and velocities:" ] }, + { + "cell_type": "markdown", + "id": "92a88a32-4ee1-44ce-b371-afd412359a3b", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize velocities" + ] + }, { "cell_type": "code", "execution_count": null, @@ -278,7 +555,7 @@ "metadata": {}, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, vx, vy, vz)" ] @@ 
-286,15 +563,25 @@ { "cell_type": "markdown", "id": "65984f53-4b54-4f6d-aaa1-6de391150539", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ - "That's it. By going back to the [calculation of the forces](#Calculating-forces), we can follow the motion of the particles over time." + "That's it. By going back to the [calculation of the forces](#Calculating-forces) and iterating over the steps again, we can follow the motion of the particles over time." ] }, { "cell_type": "markdown", "id": "f1f30004-a9c3-4499-84e0-976937b9f8a8", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "## Exercise\n", "Rewrite the program in a vectorized manner using `ndarray`s." @@ -304,32 +591,34 @@ "cell_type": "code", "execution_count": null, "id": "039819a6-698f-43a6-a4f0-4f7b8852fbb1", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "8cb45f43-29e2-49df-a976-bf7790fe5a44", - "metadata": {}, + "id": "5a141c1e-22b6-40be-80d5-25ad2648972c", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ - "### Solution:" + "Tip: if your velocities are too small to see the directions of the velocity arrows, just scale vx, vy, and vz in the ax.quiver lines. Where do they point to? Can you do the same scaling in the list version above?" 
] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f236119-af8c-499d-86cf-1d6b98f9e5fd", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -341,7 +630,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/05_Profiling a simple md code.ipynb b/05_Profiling a simple md code.ipynb index 1590da7..33a7b56 100644 --- a/05_Profiling a simple md code.ipynb +++ b/05_Profiling a simple md code.ipynb @@ -10,7 +10,7 @@ "source": [ "# Profiling\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -639,20 +639,13 @@ }, "outputs": [], "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -664,7 +657,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/06_LocalParallel.ipynb b/06_LocalParallel.ipynb index 7124fa5..e716e9c 100644 --- a/06_LocalParallel.ipynb +++ b/06_LocalParallel.ipynb @@ -11,7 +11,7 @@ "# Interactive Parallel Computing with IPython Parallel\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -27,7 +27,7 @@ "\n", "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. 
Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`.\n", + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`.\n", "\n", "In the terminal type ``ipcluster``. You'll see the help message telling you that you need to give it subcommand. Take a look at the message and then enter \n", "\n", @@ -110,7 +110,7 @@ } }, "source": [ - "Now let's see how we access the \"Cluster\". [IPython][IP] comes with a module [ipyparallel][IPp] that is used to access the engines, we just started. We first need to import Client.\n", + "Now let's see how we access the \"Cluster\". Originally, [ipyparallel][IPp] was developed as a part of [IPython][IP]. In the meantime it's developed separately. It is used to access the engines, we just started. We first need to import Client.\n", "\n", "[IPp]: https://ipyparallel.readthedocs.io/en/latest/\n", "[IP]: http://www.ipython.org" @@ -369,7 +369,7 @@ "outputs": [], "source": [ "with rc[:].sync_imports():\n", - " import matplotlib.pyplot" + " import numpy.linalg" ] }, { @@ -377,7 +377,8 @@ "metadata": { "slideshow": { "slide_type": "notes" - } + }, + "tags": [] }, "source": [ "Unfortunately mapping of namespaces does not work that way." 
@@ -388,7 +389,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "## Using the Direct View" @@ -473,7 +475,7 @@ }, "outputs": [], "source": [ - "%%px\n", + "%%px --local\n", "import threadpoolctl\n", "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" ] @@ -523,8 +525,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Execute and Apply" @@ -545,9 +548,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -559,8 +560,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -572,8 +574,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -585,8 +588,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -699,8 +703,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Remote functions" @@ -736,9 +741,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -767,7 +770,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -779,7 +783,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "A `remote` function, on the other hand just runs on each engine with the full set of data." 
@@ -820,8 +825,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Moving data around" @@ -830,9 +836,7 @@ { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ "So far the runtime has taken care of moving data to and from the engines, but we can do this explicitely. There are 4 commands to do that:\n", @@ -1029,6 +1033,20 @@ "y" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "type(y)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1048,7 +1066,7 @@ } }, "source": [ - "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timit`` and its sibbling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 3 times). For short running routines many loops of 3 executions are performed and the minimum time measured is then displayed. The number of loops and the number of executions can be adjusted. Take a look at the documentation. Give it a try." + "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timeit`` and its sibbling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 7 repeats). 
For short running routines a loop of many executions is performed per repeat and the minimum time measured is then displayed. The number of loops and the number of repeats can be adjusted. Take a look at the documentation. Give it a try." ] }, { @@ -1124,7 +1142,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1148,7 +1166,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1209,7 +1227,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1222,7 +1240,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1238,6 +1256,32 @@ "%timeit -n 1 -r 4 rc[0].execute('c = a.dot(b)')" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "source": [ + "Note, that the previous call was non-blocking since this is the default for ``execute`` and we have not specified anything else for the view rc[0]. The next line shows the blocking variant:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%timeit -n 10 -r 7 rc[0].execute('c = a.dot(b)', block=True)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1248,7 +1292,7 @@ }, "outputs": [], "source": [ - "%timeit a.dot(b)" + "%timeit -n 10 -r 7 a.dot(b)" ] }, { @@ -1272,7 +1316,7 @@ } }, "source": [ - "We can start about 500 parallel tasks per second and finish about a quarter as many. This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." + "We can start about 2000 parallel tasks per second and finish about a tenth as many. 
This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." ] }, { @@ -1322,15 +1366,15 @@ }, "outputs": [], "source": [ - "%timeit dview.push(dict(a=a))\n", - "%timeit dview.push(dict(a=a[:128*1024]))\n", - "%timeit dview.push(dict(a=a[:64*1024]))\n", - "%timeit dview.push(dict(a=a[:32*1024]))\n", - "%timeit dview.push(dict(a=a[:16*1024]))\n", - "%timeit dview.push(dict(a=a[:8*1024]))\n", - "%timeit dview.push(dict(a=a[:4*1024]))\n", - "%timeit dview.push(dict(a=a[:2*1024]))\n", - "%timeit dview.push(dict(a=a[:1024]))" + "%timeit -n 20 dview.push(dict(a=a))\n", + "%timeit -n 20 dview.push(dict(a=a[:128*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:64*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:32*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:16*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:8*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:4*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:2*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:1024]))" ] }, { @@ -1341,7 +1385,7 @@ } }, "source": [ - "Calculate the bandwidth for the largest array and the smallest array." + "Calculate the bandwidth for the largest array and the smallest array. Replace the numbers below with the times you measured." ] }, { @@ -1354,8 +1398,8 @@ }, "outputs": [], "source": [ - "bwmax = len(rc) * 256 * 8 / 9.8e-3\n", - "bwmin = len(rc) * 8 / 6.1e-3\n", + "bwmax = len(rc) * 256 * 8 / 9.83e-3\n", + "bwmin = len(rc) * 8 / 4.25e-3\n", "print(\"The bandwidth is between %.2f kB/s and %.2f kB/s.\" %( bwmin, bwmax))" ] }, @@ -1478,25 +1522,26 @@ } }, "source": [ - "There are different ways to parallelize a matrix-matrix multiplication. Each element of the matrix can be calculated independently." + "There are different ways to parallelize a matrix-matrix multiplication. 
Each element of the matrix can be calculated independently, but this currently seems to crash the ipcluster, so we'll skip the execution." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, - "outputs": [], "source": [ - "%%timeit \n", + "```ipython\n", + "%%timeit\n", "p = len(rc)\n", "# Distribute the elements of the result viewmatrix round robin.\n", - "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) for j in range(n)] for i in range(n)]\n", + "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) \n", + " for j in range(n)] for i in range(n)]\n", "# Wait until the calculation is done\n", - "dview.wait()\n" + "dview.wait()\n", + "```" ] }, { @@ -1507,7 +1552,7 @@ } }, "source": [ - "This, however, produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", + "It produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", "\n", "We want to calculate\n", "\n", @@ -1798,7 +1843,7 @@ "source": [ "Nothing says, we have to stop at 4 tiles nor do we have to use square tiles. We could also recursively subdivide our tiles.\n", "\n", - "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle." + "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle. Also, remember that we are transferring the data to the engines in every call!" 
] }, { @@ -1812,9 +1857,9 @@ "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1826,7 +1871,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/07_LocalTaskParallel.ipynb b/07_LocalTaskParallel.ipynb index 06251ec..bdd868d 100644 --- a/07_LocalTaskParallel.ipynb +++ b/07_LocalTaskParallel.ipynb @@ -60,6 +60,17 @@ "import numpy as np" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%px --local\n", + "import threadpoolctl\n", + "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -326,6 +337,55 @@ "BlockMatrixMultiply?" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's increase the size of the matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = 16384\n", + "A = np.random.random([n, n])\n", + "B = np.random.random([n, n])\n", + "C = np.dot(A, B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit C=np.dot(A,B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiply(A, B, n // 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiplyLB(A, B, lview, n)\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 2) # 4 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 4) # 16 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 8) # 64 tasks" + ] + }, { "cell_type": "code", "execution_count": null, @@ -336,9 +396,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -350,7 +410,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/08_Numba vectorize.ipynb b/08_Numba vectorize.ipynb index 41f7143..7a3da64 100644 --- a/08_Numba vectorize.ipynb +++ b/08_Numba vectorize.ipynb @@ -11,7 +11,7 @@ "# Numba vectorize\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -540,9 +540,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -554,7 +554,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/09_NumbaIntro.ipynb b/09_NumbaIntro.ipynb index 58c653a..084c6f9 100644 --- a/09_NumbaIntro.ipynb +++ b/09_NumbaIntro.ipynb @@ -11,7 +11,7 @@ "# Introduction to Numba's jit compiler\n", "\n", "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -23,7 +23,7 @@ } }, "source": [ - "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write CUDA kernels in Python! In this notebook, we'll focus on the jit compiler." + "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write GPU kernels in Python! In this notebook, we'll focus on the jit compiler." ] }, { @@ -54,13 +54,14 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy\n", - "from numba import jit\n", + "from numba import njit as jit\n", "from matplotlib import pyplot as plt " ] }, @@ -82,7 +83,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -104,13 +106,26 @@ "When we call `python_sum`, the interpreter goes through it line by line. For each item it has to interpret `res += x` and execute it, i.e., call apropriate C routines that have been compiled for the processor. 
The only requirements for `a` in this function are that it is iterable and its elements support the `+` operator. For the following little benchmark, we'll use an `ndarray` of random numbers." ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "The Faster CPython project is working on performance improvements for CPython. This includes inlining function calls and choosing special paths if the interpreter detects that types and objects are stable [PEP659](https://peps.python.org/pep-0659/)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -123,7 +138,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -136,7 +152,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -151,7 +168,7 @@ } }, "source": [ - "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUWELS is about 40 GFLOP/s." + "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUSUF is about 36 GFLOP/s." ] }, { @@ -253,7 +270,7 @@ } }, "source": [ - "Yes, there are good reasons to love Python (and other higher programming languages).\n", + "Yes, there are good reasons to love higher programming languages.\n", "\n", "Let's run the code:\n", "```\n", @@ -261,7 +278,7 @@ "Sum: 5033.24 in 0.717281 µs. 13941.5 MFLOP. \n", "```\n", "\n", - "The function takes about 0.7 µs. This is about 2000 times faster than the interpreted Python loop. \n", + "The function takes about 0.7 µs. This is more than 10,000 times faster than the interpreted Python loop. 
\n", "Wouldn't it be great if we could take the Python code in `python_sum` and compile it to machine \n", "code to get some of this speedup?" ] @@ -322,7 +339,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "numba_sum = jit(python_sum)" @@ -334,7 +353,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -358,7 +378,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -405,7 +426,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -429,7 +451,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -454,7 +477,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -473,7 +497,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -487,7 +512,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -500,7 +526,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -524,7 +551,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -544,7 +572,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -557,7 +586,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -570,7 +600,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -584,7 +615,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -597,7 +629,8 @@ "metadata": { "slideshow": { "slide_type": 
"skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -610,7 +643,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -628,7 +662,9 @@ }, "source": [ "### Exercise: prange\n", - "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the arguments ``nopython=True`` and ``parallel=True`` to the jit decorator.\n", + "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the ``parallel=True`` to the jit decorator.\n", + "\n", + "We imported ``njit`` as ``jit`` at the beginning of the notebooks since ``nopython=True`` will soon become the default. If you use ```from numba import jit``` you need to explicitly write ``nopython=True`` below.\n", "\n", "Rerun and compare.\n", "\n", @@ -654,7 +690,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -673,7 +710,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -686,7 +724,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -710,11 +749,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython=True)\n", + "@jit\n", "def numba_mm3(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in range(a.shape[0]):\n", @@ -729,7 +769,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -742,11 +783,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython = True)\n", + "@jit\n", "def numba_mm4(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in range(a.shape[0]):\n", @@ -762,7 +804,8 @@ "metadata": { 
"slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -775,7 +818,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -1125,11 +1169,7 @@ "source": [ "This is much better. The `ps` at the end of `vaddps` stands for *packed single precision* indicating \n", "a SIMD instruction. The `ymm` registers used are 256 bits wide, which corresponds to 8 single precision\n", - "numbers at a time.\n", - "\n", - "Skylake-X also has `zmm` registers with a width of 512 bit or 16 single precision numbers, but when\n", - "they are used the maximum frequency of the processor is reduced. It can happen that the performance \n", - "using `ymm` registers at higher frequency is actually better." + "numbers at a time." ] }, { @@ -1184,9 +1224,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1198,7 +1238,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/10_Speeding up your code with Cython.ipynb b/10_Speeding up your code with Cython.ipynb index 571852b..f3b6669 100644 --- a/10_Speeding up your code with Cython.ipynb +++ b/10_Speeding up your code with Cython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -65,7 +65,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -78,7 +79,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -103,7 +105,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -121,7 +124,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -135,7 +139,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -149,7 +154,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -164,7 +170,7 @@ } }, "source": [ - "Elementwise access to NumPy arrays can in the meantime be just as fast as access for lists.\n", + "Elementwise access to NumPy arrays is often slower as elementwise access to lists.\n", "\n", "Now let us invoke Cython" ] @@ -175,7 +181,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -194,7 +201,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -208,7 +216,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -232,7 +241,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -277,7 +287,7 @@ } }, "source": [ - "The arguments `v` and `w` are very general. If we know, however, that we are only going to pass ndarrays of integers, we can be more specific:" + "The arguments `v` and `w` are very general. 
If we know that we are only going to pass ndarrays of integers, we can be more specific:" ] }, { @@ -286,7 +296,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -320,7 +331,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -344,7 +356,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -368,7 +381,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -403,7 +417,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -430,7 +445,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -454,7 +470,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -482,7 +499,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -507,7 +525,8 @@ "metadata": { "slideshow": { "slide_type": "-" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -548,7 +567,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -576,7 +596,7 @@ "source": [ "Since Cython generates compiled Python extensions, we can release the GIL and run things in parallel if we don't make calls to the Python API.\n", "\n", - "As we've seen our inner loop is free of any Python calls (the annotated code is white). Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitely release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." + "As we've seen our inner loop is free of any Python calls (the annotated code is white). 
Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitly release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." ] }, { @@ -585,7 +605,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -615,7 +636,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -728,9 +750,9 @@ } }, "source": [ - "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use `distutils` and a `setup.py` file.\n", + "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use the `setuptools` provided with Cython and a `setup.py` file. For details see the [documentation](https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#basic-setup-py).\n", "\n", - "Note that distutils has been marked as deprecated as of Python 3.10, but we are still using Python 3.9.x on our systems." + "Note that distutils has been marked as deprecated as of Python 3.10." 
] }, { @@ -749,7 +771,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", + "from setuptools import setup\n", "from Cython.Build import cythonize\n", "setup(name=\"Sum of integers\",\n", " ext_modules=cythonize(\"sum.pyx\"),\n", @@ -785,8 +807,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", - "from distutils.extension import Extension\n", + "from setuptools import Extension, setup\n", "from Cython.Build import cythonize\n", "\n", "ext_modules = [\n", @@ -1270,9 +1291,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1284,7 +1305,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/11_Writing your own Python bindings.ipynb b/11_Writing your own Python bindings.ipynb index 78dabd2..96beaf4 100644 --- a/11_Writing your own Python bindings.ipynb +++ b/11_Writing your own Python bindings.ipynb @@ -16,7 +16,7 @@ "metadata": {}, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -92,7 +92,7 @@ "\n", "Wait until the build has finished and then continue with this notebook.\n", "\n", - "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2119/hpcpy22`." + "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2318/hpcpy23`." ] }, { @@ -222,7 +222,7 @@ } }, "source": [ - "What if word_frequency had been written Fortran?" + "What if word_frequency had been written in Fortran?" 
] }, { @@ -268,7 +268,7 @@ "source": [ "### Exercise\n", "Use the terminal that you used earlier to run `build.sh` or open a new one. Make sure you are in the \n", - "tutorial directory. Source `hpcpy22` using `source $PROJECT/hpcpy22`. Change into code/textstats/ and compile \n", + "tutorial directory. Source `hpcpy23` using `source $PROJECT/hpcpy23`. Change into code/textstats/ and compile \n", "the file word_frequency.F90 with the following command:\n", "\n", "```bash\n", @@ -368,7 +368,7 @@ "source": [ "Now, the name of the function will always be `word_frequency`. `bind` takes as optional argument the name under which the function should be known to C: bind(c, name=\"wf\") would let us call the function as `wf(filename, word)` from C (and Python).\n", "\n", - "To learn more about [CFFI](https://bitbucket.org/cffi/cffi) look at it's [documentation](https://cffi.readthedocs.io/en/latest/)." + "To learn more about CFFI look at its [documentation](https://cffi.readthedocs.io/en/latest/)." ] }, { @@ -1481,9 +1481,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1495,7 +1495,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/12_Introduction to MPI.ipynb b/12_Introduction to MPI.ipynb index 8a0c0e3..dd9cf88 100644 --- a/12_Introduction to MPI.ipynb +++ b/12_Introduction to MPI.ipynb @@ -11,7 +11,7 @@ "# Introduction to MPI\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -218,7 +218,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A slbio python3 hello_mpi.py " + "!srun --pty -n 4 -p batch -A training2318 --reservation tr2318-20230615-cpu python3 hello_mpi.py " ] }, { @@ -315,7 +315,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_ptp.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_ptp.py" ] }, { @@ -348,7 +348,7 @@ } }, "source": [ - "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` command is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." + "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` commands is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." ] }, { @@ -405,7 +405,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_sendrecv.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_sendrecv.py" ] }, { @@ -458,7 +458,7 @@ "tags": [] }, "source": [ - "Next, we'll sum up the partial results and then use sum up (`reduce`) the partial results:" + "Next, we'll sum up `a_partial` and then use `reduce` to sum up the partial results:" ] }, { @@ -558,7 +558,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_reduction.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_reduction.py" ] }, { @@ -580,7 +580,7 @@ } }, "source": [ - "`mpi4py` offers two version of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `np.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python object. 
The result is given as the return value. Note, that for the uppercase versions all `a_partial` must have the same size!" + "`mpi4py` offers two versions of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `numpy.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python objects. The result is given as the return value. Note, that for the uppercase versions all `a_partial` must have the same size!" ] }, { @@ -665,7 +665,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_upper.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_upper.py" ] }, { @@ -676,7 +676,7 @@ } }, "source": [ - "The following works independent of the size of a_partial:" + "The following code uses the lowercase versions of the calls and works independently of the size of a_partial:" ] }, { @@ -751,7 +751,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_lower.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_lower.py" ] }, { @@ -1038,7 +1038,7 @@ "3. Time the execution of the program from the second part of the exercise.\n", "\n", " a) Keep the size of the system constant and increase the number of ranks/domain, e.g., using 2, \n", - " 4, 8, and 16 ranks. How \n", + " 4, 8, and 16 ranks. How does the timing change?\n", " \n", " b) Keep the size of the domains constant, i.e., the total size is a multiple of the number of \n", " ranks. Again increase the number of ranks\n", @@ -1344,7 +1344,7 @@ "source": [ "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. 
Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`." + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`." ] }, { @@ -1362,7 +1362,7 @@ "\n", "```bash\n", "export OMP_NUM_THREADS=32\n", - "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2219 ipengine start\n", + "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2318 --reservation tr2318-20230615-cpu ipengine start\n", "```\n", "\n", "**Note**, you can can start the controller and the engines in separate terminals. That will keep the output separate." @@ -1736,13 +1736,20 @@ "source": [ "sum(sum_partial)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1754,7 +1761,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/13_Introduction to CuPy.ipynb b/13_Introduction to CuPy.ipynb index 4d5438b..e2cafd8 100644 --- a/13_Introduction to CuPy.ipynb +++ b/13_Introduction to CuPy.ipynb @@ -10,7 +10,7 @@ "source": [ "# Introduction to CuPy\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. 
Meinke\n", "</div>\n", "<img src=\"images/cupy.png\" style=\"float:right\">" ] @@ -134,7 +134,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul.py" ] }, { @@ -214,7 +214,21 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!srun --pty -N 1 -p develgpus -A training2318 --time 00:10:00 python3 cupy_matrix_mul_w_timing.py" ] }, { @@ -375,7 +389,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing2.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing2.py" ] }, { @@ -434,7 +448,7 @@ }, "outputs": [], "source": [ - "!srun -p batch -n 1 -c 256 -A training2219 python numpy_matrix_mul_w_timing2.py" + "!srun -p batch -n 1 -c 256 -A training2318 --pty --reservation tr2318-20230615-cpu python3 numpy_matrix_mul_w_timing2.py" ] }, { @@ -568,7 +582,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_to_and_fro.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cupy_to_and_fro.py" ] }, { @@ -656,9 +670,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -670,7 +684,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/14_CUDA for Python.ipynb 
b/14_CUDA for Python.ipynb index d9241f8..4964eb3 100644 --- a/14_CUDA for Python.ipynb +++ b/14_CUDA for Python.ipynb @@ -11,7 +11,7 @@ "# Numba and GPUs\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -158,7 +158,7 @@ " return i\n", " return maxtime\n", "\n", - "if __name__ == \"__main__:\n", + "if __name__ == \"__main__\":\n", " import numpy\n", " x = numpy.linspace(-2, 2, 500)\n", " y = numpy.linspace(-1.5, 1.5, 375)\n", @@ -182,7 +182,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython mandelbrot_vectorize_cuda.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython mandelbrot_vectorize_cuda.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean():.3f}±{t_gpu.std():.3f} s.\")" ] @@ -305,7 +305,7 @@ "source": [ "GPUs were (and are) made to display graphics on your screen. It doesn't matter how quickly a GPU can update a single pixel. It's important how quickly it can update all of the pixels on the screen (more than 2 million on an HD display). In addition it often must perform the same operation on a lot of vertices or pixels. \n", "\n", - "These two conditions let to a different execution model." + "These two conditions led to a different execution model." 
] }, { @@ -833,7 +833,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot1.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot1.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -907,7 +907,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot2.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot2.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -966,7 +966,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot3.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot3.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1147,7 +1147,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot4.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot4.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1207,7 +1207,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cuda_matrixmul.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cuda_matrixmul.py" ] }, { @@ -1378,9 +1378,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1392,7 +1392,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" 
} }, "nbformat": 4, diff --git a/15_CUDA and MPI.ipynb b/15_CUDA and MPI.ipynb index ae23563..8f0eafb 100644 --- a/15_CUDA and MPI.ipynb +++ b/15_CUDA and MPI.ipynb @@ -11,7 +11,7 @@ "# CUDA for Python and MPI4Py\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -344,12 +344,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - }, - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -419,7 +414,7 @@ "import cupy\n", "# Create an array with N * number_of_ranks elements\n", "N = 1000\n", - "a_partial = cup.empty(N)\n", + "a_partial = cupy.empty(N)\n", "if my_rank == 0:\n", " a = cupy.random.random(N * number_of_ranks)\n", "else:\n", @@ -532,12 +527,12 @@ " block = 256\n", " grid = N // block if N % block == 0 else N // block + 1 \n", " shift[grid, block](-0.75, a_partial)\n", - " print(f\"[{my_rank}] The average of a_partial is {cupy.mean(a_partial):.3f}\")\n", + " print(f\"[{my_rank}] The average of a_partial after shifting is {cupy.mean(a_partial):.3f}\")\n", " # Collect the data again on rank 0\n", " comm.Gather(a_partial, a, root = 0) \n", "\n", " if my_rank == 0:\n", - " print(\"The average of a is %.2f\" % cupy.mean(a)) # Result should be near zero.\n", + " print(\"The average of a after shifting is %.2f\" % cupy.mean(a)) # Result should be near zero.\n", " \n", " \n", "if __name__ == \"__main__\":\n", @@ -555,15 +550,22 @@ }, "outputs": [], "source": [ - "!srun -p gpus -n 4 -A training2219 xenv -L mpi-settings/CUDA python cuda_aware_mpi_shift.py" + "!srun -p gpus -n 4 -A training2318 --reservation tr2318-20230615-gpu xenv -L mpi-settings/CUDA python3 cuda_aware_mpi_shift.py" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", 
"language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -575,7 +577,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/16_Introduction to Dask.ipynb b/16_Introduction to Dask.ipynb index 62c461d..8df4e8f 100644 --- a/16_Introduction to Dask.ipynb +++ b/16_Introduction to Dask.ipynb @@ -11,7 +11,7 @@ "# Introduction to Dask\n", "\n", "<div class=\"dateauthor\">\n", - "10 June 2021 | Olav Zimmermann\n", + "16 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -165,7 +165,7 @@ } }, "source": [ - "The task graph generated by `dask` can be visualized (don't try this for large graphs!)." + "The task graph generated by `dask` can be visualized (don't try this for large graphs, i.e. more input tasks!)." ] }, { @@ -211,22 +211,6 @@ "- Change the program in a way that enables you to estimate how much overhead per task is incurred by Dask." ] }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - }, - "tags": [ - "Poll" - ] - }, - "source": [ - "Poll: \n", - "- largest number of inputs you can process under 8 seconds: A) 12 B) 24 C) 48 D) 96\n", - "- task overhead per task: A) 100µs B) 200µs C) 1ms D) 2ms" - ] - }, { "cell_type": "markdown", "metadata": { @@ -283,7 +267,8 @@ "source": [ "l=[x for x in range(1000000)]\n", "s= db.from_sequence(l,npartitions=4) # you can manually set the number of partitions\n", - "mysum=s.fold(add) # fold performs a parallel reduction " + "mysum=s.fold(add) # fold performs a parallel reduction \n", + "mysum.dask # another inpection method for task graphs in dask" ] }, { @@ -306,7 +291,7 @@ "outputs": [], "source": [ "%time result=mysum.compute()\n", - "result=mysum.compute\n", + "result=mysum.compute()\n", "result" ] }, @@ -320,8 +305,7 @@ }, "outputs": [], "source": [ - "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))\n", - "r[:5] 
#note: apparently no type coercion!" + "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))" ] }, { @@ -345,7 +329,8 @@ "source": [ "**Exercise:**\n", "\n", - "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations. \n", + "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations.\n", + "Make sure to return a list in all cases.\n", "\n", "Conclusions? " ] @@ -393,7 +378,7 @@ "source": [ "## dask.array\n", "\n", - "**`dask.dataframe`** is the distributed equivalent of numpy ndarray." + "**`dask.array`** is the distributed equivalent of numpy ndarray." ] }, { @@ -489,7 +474,7 @@ "outputs": [], "source": [ "x_dask = da.random.normal(10, 0.1, size=(10000,3000), chunks=(5000,3000)) # using as many chunks as CPU cores is good for random number calculation\n", - "x_rechunked=x_dask.rechunk((1000,3000)) # larger chunks are no longer better for dot product calculation\n", + "x_rechunked=x_dask.rechunk((2500,3000)) # larger chunks are no longer better for dot product calculation\n", "y_dask = x_rechunked.transpose()\n", "result=x_dask.dot(y_dask)\n", "#with ProgressBar():\n", @@ -517,7 +502,7 @@ } }, "source": [ - "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`. See **Dashboard** when you started the client above that shows the address and port of the web server." + "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`." 
] }, { @@ -583,9 +568,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy21" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -597,7 +582,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/17_Debugging.ipynb b/17_Debugging.ipynb index 22117b2..dbd87d3 100644 --- a/17_Debugging.ipynb +++ b/17_Debugging.ipynb @@ -6,17 +6,13 @@ "source": [ "# Debugging Python\n", "<div class=\"dateauthor\">\n", - "07 June 2021 | Jan H. Meinke\n", + "06 June 2023 | Jan H. Meinke, Olav Zimmermann\n", "</div>" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ "What do you do if a program doesn't produce the results you want? You can stare at the code and try to figure out the mistake. You can add lots of print statements to your code. Or you can use a debugger.\n", "\n", @@ -25,221 +21,184 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Debugging has its own terminology: You step in and out of functions. You move up and down the stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger I know does) support.\n", - "\n", - "In this notebook, we'll look at debugging a program with PDB in the notebook and pudb in a terminal window. You'll learn how to start a debugging session and do all the things, I talked about in the previous paragraph." + "### _\"Debuggers don't remove bugs. 
They only show them in slow motion.\"_ (Unknown)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## PDB" + "Debugging has its own terminology: You step in and out of functions. You move up and down the call stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger we know does) support.\n", + "\n", + "In this notebook, we'll introduce several different debuggers. We'll debug code within a notebook cell with the builtin debugger of JupyterLab as well as with PDB. Then we will use pudb to debug a program in a terminal window. You'll learn how to start a debugging session and do all the things, described in the previous paragraph." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is available from within a notebook, but it's not very convenient to use." + "## Runtime debugging with the JupyterLab builtin debugger" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Let's take the following function, which contains a bug and throws an exception." + "Before running the following cell try to guess what will happen: will it throw an error or a warning or will it execute normally? \n", + "If it is one of the latter two cases, what will it print?" 
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "#%%writefile buggy.py\n", - "def imabuggyincrement(i,a):\n", - " \"\"\"Increment a[i] by 1.\"\"\"\n", - " if ii < len(a):\n", - " a[i] += 1;\n", + "a,b,c,d,e=range(5)\n", + "from numpy import *\n", + "f=array([a,b,c,d,e], dtype=int)\n", + "def doubleme(input_array):\n", + " result=input_array*2\n", + " return result\n", + "def doublesummer(input_vec):\n", + " result=doubleme(input_vec)\n", + " result=result.sum()\n", + " return result\n", + "print(f'The result is {doublesummer(f)}.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using a debugger to execute a code (or part of it) step by step is also called **runtime debugging**. \n", "\n", - "a = list(range(10))\n", - "ii = 4\n", - "imabuggyincrement(10, a)" + "You can switch on JupyterLab's internal debugger by clicking on the small bug icon at the top right of the notebook, next to the kernel name. You will see several panels appear in the right sidebar. In addition, each code cell of the notebook now got line numbers.\n", + "\n", + "Click on the line number of line 11 in the code cell above. A red dot appearing in front of the line number indicates that you just set a **break point**. At a break point the debugger will stop, allowing you to inspect the state of each variable that is defined at this point. To start the debugger and let it execute the code up to the break point just re-execute the cell [Shift-Return].\n", + "\n", + "The navigation symbols at the top of the CallStack panel will now no longer be grayed out and allow you to execute the code line by line. With \"next\" you step over function calls within the line. 
With \"step in\" you can jump into the python functions called in this line of code (but not into any C library functions).\n", + "\n", + "The \"Variables\" panel allows you to view either the global or the local variables and to switch between tree and table view. (for arrays the table view is preferable)\n", + "\n", + "**Exercise:** Try to find the bug in the code above. You can set a break point at any line. In case that you want to reset the kernel use the circle arrow button at the top of the notebook.\n", + "\n", + "**Note:** The builtin debugger interface is a very recent addition to JupyterHub and only provides very limited functionality and convenience.\n" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ - "## Debug magic" + "## Post mortem debugging with PDB" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "If a cell has just thrown an exception, you can inspect it with the %debug magic. Try `help` to see the available commands. Type `exit` to leave the debugger." + "If a program fails, you can no longer execute the code step by step. Nevertheless, the debugger can help you to inspect the state of the code at the time of failure. This usage is also called **post mortem debugging**. Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is also available from within a notebook, but it's not very convenient to use." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Use `p i` to print the value of `i`. You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" + "Let's take the following function, which contains a bug and throws an exception. 
**(Please switch off the internal debugger before executing the cell!)**" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "%debug" + "#%%writefile buggy.py\n", + "def imabuggyincrement(i,a):\n", + " \"\"\"Increment a[i] by 1.\"\"\"\n", + " if ii < len(a):\n", + " a[i] += 1;\n", + "\n", + "a = list(range(10))\n", + "ii = 4\n", + "imabuggyincrement(10, a)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Available debuggers" + "### The %debug magic of pdb for notebooks" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "* pdb (builtin)\n", - "* pudb\n", - "* IDEs (All the IDEs we mentioned have debugging support)" + "The cell above has just thrown an exception and within a notebook you can use the `%debug` magic provided by pdb to inspect it. Try `help` to see the available commands. Type `exit` to leave the debugger." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Uncomment the ``%%writefile`` magic before the function defintion of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" + "Use `p i` to print the value of `i`. You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" ] }, { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "Next start pudb in a terminal with the script name as an argument. 
If you haven't done this in this terminal shell before, you need to source hpcpy20:" + "%debug" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "```bash\n", - "source hpcpy21\n", - "pudb3 buggy.py\n", - "```" + "## Debugging a program with pudb" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ - "We'll give you a short demonstration and then you can play with it for a little while." + "Uncomment the ``%%writefile`` magic before the function defintion of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Remote debugging" + "Next start pudb in a terminal with the script name as an argument. If you haven't done this in this terminal shell before, you need to source hpcpy23:" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." + "```bash\n", + "source $PROJECT_training2318/hpcpy23\n", + "pudb buggy.py\n", + "```" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Debugging Python extensions\n", - "We'll talk about this more tomorrow (maybe)." + "We'll give you a short demonstration and then you can play with it for a little while." 
] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ "## Note\n", "\n", @@ -248,21 +207,15 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "A better way to check for bounds as I did in `iambuggyincrement` is not to do it at all but use a try...except statement instead:" + "Another way to check for bounds as the one in `imabuggyincrement` is not to do it at all but use a try...except statement instead:" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "slide" - } + "tags": [] }, "source": [ "```python\n", @@ -274,7 +227,7 @@ " pass\n", " \n", "def main(arg=[]):\n", - " a = list(range(10)\n", + " a = list(range(10))\n", " ii = 4 # Now this is limited to the scope of main()\n", " imabuggyincrement(10, a)\n", " \n", @@ -283,13 +236,63 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that you should only use the `except` statement together with `pass` in cases where you expect a certain type of error but can't control the circumstances that lead to that error. This pattern effectively hides an error state of the program and could lead to unwanted side effects if used carelessly." 
+ ] + }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, + "source": [ + "## Overview: debuggers for Python" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* [pdb][] (builtin)\n", + "* [pudb][]\n", + "* IDEs (All the IDEs we mentioned have debugging support)\n", + "* [Linaro DDT][], former name ARMForge DDT (commercial, support for debugging parallel codes and C/C++ code, only rudimentary Python support)\n", + "* [TotalView][] (commercial, support for debugging parallel codes and C/C++ code, requires debug version of CPython, supports mixed language debugging, aware of cython, pybind11 and other bindings)\n", + "\n", + "[pdb]: https://docs.python.org/3/library/pdb.html\n", + "[pudb]: https://github.com/inducer/pudb\n", + "[Linaro DDT]: https://www.linaroforge.com/linaroDdt/\n", + "[ARMForge DDT]: https://developer.arm.com/tools-and-software/server-and-hpc/debug-and-profile/arm-forge/arm-ddt\n", + "[TotalView]: https://help.totalview.io/current/HTML/index.html#page/TotalView/totalviewlhug-python.13.01.html#ww1893192" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote debugging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debugging Python extensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ "The following video shows how to debug mixed Python and C++ code using Visual Studio.\n", "\n", @@ -299,35 +302,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import YouTubeVideo\n", "\n", - "YouTubeVideo(\"D9RlT06a1EI\", start=300)" + "YouTubeVideo(\"KhuMRDY4BeU\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "hpcpy21" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -339,7 +334,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/build.sh b/build.sh index bd4b6a6..df60a47 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -source $PROJECT_training2219/hpcpy22 +source $PROJECT_training2318/hpcpy23 # Build points pushd code/point rm -rf build diff --git a/hpcpy22 b/hpcpy22 deleted file mode 100644 index b29c14c..0000000 --- a/hpcpy22 +++ /dev/null @@ -1,19 +0,0 @@ -module purge --force -module load Stages/2022 -module load GCC -module load ParaStationMPI -module load Graphviz -module load SciPy-Stack -module load numba -module load dask -module load mpi4py -module load Jupyter -module load CUDA -module load CMake -#export NUMBAPRO_NVVM=$CUDA_HOME/nvvm/lib64/libnvvm.so -#export NUMBAPRO_LIBDEVICE=$CUDA_HOME/nvvm/libdevice -export LD_LIBRARY_PATH=/p/project/training2219/resources/code/text_stats/build:$LD_LIBRARY_PATH -export 
LD_LIBRARY_PATH=/p/project/training2219/resources/code/point/build:$LD_LIBRARY_PATH -export PYTHONPATH=/p/project/training2219/packages/lib/python3.9/site-packages:$PYTHONPATH -export PATH=/p/project/training2219/packages/bin:$PATH -export HPCPY2022=1 diff --git a/hpcpy23 b/hpcpy23 new file mode 100755 index 0000000..447fd63 --- /dev/null +++ b/hpcpy23 @@ -0,0 +1,26 @@ +#!/bin/bash +module purge +module load Stages/2023 +module load GCC +module load ParaStationMPI +module load CMake +module load Graphviz +module load SciPy-Stack +module load numba +module load dask +module load mpi4py +module load h5py +#module load Jupyter +module load CUDA +module load cuTENSOR +module load NCCL +module load cuDNN +#export NUMBAPRO_NVVM=$CUDA_HOME/nvvm/lib64/libnvvm.so +#export NUMBAPRO_LIBDEVICE=$CUDA_HOME/nvvm/libdevice +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/text_stats/build:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/point/build:$LD_LIBRARY_PATH +export PYTHONPATH=/p/project/training2318/packages/lib/python3.10/site-packages:$PYTHONPATH +export PATH=/p/project/training2318/packages/bin:$PATH +export HPCPY2023=1 +#exec $(which python) -m ipykernel $@ + diff --git a/solutions/00_Introduction to IPython.ipynb b/solutions/00_Introduction to IPython.ipynb index 7d3b2c3..6f3cf8c 100644 --- a/solutions/00_Introduction to IPython.ipynb +++ b/solutions/00_Introduction to IPython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "06 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -169,9 +169,7 @@ "tags": [] }, "outputs": [], - "source": [ - "import random" - ] + "source": [] }, { "cell_type": "markdown", @@ -549,47 +547,35 @@ }, "outputs": [], "source": [ - "# %load http://matplotlib.org/mpl_examples/mplot3d/surface3d_demo.py\n", - "'''\n", - "======================\n", - "3D surface (color map)\n", - "======================\n", + "# %load https://matplotlib.org/stable/_downloads/0c69e8950c767c2d95108979a24ace2f/surface3d_simple.py\n", "\n", - "Demonstrates plotting a 3D surface colored with the coolwarm color map.\n", - "The surface is made opaque by using antialiased=False.\n", + "\"\"\"\n", + "=====================\n", + "3D surface\n", + "=====================\n", "\n", - "Also demonstrates using the LinearLocator and custom formatting for the\n", - "z axis tick labels.\n", - "'''\n", - "\n", - "from mpl_toolkits.mplot3d import Axes3D\n", + "See `~mpl_toolkits.mplot3d.axes3d.Axes3D.plot_surface`.\n", + "\"\"\"\n", "import matplotlib.pyplot as plt\n", "from matplotlib import cm\n", - "from matplotlib.ticker import LinearLocator, FormatStrFormatter\n", "import numpy as np\n", "\n", + "# plt.style.use('_mpl-gallery')\n", "\n", - "fig = plt.figure()\n", - "ax = fig.gca(projection='3d')\n", - "\n", - "# Make data.\n", + "# Make data\n", "X = np.arange(-5, 5, 0.25)\n", "Y = np.arange(-5, 5, 0.25)\n", "X, Y = np.meshgrid(X, Y)\n", "R = np.sqrt(X**2 + Y**2)\n", "Z = np.sin(R)\n", "\n", - "# Plot the surface.\n", - "surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,\n", - " linewidth=0, antialiased=False)\n", - "\n", - "# Customize the z axis.\n", - "ax.set_zlim(-1.01, 1.01)\n", - "ax.zaxis.set_major_locator(LinearLocator(10))\n", - "ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))\n", + "# Plot the surface\n", + "fig, ax = plt.subplots(subplot_kw={\"projection\": \"3d\"})\n", + "ax.plot_surface(X, Y, Z, vmin=Z.min() * 2, cmap=cm.Blues)\n", "\n", - "# Add a color bar which maps values to colors.\n", - 
"fig.colorbar(surf, shrink=0.5, aspect=5)\n", + "ax.set(xticklabels=[],\n", + " yticklabels=[],\n", + " zticklabels=[])\n", "\n", "plt.show()\n" ] @@ -625,9 +611,9 @@ } }, "source": [ - "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory and ``%cd --foo`` switches to the next directory in the history than contains ``foo``. You can also set ``%bookmark``s and use them with ``%cd``.\n", + "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory. You can also set ``%bookmark``s and use them with ``%cd``.\n", "\n", - "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. Go two levels up using ``%cd ..`` twice. Look at the history using ``%dhist``. Change into mandelbrot using ``%cd --brot``. Finally use ``%cd -0`` to get back to where you started from." + "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. Go one level up using ``%cd ..``. Look at the history using ``%dhist``. Finally use ``%cd -0`` to get back to where you started from." ] }, { @@ -772,7 +758,8 @@ }, "outputs": [], "source": [ - "a = Out[13] # Assign Out[?] to a (replace with index from two cells above)" + "a = Out[13] # Assign Out[?] 
to a (replace with index from two cells above)\n", + "a" ] }, { @@ -916,9 +903,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -930,7 +917,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/01_Bottlenecks.ipynb b/solutions/01_Bottlenecks.ipynb index d8ebdc8..742c473 100644 --- a/solutions/01_Bottlenecks.ipynb +++ b/solutions/01_Bottlenecks.ipynb @@ -11,7 +11,7 @@ "# Bottlenecks\n", "\n", "<div class=\"dateauthor\">\n", - "20 Jun 2022 | Jan H. Meinke\n", + "12 Jun 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -443,7 +443,7 @@ }, "outputs": [], "source": [ - "from numba import jit\n", + "from numba import njit as jit\n", "jdot = jit(dot)" ] }, @@ -511,7 +511,7 @@ }, "outputs": [], "source": [ - "import numpy; from numba import jit\n", + "import numpy; from numba import njit as jit\n", "\n", "@jit\n", "def dot2(a, b):\n", @@ -545,7 +545,7 @@ } }, "source": [ - "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s)." + "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s) making this compute bound on a single core." ] }, { @@ -716,6 +716,13 @@ "print(2e-9 * n**3 / t_numpy_single.best, \"GFLOP/s (single core).\") " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The maximum clock frequency of the processor is 3.4 GHz, which corresponds to a peak performance of about 54 GFLOP/s. 
This is pretty close." + ] + }, { "cell_type": "code", "execution_count": null, @@ -871,9 +878,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -885,7 +892,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/02_NumPy_concepts.ipynb b/solutions/02_NumPy_concepts.ipynb index 0a0138d..743b74b 100644 --- a/solutions/02_NumPy_concepts.ipynb +++ b/solutions/02_NumPy_concepts.ipynb @@ -11,7 +11,7 @@ "# NumPy - an HPC perspective\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Olav Zimmermann\n", + "12 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -320,7 +320,7 @@ " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask array: only subset of ndarray functionality</td><td>tiled ndarrays larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask dataframe: only subset of pandas dataframe functionality</td><td>tiled dataframes larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://docs.rapids.ai/api/cudf/nightly/user_guide/10min.html\">dask-cuDF</a></b></code></td><td>cuDF dataframe: subset of pandas dataframe functionality</td><td>tiled dataframes on multiple GPUs and multiple nodes</td></tr>\n", - " <tr><td><code><b><a href=\"https://sparse.pydata.org/en/0.13.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse arrays (COO layout)</td><td></td></tr>\n", + " <tr><td><code><b><a href=\"https://sparse.pydata.org/en/0.14.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse arrays (COO layout)</td><td></td></tr>\n", " <tr><td><code><b><a 
href=\"https://docs.scipy.org/doc/scipy/reference/sparse.html\">SciPy.sparse</a></b></code></td><td>ndarray functionality on sparse arrays (all layouts)</td><td></td></tr>\n", " </table>" ] @@ -355,9 +355,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -369,7 +369,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/03_ThinkVector.ipynb b/solutions/03_ThinkVector.ipynb index 00ff200..404514f 100644 --- a/solutions/03_ThinkVector.ipynb +++ b/solutions/03_ThinkVector.ipynb @@ -5,13 +5,14 @@ "metadata": { "slideshow": { "slide_type": "slide" - } + }, + "tags": [] }, "source": [ "# Think Vector\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -409,7 +410,7 @@ } }, "source": [ - "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. Functions, like ``dot`` that have a different output shape than input shape are called generalized ufuncs." + "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. Functions, like ``dot`` that have a different output shape than input shape are called ``generalized ufuncs``." 
] }, { @@ -565,7 +566,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))" ] }, @@ -671,7 +672,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))\n" ] }, @@ -719,7 +720,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))\n", "A = B.copy()" ] @@ -759,7 +760,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")" + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")" ] }, { @@ -861,9 +862,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, "outputs": [], @@ -1280,9 +1278,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1294,7 +1292,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/04_Particle Dynamics.ipynb b/solutions/04_Particle Dynamics.ipynb index 86a7860..76c01d9 100644 --- a/solutions/04_Particle Dynamics.ipynb +++ b/solutions/04_Particle Dynamics.ipynb @@ -3,11 +3,16 @@ { "cell_type": "markdown", "id": "5451ef11-f683-4995-bda8-c9d87abaec49", - "metadata": {}, + "metadata": { 
+ "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "# Particle Dynamics with Python\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -15,7 +20,12 @@ "cell_type": "code", "execution_count": null, "id": "5822f3b3-bc03-4e2f-85f1-57cb246e3a05", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "import math\n", @@ -27,7 +37,12 @@ "cell_type": "code", "execution_count": null, "id": "f7d1939b-7d73-4c0c-9d8a-d6ea39d48b49", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "%matplotlib inline" @@ -35,59 +50,183 @@ }, { "cell_type": "markdown", - "id": "19819e70-b42c-405a-958f-70c05a972ee6", - "metadata": {}, + "id": "b6798959-bbef-4f71-b696-e1069554c403", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ "Particle dynamics simulations are common in various scientific fields. They are used to simulate \n", "the formation of galaxies and the movements of molecules in a cell. Particles can have different\n", - "properties such as mass and charge and interact in different ways.\n", - "\n", + "properties such as mass and charge and interact in different ways." 
+ ] + }, + { + "cell_type": "markdown", + "id": "9f9b8f9d-c834-4b86-9ef1-e385694d4b8c", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Equations of motion\n", "A classical particle dynamics code solves Newton's equation of motion:\n", "\n", - "$$\\mathbf F = m \\mathbf a,$$\n", - "\n", + "$$\\mathbf F = m \\mathbf a \\ \\ \\ \\ [\\mathtt{1}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "2c250750-32b7-4a74-8c3e-5c3eb6c4a13d", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F$ is the force, $m$ the mass, and $\\mathbf a$ the acceleration. $\\mathbf F$ and \n", "$\\mathbf a$ are vectors.\n", "\n", - "In general, this problem is solvable analytically for two particles only . If there are more \n", + "In general, this problem is solvable analytically for two particles only. If there are more \n", "particles, we have to look for a numerical solution.\n", "\n", - "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as\n", - "\n", - "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt$$\n", - "\n", - "and the position $\\mathbf r$ as\n", - "\n", - "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2.$$\n", - "\n", + "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as" + ] + }, + { + "cell_type": "markdown", + "id": "00ee5853-283f-4786-bd4c-81ca9ab7b3b2", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt \\ \\ \\ \\ [\\mathtt{2}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "a6e75808-f266-4a57-9837-5b9aa69ee436", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "and the position $\\mathbf r$ as" + ] + }, + { + "cell_type": "markdown", + "id": "27adecd9-7499-4a86-bb62-15dd40377c72", + "metadata": { + 
"slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2 \\ \\ \\ \\ [\\mathtt{3}].$$" + ] + }, + { + "cell_type": "markdown", + "id": "35260044-1b70-46c5-8bfd-8475566037b4", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "If we know all the positions, velocities and masses at time $t$ and can calculate the forces, we \n", "can follow the motion of the particles over time." ] }, { "cell_type": "markdown", - "id": "50ad1731-c5b0-4922-adc8-14e507a7b6b8", - "metadata": {}, + "id": "0167c3d7-4abc-4635-b53d-aa38072ff922", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Gravitational force" + ] + }, + { + "cell_type": "markdown", + "id": "96292513-eaee-4617-bacd-4d13a1f6f8ab", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "## Gravitational force\n", "Let's assume our particles only interact via gravity. Then the force between two particles is given \n", - "by\n", - "\n", - "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t),$$\n", - "\n", + "by" + ] + }, + { + "cell_type": "markdown", + "id": "cbab8258-28f9-41db-9dda-7f4a5be57603", + "metadata": { + "tags": [] + }, + "source": [ + "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t) \\ \\ \\ \\ [\\mathtt{4}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "c55acb8e-6cb4-459c-9241-9e42eb364b72", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F_{ij}(t)$ is the force on particle $i$ due to particle $j$. 
$r_{ij}(t)$ is the \n", "distance between particles $i$ and $j$, and $\\mathbf {\\hat r}_{ij}(t)$ is the unit vector pointing\n", "from $i$ to $j$.\n", "\n", - "To get the total force on particle $i$, we need to sum over all $j \\neq i$:\n", - "\n", - "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t).$$" + "To get the total force on particle $i$, we need to sum over all $j \\neq i$:" + ] + }, + { + "cell_type": "markdown", + "id": "d36faa34-7345-4e94-b19b-62e4419417e0", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t) \\ \\ \\ \\ [\\mathtt{5}].$$" ] }, { "cell_type": "markdown", "id": "32f7c975-ed21-4c70-9168-5b7bfa5ca276", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## The algorithm" ] @@ -107,8 +246,13 @@ }, { "cell_type": "markdown", - "id": "539c2d60-df7b-471b-a438-d9b4efb51781", - "metadata": {}, + "id": "efba7cbf-301a-4e5c-81d4-1394c5ec3c9f", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## (Parallel) Patterns\n", "In Think Vector, we got to know some patterns. 
Let's see how we can apply them here:\n", @@ -124,7 +268,19 @@ " \n", "Calculate the new position:\n", " This is a map, too.\n", - " \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "76d2db76-3bac-4465-9512-babcef5e721b", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "Now, let's try to express this in code.\n", " " ] @@ -133,7 +289,12 @@ "cell_type": "code", "execution_count": null, "id": "b4525c8a-378a-45b7-b1e2-b67f5f07d397", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "# Initialize positions and velocities\n", @@ -143,9 +304,11 @@ "dt = 0.1 # time step\n", "G = 1 # For simplicity we set the universal graviational constant to 1\n", "m = 1 # This corresponds to 150 x 10^9 kg\n", + "# random initial positions\n", "x = [random.uniform(-L2, L2) for i in range(N)]\n", "y = [random.uniform(-L2, L2) for i in range(N)]\n", "z = [random.uniform(-L2, L2) for i in range(N)]\n", + "# zero initial velocities\n", "vx = [0 for i in range(N)]\n", "vy = [0 for i in range(N)]\n", "vz = [0 for i in range(N)]" @@ -154,24 +317,62 @@ { "cell_type": "markdown", "id": "8fd053d2-8c88-4666-82ed-0316fe21ac34", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "### Calculating forces" ] }, { "cell_type": "markdown", - "id": "41861767-e08d-45b8-802a-28b269e3f7ee", - "metadata": {}, + "id": "ac5e70be-cafd-41cd-b866-5b98ee28fb0a", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "To calculate the forces (see eq. 4), we need a distance matrix, i.e. the distance $d_{ij}$ between each pair of particles ($r_{ij}$ in eq. 4). First we calculate the distance vector for each component, x, y, and z separately, this requires 3 map operations. 
Then we calculate from these vectors the pairwise distances in three dimensional space:" + ] + }, + { + "cell_type": "markdown", + "id": "c1d0d68d-23a4-45e1-a431-91e575056e21", + "metadata": { + "tags": [] + }, + "source": [ + "$$d=\\sqrt{dx^2+dy^2+dz^2} \\ \\ \\ \\ [\\mathtt{6}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "0b29d4d1-b6ef-4615-ab11-0bed26267252", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "To calculate the force, we need the distance vector first. These are actually 3 maps (one for each component). The result is a distance matrix. As mentioned before maps are expressed as list generators:" + "(another map operation). As mentioned before maps can be expressed as list comprehensions:" ] }, { "cell_type": "code", "execution_count": null, "id": "338142b6-f973-4f7a-b5a4-77e76f3b758f", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "Dxx = [(i - j) for j in x for i in x]\n", @@ -183,21 +384,31 @@ { "cell_type": "markdown", "id": "d0156a2d-13ae-46dd-b3a8-cb7eb1aca0bf", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "Now that we have the vector components and the magnitude of the vector, we can calculate the forces." + "Now that we have the vector components and the magnitudes of the vectors, we can calculate the forces (see eq. 4). We then sum all the forces acting on one particle for each particle (see eq. 5). Note that we also calculate the forces separately for each component." 
] }, { "cell_type": "code", "execution_count": null, "id": "e841a076-504d-445b-b006-b931e3cb0bc2", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ - "Fxx = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dxx, D)] # epsilon prevents a zero in the dominator.\n", - "Fyy = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dyy, D)]\n", - "Fzz = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dzz, D)]\n", + "Fxx = [G * m * m * dxx / (d * d * d + epsilon) for dxx, d in zip(Dxx, D)] # epsilon prevents a zero in the denominator.\n", + "Fyy = [G * m * m * dyy / (d * d * d + epsilon) for dyy, d in zip(Dyy, D)]\n", + "Fzz = [G * m * m * dzz / (d * d * d + epsilon) for dzz, d in zip(Dzz, D)]\n", "Fx = [sum(Fxx[i * N: (i + 1) * N]) for i in range(N)]\n", "Fy = [sum(Fyy[i * N: (i + 1) * N]) for i in range(N)]\n", "Fz = [sum(Fzz[i * N: (i + 1) * N]) for i in range(N)]" @@ -206,19 +417,39 @@ { "cell_type": "markdown", "id": "3de052ac-7591-4477-8285-cc15c0019a7a", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's visualize the forces on the particles:" ] }, + { + "cell_type": "markdown", + "id": "235e1971-24e0-4cf8-ac27-779e5ae37684", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize forces" + ] + }, { "cell_type": "code", "execution_count": null, "id": "1133b4bb-111b-4aca-9326-22a7c29c8522", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, Fx, Fy, Fz)" ] @@ -226,7 +457,12 @@ { "cell_type": "markdown", "id": "ccea23e5-4f4b-4ff6-b379-8d45e3fe15f4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source":
[ "### Integrating the equation of motion" ] @@ -234,9 +470,14 @@ { "cell_type": "markdown", "id": "dba27f9b-350e-4e65-9f42-e3615ee30a84", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "We are ready to update the positions and velocities of our particles:" + "We are ready to update the positions and velocities of our particles. First we use eq. 3 to calculate the new positions. Note that we substituted $\\bf{a}$ in eq. 3 by $\\frac{\\mathbf{F}}{m}$ using eq. 1. " ] }, { @@ -251,11 +492,29 @@ "z = [i + v * dt + 0.5 * f / m * dt * dt for i, v, f in zip(z, vz, Fz)]" ] }, + { + "cell_type": "markdown", + "id": "52959ed7-d454-40fb-98f1-9df161873c87", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "Using the same expression for $\\bf{a}$ as above we now use eq. 2 to calculate the new velocities:" + ] + }, { "cell_type": "code", "execution_count": null, "id": "2266d4e8-8f67-4979-ae47-abf8508673a4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "vx = [v + f / m * dt for v, f in zip(vx, Fx)]\n", @@ -266,11 +525,29 @@ { "cell_type": "markdown", "id": "e4cff076-759c-477c-9758-41bb730cd606", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's take a look at the particle positions and velocities:" ] }, + { + "cell_type": "markdown", + "id": "92a88a32-4ee1-44ce-b371-afd412359a3b", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize velocities" + ] + }, { "cell_type": "code", "execution_count": null, @@ -278,7 +555,7 @@ "metadata": {}, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, vx, vy, vz)" ] @@ 
-286,15 +563,25 @@ { "cell_type": "markdown", "id": "65984f53-4b54-4f6d-aaa1-6de391150539", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ - "That's it. By going back to the [calculation of the forces](#Calculating-forces), we can follow the motion of the particles over time." + "That's it. By going back to the [calculation of the forces](#Calculating-forces) and iterating over the steps again, we can follow the motion of the particles over time." ] }, { "cell_type": "markdown", "id": "f1f30004-a9c3-4499-84e0-976937b9f8a8", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "## Exercise\n", "Rewrite the program in a vectorized manner using `ndarray`s." @@ -304,14 +591,23 @@ "cell_type": "code", "execution_count": null, "id": "039819a6-698f-43a6-a4f0-4f7b8852fbb1", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "8cb45f43-29e2-49df-a976-bf7790fe5a44", - "metadata": {}, + "metadata": { + "tags": [ + "Solution" + ] + }, "source": [ "### Solution:" ] @@ -321,6 +617,9 @@ "execution_count": null, "id": "ccfc4eca-c09e-448f-93db-2122be7b484c", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -331,11 +630,32 @@ "rng = numpy.random.Generator(numpy.random.MT19937())" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "599a1169-0356-4841-a495-2b113021c652", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [ + "Solution" + ] + }, + "outputs": [], + "source": [ + "L2=5\n", + "N=50" + ] + }, { "cell_type": "code", "execution_count": null, "id": "405b8627-e887-4ec8-85cf-c391877c0b19", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -354,6 +674,9 @@ "cell_type": "markdown", "id": "0e3c0e11-dc33-46e0-a6da-05cb42ecfd9a", "metadata": { + 
"slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -371,6 +694,9 @@ "execution_count": null, "id": "8d8cad39-dd89-4379-b549-64a67465db3f", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -388,6 +714,9 @@ "execution_count": null, "id": "ba5403cc-d3d0-46d8-a1bb-01f5056d0963", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -404,6 +733,9 @@ "execution_count": null, "id": "500347dc-2dfd-4481-a81b-4276cbc00863", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -420,6 +752,9 @@ "execution_count": null, "id": "e0745105-fa07-4054-8ddc-274de4a510f8", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -436,6 +771,9 @@ "execution_count": null, "id": "699fb1a4-349b-46ad-acfc-177465aade2a", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -452,6 +790,9 @@ "execution_count": null, "id": "3fe70091-fffd-4613-a798-1f9c54bfbfa4", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -464,19 +805,24 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "1f236119-af8c-499d-86cf-1d6b98f9e5fd", - "metadata": {}, - "outputs": [], - "source": [] + "cell_type": "markdown", + "id": "5a141c1e-22b6-40be-80d5-25ad2648972c", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "source": [ + "Tip: if your velocities are too small to see the directions of the velocity arrows, just scale vx, vy, and vz in the ax.quiver lines. Where do they point to? Can you do the same scaling in the list version above?"
+ ] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -488,7 +834,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/05_Profiling a simple md code.ipynb b/solutions/05_Profiling a simple md code.ipynb index a4f9897..eeca5b2 100644 --- a/solutions/05_Profiling a simple md code.ipynb +++ b/solutions/05_Profiling a simple md code.ipynb @@ -10,7 +10,7 @@ "source": [ "# Profiling\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -660,16 +660,23 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [ + "Solution" + ] + }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -681,7 +688,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/06_LocalParallel.ipynb b/solutions/06_LocalParallel.ipynb index 7124fa5..e716e9c 100644 --- a/solutions/06_LocalParallel.ipynb +++ b/solutions/06_LocalParallel.ipynb @@ -11,7 +11,7 @@ "# Interactive Parallel Computing with IPython Parallel\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -27,7 +27,7 @@ "\n", "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. 
Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`.\n", + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`.\n", "\n", "In the terminal type ``ipcluster``. You'll see the help message telling you that you need to give it subcommand. Take a look at the message and then enter \n", "\n", @@ -110,7 +110,7 @@ } }, "source": [ - "Now let's see how we access the \"Cluster\". [IPython][IP] comes with a module [ipyparallel][IPp] that is used to access the engines, we just started. We first need to import Client.\n", + "Now let's see how we access the \"Cluster\". Originally, [ipyparallel][IPp] was developed as a part of [IPython][IP]. In the meantime it's developed separately. It is used to access the engines, we just started. We first need to import Client.\n", "\n", "[IPp]: https://ipyparallel.readthedocs.io/en/latest/\n", "[IP]: http://www.ipython.org" @@ -369,7 +369,7 @@ "outputs": [], "source": [ "with rc[:].sync_imports():\n", - " import matplotlib.pyplot" + " import numpy.linalg" ] }, { @@ -377,7 +377,8 @@ "metadata": { "slideshow": { "slide_type": "notes" - } + }, + "tags": [] }, "source": [ "Unfortunately mapping of namespaces does not work that way." 
@@ -388,7 +389,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "## Using the Direct View" @@ -473,7 +475,7 @@ }, "outputs": [], "source": [ - "%%px\n", + "%%px --local\n", "import threadpoolctl\n", "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" ] @@ -523,8 +525,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Execute and Apply" @@ -545,9 +548,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -559,8 +560,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -572,8 +574,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -585,8 +588,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -699,8 +703,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Remote functions" @@ -736,9 +741,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -767,7 +770,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -779,7 +783,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "A `remote` function, on the other hand just runs on each engine with the full set of data." 
@@ -820,8 +825,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Moving data around" @@ -830,9 +836,7 @@ { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ "So far the runtime has taken care of moving data to and from the engines, but we can do this explicitely. There are 4 commands to do that:\n", @@ -1029,6 +1033,20 @@ "y" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "type(y)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1048,7 +1066,7 @@ } }, "source": [ - "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timit`` and its sibbling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 3 times). For short running routines many loops of 3 executions are performed and the minimum time measured is then displayed. The number of loops and the number of executions can be adjusted. Take a look at the documentation. Give it a try." + "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timeit`` and its sibling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 7 repeats). 
For short running routines a loop of many executions is performed per repeat and the minimum time measured is then displayed. The number of loops and the number of repeats can be adjusted. Take a look at the documentation. Give it a try." ] }, { @@ -1124,7 +1142,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1148,7 +1166,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1209,7 +1227,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1222,7 +1240,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1238,6 +1256,32 @@ "%timeit -n 1 -r 4 rc[0].execute('c = a.dot(b)')" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "source": [ + "Note, that the previous call was non-blocking since this is the default for ``execute`` and we have not specified anything else for the view rc[0]. The next line shows the blocking variant:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%timeit -n 10 -r 7 rc[0].execute('c = a.dot(b)', block=True)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1248,7 +1292,7 @@ }, "outputs": [], "source": [ - "%timeit a.dot(b)" + "%timeit -n 10 -r 7 a.dot(b)" ] }, { @@ -1272,7 +1316,7 @@ } }, "source": [ - "We can start about 500 parallel tasks per second and finish about a quarter as many. This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." + "We can start about 2000 parallel tasks per second and finish about a tenth as many. 
This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." ] }, { @@ -1322,15 +1366,15 @@ }, "outputs": [], "source": [ - "%timeit dview.push(dict(a=a))\n", - "%timeit dview.push(dict(a=a[:128*1024]))\n", - "%timeit dview.push(dict(a=a[:64*1024]))\n", - "%timeit dview.push(dict(a=a[:32*1024]))\n", - "%timeit dview.push(dict(a=a[:16*1024]))\n", - "%timeit dview.push(dict(a=a[:8*1024]))\n", - "%timeit dview.push(dict(a=a[:4*1024]))\n", - "%timeit dview.push(dict(a=a[:2*1024]))\n", - "%timeit dview.push(dict(a=a[:1024]))" + "%timeit -n 20 dview.push(dict(a=a))\n", + "%timeit -n 20 dview.push(dict(a=a[:128*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:64*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:32*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:16*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:8*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:4*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:2*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:1024]))" ] }, { @@ -1341,7 +1385,7 @@ } }, "source": [ - "Calculate the bandwidth for the largest array and the smallest array." + "Calculate the bandwidth for the largest array and the smallest array. Replace the numbers below with the times you measured." ] }, { @@ -1354,8 +1398,8 @@ }, "outputs": [], "source": [ - "bwmax = len(rc) * 256 * 8 / 9.8e-3\n", - "bwmin = len(rc) * 8 / 6.1e-3\n", + "bwmax = len(rc) * 256 * 8 / 9.83e-3\n", + "bwmin = len(rc) * 8 / 4.25e-3\n", "print(\"The bandwidth is between %.2f kB/s and %.2f kB/s.\" %( bwmin, bwmax))" ] }, @@ -1478,25 +1522,26 @@ } }, "source": [ - "There are different ways to parallelize a matrix-matrix multiplication. Each element of the matrix can be calculated independently." + "There are different ways to parallelize a matrix-matrix multiplication. 
Each element of the matrix can be calculated independently, but this currently seems to crash the ipcluster, so we'll skip the execution." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, - "outputs": [], "source": [ - "%%timeit \n", + "```ipython\n", + "%%timeit\n", "p = len(rc)\n", "# Distribute the elements of the result viewmatrix round robin.\n", - "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) for j in range(n)] for i in range(n)]\n", + "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) \n", + " for j in range(n)] for i in range(n)]\n", "# Wait until the calculation is done\n", - "dview.wait()\n" + "dview.wait()\n", + "```" ] }, { @@ -1507,7 +1552,7 @@ } }, "source": [ - "This, however, produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", + "It produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", "\n", "We want to calculate\n", "\n", @@ -1798,7 +1843,7 @@ "source": [ "Nothing says, we have to stop at 4 tiles nor do we have to use square tiles. We could also recursively subdivide our tiles.\n", "\n", - "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle." + "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle. Also, remember that we are transferring the data to the engines in every call!" 
] }, { @@ -1812,9 +1857,9 @@ "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1826,7 +1871,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/07_LocalTaskParallel.ipynb b/solutions/07_LocalTaskParallel.ipynb index 06251ec..bdd868d 100644 --- a/solutions/07_LocalTaskParallel.ipynb +++ b/solutions/07_LocalTaskParallel.ipynb @@ -60,6 +60,17 @@ "import numpy as np" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%px --local\n", + "import threadpoolctl\n", + "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -326,6 +337,55 @@ "BlockMatrixMultiply?" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's increase the size of the matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = 16384\n", + "A = np.random.random([n, n])\n", + "B = np.random.random([n, n])\n", + "C = np.dot(A, B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit C=np.dot(A,B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiply(A, B, n // 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiplyLB(A, B, lview, n)\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 2) # 4 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 4) # 16 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 8) # 64 tasks" + ] + }, { "cell_type": "code", "execution_count": null, @@ -336,9 +396,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -350,7 +410,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/08_Numba vectorize.ipynb b/solutions/08_Numba vectorize.ipynb index 4661df7..85f5eaa 100644 --- a/solutions/08_Numba vectorize.ipynb +++ b/solutions/08_Numba vectorize.ipynb @@ -11,7 +11,7 @@ "# Numba vectorize\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -597,9 +597,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -611,7 +611,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/09_NumbaIntro.ipynb b/solutions/09_NumbaIntro.ipynb index 01d6aca..f2f784c 100644 --- a/solutions/09_NumbaIntro.ipynb +++ b/solutions/09_NumbaIntro.ipynb @@ -11,7 +11,7 @@ "# Introduction to Numba's jit compiler\n", "\n", "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -23,7 +23,7 @@ } }, "source": [ - "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write CUDA kernels in Python! In this notebook, we'll focus on the jit compiler." + "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write GPU kernels in Python! In this notebook, we'll focus on the jit compiler." ] }, { @@ -54,13 +54,14 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy\n", - "from numba import jit\n", + "from numba import njit as jit\n", "from matplotlib import pyplot as plt " ] }, @@ -82,7 +83,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -104,13 +106,26 @@ "When we call `python_sum`, the interpreter goes through it line by line. 
For each item it has to interpret `res += x` and execute it, i.e., call apropriate C routines that have been compiled for the processor. The only requirements for `a` in this function are that it is iterable and its elements support the `+` operator. For the following little benchmark, we'll use an `ndarray` of random numbers." ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "The Faster CPython project is working on performance improvements for CPython. This includes inlining function calls and choosing special paths if the interpreter detects that types and objects are stable [PEP659](https://peps.python.org/pep-0659/)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -123,7 +138,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -136,7 +152,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -151,7 +168,7 @@ } }, "source": [ - "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUWELS is about 40 GFLOP/s." + "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUSUF is about 36 GFLOP/s." ] }, { @@ -270,7 +287,7 @@ } }, "source": [ - "Yes, there are good reasons to love Python (and other higher programming languages).\n", + "Yes, there are good reasons to love higher programming languages.\n", "\n", "Let's run the code:\n", "```\n", @@ -278,7 +295,7 @@ "Sum: 5033.24 in 0.717281 µs. 13941.5 MFLOP. \n", "```\n", "\n", - "The function takes about 0.7 µs. This is about 2000 times faster than the interpreted Python loop. \n", + "The function takes about 0.7 µs. 
This is more than 10,000 times faster than the interpreted Python loop. \n", "Wouldn't it be great if we could take the Python code in `python_sum` and compile it to machine \n", "code to get some of this speedup?" ] @@ -339,7 +356,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "numba_sum = jit(python_sum)" @@ -351,7 +370,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -375,7 +395,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -439,7 +460,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -463,7 +485,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -488,7 +511,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -507,7 +531,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -521,7 +546,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -534,7 +560,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -558,7 +585,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -578,7 +606,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -591,7 +620,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -604,7 +634,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -618,7 +649,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], 
"source": [ @@ -631,7 +663,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -644,7 +677,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -662,7 +696,9 @@ }, "source": [ "### Exercise: prange\n", - "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the arguments ``nopython=True`` and ``parallel=True`` to the jit decorator.\n", + "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the ``parallel=True`` to the jit decorator.\n", + "\n", + "We imported ``njit`` as ``jit`` at the beginning of the notebooks since ``nopython=True`` will soon become the default. If you use ```from numba import jit``` you need to explicitly write ``nopython=True`` below.\n", "\n", "Rerun and compare.\n", "\n", @@ -683,9 +719,10 @@ "outputs": [], "source": [ "# Solution for the first part\n", + "from numba import njit as jit # use nopython=True as default\n", "from numba import prange\n", "\n", - "@jit(nopython=True, parallel=True)\n", + "@jit(parallel=True)\n", "def numba_mm_par(a,b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in prange(a.shape[0]):\n", @@ -725,7 +762,10 @@ "outputs": [], "source": [ "# Solution for the extra credit:\n", - "@jit(nopython=True, parallel=True)\n", + "from numba import njit as jit # use nopython=True as default\n", + "from numba import prange\n", + "\n", + "@jit(parallel=True)\n", "def numba_mm_kj_par(a,b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in prange(a.shape[0]):\n", @@ -770,7 +810,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -789,7 +830,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -802,7 
+844,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -826,11 +869,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython=True)\n", + "@jit\n", "def numba_mm3(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in range(a.shape[0]):\n", @@ -845,7 +889,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -858,11 +903,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython = True)\n", + "@jit\n", "def numba_mm4(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in range(a.shape[0]):\n", @@ -878,7 +924,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -891,7 +938,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -1365,11 +1413,7 @@ "source": [ "This is much better. The `ps` at the end of `vaddps` stands for *packed single precision* indicating \n", "a SIMD instruction. The `ymm` registers used are 256 bits wide, which corresponds to 8 single precision\n", - "numbers at a time.\n", - "\n", - "Skylake-X also has `zmm` registers with a width of 512 bit or 16 single precision numbers, but when\n", - "they are used the maximum frequency of the processor is reduced. It can happen that the performance \n", - "using `ymm` registers at higher frequency is actually better." + "numbers at a time." 
] }, { @@ -1458,9 +1502,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1472,7 +1516,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/10_Speeding up your code with Cython.ipynb b/solutions/10_Speeding up your code with Cython.ipynb index 6781081..b7f8e44 100644 --- a/solutions/10_Speeding up your code with Cython.ipynb +++ b/solutions/10_Speeding up your code with Cython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -65,7 +65,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -78,7 +79,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -103,7 +105,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -121,7 +124,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -135,7 +139,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -149,7 +154,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -164,7 +170,7 @@ } }, "source": [ - "Elementwise access to NumPy arrays can in the meantime be just as fast as access for lists.\n", + "Elementwise access to NumPy arrays is often slower as elementwise access to lists.\n", "\n", "Now let us invoke Cython" ] @@ -175,7 +181,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -194,7 +201,8 @@ "metadata": { "slideshow": { 
"slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -208,7 +216,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -232,7 +241,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -277,7 +287,7 @@ } }, "source": [ - "The arguments `v` and `w` are very general. If we know, however, that we are only going to pass ndarrays of integers, we can be more specific:" + "The arguments `v` and `w` are very general. If we know that we are only going to pass ndarrays of integers, we can be more specific:" ] }, { @@ -286,7 +296,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -320,7 +331,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -344,7 +356,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -368,7 +381,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -403,7 +417,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -430,7 +445,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -454,7 +470,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -482,7 +499,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -507,7 +525,8 @@ "metadata": { "slideshow": { "slide_type": "-" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -548,7 +567,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -576,7 +596,7 @@ "source": [ "Since Cython generates compiled Python extensions, we can release the GIL and run things in 
parallel if we don't make calls to the Python API.\n", "\n", - "As we've seen our inner loop is free of any Python calls (the annotated code is white). Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitely release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." + "As we've seen our inner loop is free of any Python calls (the annotated code is white). Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitly release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." ] }, { @@ -585,7 +605,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -615,7 +636,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -798,9 +820,9 @@ } }, "source": [ - "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use `distutils` and a `setup.py` file.\n", + "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use the `setuptools` provided with Cython and a `setup.py` file. For details see the [documentation](https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#basic-setup-py).\n", "\n", - "Note that distutils has been marked as deprecated as of Python 3.10, but we are still using Python 3.9.x on our systems." + "Note that distutils has been marked as deprecated as of Python 3.10." 
] }, { @@ -819,7 +841,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", + "from setuptools import setup\n", "from Cython.Build import cythonize\n", "setup(name=\"Sum of integers\",\n", " ext_modules=cythonize(\"sum.pyx\"),\n", @@ -855,8 +877,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", - "from distutils.extension import Extension\n", + "from setuptools import Extension, setup\n", "from Cython.Build import cythonize\n", "\n", "ext_modules = [\n", @@ -1358,9 +1379,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1372,7 +1393,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/11_Writing your own Python bindings.ipynb b/solutions/11_Writing your own Python bindings.ipynb index 79ec3b6..ec69f6d 100644 --- a/solutions/11_Writing your own Python bindings.ipynb +++ b/solutions/11_Writing your own Python bindings.ipynb @@ -16,7 +16,7 @@ "metadata": {}, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -92,7 +92,7 @@ "\n", "Wait until the build has finished and then continue with this notebook.\n", "\n", - "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2119/hpcpy22`." + "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2318/hpcpy23`." ] }, { @@ -222,7 +222,7 @@ } }, "source": [ - "What if word_frequency had been written Fortran?" + "What if word_frequency had been written in Fortran?" 
] }, { @@ -268,7 +268,7 @@ "source": [ "### Exercise\n", "Use the terminal that you used earlier to run `build.sh` or open a new one. Make sure you are in the \n", - "tutorial directory. Source `hpcpy22` using `source $PROJECT/hpcpy22`. Change into code/textstats/ and compile \n", + "tutorial directory. Source `hpcpy23` using `source $PROJECT/hpcpy23`. Change into code/textstats/ and compile \n", "the file word_frequency.F90 with the following command:\n", "\n", "```bash\n", @@ -414,7 +414,7 @@ "source": [ "Now, the name of the function will always be `word_frequency`. `bind` takes as optional argument the name under which the function should be known to C: bind(c, name=\"wf\") would let us call the function as `wf(filename, word)` from C (and Python).\n", "\n", - "To learn more about [CFFI](https://bitbucket.org/cffi/cffi) look at it's [documentation](https://cffi.readthedocs.io/en/latest/)." + "To learn more about CFFI look at its [documentation](https://cffi.readthedocs.io/en/latest/)." ] }, { @@ -1527,9 +1527,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1541,7 +1541,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/12_Introduction to MPI.ipynb b/solutions/12_Introduction to MPI.ipynb index 924eff5..dc37c65 100644 --- a/solutions/12_Introduction to MPI.ipynb +++ b/solutions/12_Introduction to MPI.ipynb @@ -11,7 +11,7 @@ "# Introduction to MPI\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -218,7 +218,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A slbio python3 hello_mpi.py " + "!srun --pty -n 4 -p batch -A training2318 --reservation tr2318-20230615-cpu python3 hello_mpi.py " ] }, { @@ -315,7 +315,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_ptp.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_ptp.py" ] }, { @@ -348,7 +348,7 @@ } }, "source": [ - "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` command is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." + "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` commands is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." ] }, { @@ -405,7 +405,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_sendrecv.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_sendrecv.py" ] }, { @@ -458,7 +458,7 @@ "tags": [] }, "source": [ - "Next, we'll sum up the partial results and then use sum up (`reduce`) the partial results:" + "Next, we'll sum up `a_partial` and then use `reduce` to sum up the partial results:" ] }, { @@ -558,7 +558,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_reduction.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_reduction.py" ] }, { @@ -580,7 +580,7 @@ } }, "source": [ - "`mpi4py` offers two version of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `np.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python object. 
The result is given as the return value. Note, that for the uppercase versions all `a_partial` must have the same size!" + "`mpi4py` offers two versions of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `numpy.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python objects. The result is given as the return value. Note that for the uppercase versions all `a_partial` must have the same size!" ] }, { @@ -665,7 +665,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_upper.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_upper.py" ] }, { @@ -676,7 +676,7 @@ } }, "source": [ - "The following works independent of the size of a_partial:" + "The following code uses the lowercase versions of the calls and works independently of the size of a_partial:" ] }, { @@ -751,7 +751,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_lower.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_lower.py" ] }, { @@ -866,7 +866,7 @@ "%%writefile mpi_ptp2.py\n", "\n", "from mpi4py import MPI\n", - "import numpy as np\n", + "import numpy\n", "\n", "comm=MPI.COMM_WORLD\n", "rank = comm.Get_rank()\n", @@ -876,15 +876,29 @@ " exit(1)\n", "\n", "nRnd=10000\n", - "part = np.random.default_rng().normal(0.0, 1.0, nRnd)\n", + "part = numpy.random.default_rng().normal(0.0, 1.0, nRnd)\n", "minval=part.min()\n", "maxval=part.max()\n", - "globmin=np.zeros(1)\n", - "globmax=np.zeros(1)\n", + "globmin=numpy.zeros(1)\n", + "globmax=numpy.zeros(1)\n", "comm.Reduce(minval,globmin,MPI.MIN)\n", "comm.Reduce(maxval,globmax,MPI.MAX)\n", "if rank == 0:\n", - " print(\"np.ptp=n.d., mpi_ptp=%f\"%(globmax-globmin))" + " print(\"numpy.ptp=n.d., mpi_ptp=%f\"%(globmax-globmin))" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Solution" + ] + }, + "outputs": [], + "source": [ + "!srun --pty -n 4 -A training2318 --time 00:10:00 python3 mpi_ptp1.py\n", + "!srun --pty -n 4 -A training2318 --time 00:10:00 python3 mpi_ptp2.py" ] }, { @@ -1128,7 +1142,7 @@ "3. Time the execution of the program from the second part of the exercise.\n", "\n", " a) Keep the size of the system constant and increase the number of ranks/domain, e.g., using 2, \n", - " 4, 8, and 16 ranks. How \n", + " 4, 8, and 16 ranks. How does the timing change?\n", " \n", " b) Keep the size of the domains constant, i.e., the total size is a multiple of the number of \n", " ranks. Again increase the number of ranks\n", @@ -1542,7 +1556,7 @@ "source": [ "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`." + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`." ] }, { @@ -1560,7 +1574,7 @@ "\n", "```bash\n", "export OMP_NUM_THREADS=32\n", - "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2219 ipengine start\n", + "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2318 --reservation tr2318-20230615-cpu ipengine start\n", "```\n", "\n", "**Note**, you can can start the controller and the engines in separate terminals. That will keep the output separate." 
@@ -1934,13 +1948,20 @@ "source": [ "sum(sum_partial)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1952,7 +1973,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/13_Introduction to CuPy.ipynb b/solutions/13_Introduction to CuPy.ipynb index f28ade4..807eb34 100644 --- a/solutions/13_Introduction to CuPy.ipynb +++ b/solutions/13_Introduction to CuPy.ipynb @@ -10,7 +10,7 @@ "source": [ "# Introduction to CuPy\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>\n", "<img src=\"images/cupy.png\" style=\"float:right\">" ] @@ -134,7 +134,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul.py" ] }, { @@ -214,7 +214,21 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!srun --pty -N 1 -p develgpus -A training2318 --time 00:10:00 python3 cupy_matrix_mul_w_timing.py" ] }, { @@ -433,7 +447,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing2.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing2.py" ] 
}, { @@ -492,7 +506,7 @@ }, "outputs": [], "source": [ - "!srun -p batch -n 1 -c 256 -A training2219 python numpy_matrix_mul_w_timing2.py" + "!srun -p batch -n 1 -c 256 -A training2318 --pty --reservation tr2318-20230615-cpu python3 numpy_matrix_mul_w_timing2.py" ] }, { @@ -651,7 +665,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_to_and_fro.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cupy_to_and_fro.py" ] }, { @@ -739,9 +753,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -753,7 +767,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/14_CUDA for Python.ipynb b/solutions/14_CUDA for Python.ipynb index 3d2e564..6c82362 100644 --- a/solutions/14_CUDA for Python.ipynb +++ b/solutions/14_CUDA for Python.ipynb @@ -11,7 +11,7 @@ "# Numba and GPUs\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -158,7 +158,7 @@ " return i\n", " return maxtime\n", "\n", - "if __name__ == \"__main__:\n", + "if __name__ == \"__main__\":\n", " import numpy\n", " x = numpy.linspace(-2, 2, 500)\n", " y = numpy.linspace(-1.5, 1.5, 375)\n", @@ -221,7 +221,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython mandelbrot_vectorize_cuda.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython mandelbrot_vectorize_cuda.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean():.3f}±{t_gpu.std():.3f} s.\")" ] @@ -344,7 +344,7 @@ "source": [ "GPUs were (and are) made to display graphics on your screen. It doesn't matter how quickly a GPU can update a single pixel. 
It's important how quickly it can update all of the pixels on the screen (more than 2 million on an HD display). In addition it often must perform the same operation on a lot of vertices or pixels. \n", "\n", - "These two conditions let to a different execution model." + "These two conditions led to a different execution model." ] }, { @@ -937,7 +937,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot1.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot1.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1078,7 +1078,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot2.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot2.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1207,7 +1207,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot3.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot3.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1459,7 +1459,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot4.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot4.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1581,7 +1581,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cuda_matrixmul.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cuda_matrixmul.py" ] }, { @@ -1752,9 +1752,9 @@ ], "metadata": { 
"kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1766,7 +1766,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/15_CUDA and MPI.ipynb b/solutions/15_CUDA and MPI.ipynb index c8fde4a..982c4a3 100644 --- a/solutions/15_CUDA and MPI.ipynb +++ b/solutions/15_CUDA and MPI.ipynb @@ -11,7 +11,7 @@ "# CUDA for Python and MPI4Py\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -338,6 +338,57 @@ " main()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Solution" + ] + }, + "outputs": [], + "source": [ + "!srun -n 4 -c 32 -p gpus -A training2318 python3 parallel_shift.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Picking a device" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "As you see from the example above, there's nothing special when using MPI with GPUs. The one thing that might bite you is using *multiple* MPI ranks for *multiple* GPUs on a *single* node. 
In this case, you might have to tell your MPI rank which GPU to use.\n", + "\n", + "If you have, for example, 4 GPUs and you know that your scheduler chooses a compact configuration, i.e., rank 0, 1, 2, 3 are on the first node, rank 4, 5, 6, 7 are on the second node, etc., you can use your rank to assign a GPU to your process:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "```python\n", + "\n", + "cuda.select_device(my_rank % number_of_gpus_per_node)\n", + "```" + ] + }, { "cell_type": "code", "execution_count": null, @@ -409,44 +460,6 @@ " main()" ] }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "## Picking a device" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, - "source": [ - "As you see from the example above, there's nothing special when using MPI with GPUs. The one thing that might bite you is using *multiple* MPI ranks for *multiple* GPUs on a *single* node. 
In this case, you might have to tell your MPI rank which GPU to use.\n", - "\n", - "If you have, for example, 4 GPUs and you know that your scheduler chooses a compact configuration, i.e., rank 0, 1, 2, 3 are on the first node, rank 4, 5, 6, 7 are on the second node, etc., you can use you rank to assign a GPU to your process:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "```python\n", - "\n", - "cuda.select_device(my_rank % number_of_gpus_per_node)\n", - "```" - ] - }, { "cell_type": "markdown", "metadata": { @@ -493,6 +506,7 @@ }, "outputs": [], "source": [ + "%%writefile cuda_mpi_mandelbrot.py\n", "# Solution for calculating the Mandelbrot set on 4 GPUs/node\n", "import numpy\n", "import mpi4py.MPI as MPI\n", @@ -584,9 +598,20 @@ "slideshow": { "slide_type": "skip" }, - "tags": [] + "tags": [ + "Solution" + ] }, "outputs": [], + "source": [ + "!srun -n 4 -c 32 -p gpus -A training2318 python3 cuda_mpi_mandelbrot.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] }, { @@ -655,7 +680,7 @@ "import cupy\n", "# Create an array with N * number_of_ranks elements\n", "N = 1000\n", - "a_partial = cup.empty(N)\n", + "a_partial = cupy.empty(N)\n", "if my_rank == 0:\n", " a = cupy.random.random(N * number_of_ranks)\n", "else:\n", @@ -768,12 +793,12 @@ " block = 256\n", " grid = N // block if N % block == 0 else N // block + 1 \n", " shift[grid, block](-0.75, a_partial)\n", - " print(f\"[{my_rank}] The average of a_partial is {cupy.mean(a_partial):.3f}\")\n", + " print(f\"[{my_rank}] The average of a_partial after shifting is {cupy.mean(a_partial):.3f}\")\n", " # Collect the data again on rank 0\n", " comm.Gather(a_partial, a, root = 0) \n", "\n", " if my_rank == 0:\n", - " print(\"The average of a is %.2f\" % cupy.mean(a)) # Result should be near zero.\n", + " print(\"The average of a after shifting is %.2f\" % cupy.mean(a)) # Result should 
be near zero.\n", " \n", " \n", "if __name__ == \"__main__\":\n", @@ -791,15 +816,22 @@ }, "outputs": [], "source": [ - "!srun -p gpus -n 4 -A training2219 xenv -L mpi-settings/CUDA python cuda_aware_mpi_shift.py" + "!srun -p gpus -n 4 -A training2318 --reservation tr2318-20230615-gpu xenv -L mpi-settings/CUDA python3 cuda_aware_mpi_shift.py" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -811,7 +843,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/16_Introduction to Dask.ipynb b/solutions/16_Introduction to Dask.ipynb index 2aedd85..c57ff9a 100644 --- a/solutions/16_Introduction to Dask.ipynb +++ b/solutions/16_Introduction to Dask.ipynb @@ -11,7 +11,7 @@ "# Introduction to Dask\n", "\n", "<div class=\"dateauthor\">\n", - "10 June 2021 | Olav Zimmermann\n", + "16 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -170,9 +170,9 @@ }, "source": [ "Poll:\n", - "- minimal walltime: A) 3s B) 4s C) 6s D) 6.5s\n", - "- how many tasks in task graph: A) 1 B) 8 C) 16 D) 18 \n", - "- max tasks in the same time as 8 tasks: A) 8 B) 24 C) 48 D) 96" + "- minimal walltime: A) 3s B) 4.5s C) 6s D) 6.5s\n", + "- how many tasks in task graph: A) 10 B) 18 C) 25 D) 32 \n", + "- max tasks in the same time as 8 tasks: A) 8 B) 48 C) 96 D) 256" ] }, { @@ -183,7 +183,7 @@ } }, "source": [ - "The task graph generated by `dask` can be visualized (don't try this for large graphs!)." + "The task graph generated by `dask` can be visualized (don't try this for large graphs, i.e. more input tasks!)." 
] }, { @@ -236,7 +236,8 @@ "slide_type": "skip" }, "tags": [ - "Poll" + "Poll", + "Solution" ] }, "source": [ @@ -301,7 +302,8 @@ "source": [ "l=[x for x in range(1000000)]\n", "s= db.from_sequence(l,npartitions=4) # you can manually set the number of partitions\n", - "mysum=s.fold(add) # fold performs a parallel reduction " + "mysum=s.fold(add) # fold performs a parallel reduction \n", + "mysum.dask # another inspection method for task graphs in dask" ] }, { @@ -324,7 +326,7 @@ "outputs": [], "source": [ "%time result=mysum.compute()\n", - "result=mysum.compute\n", + "result=mysum.compute()\n", "result" ] }, @@ -338,8 +340,7 @@ }, "outputs": [], "source": [ - "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))\n", - "r[:5] #note: apparently no type coercion!" + "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))" ] }, { @@ -363,7 +364,8 @@ "source": [ "**Exercise:**\n", "\n", - "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations. \n", + "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations.\n", + "Make sure to return a list in all cases.\n", "\n", "Conclusions? 
" ] @@ -537,9 +539,25 @@ }, "outputs": [], "source": [ - "%%timeit import numpy as np\n", + "# %%timeit \n", + "import numpy as np\n", "arr=np.array(l)\n", - "out2=(arr[arr%2==0]*1.2).tolist()" + "out2=(arr[arr%2==0]*1.2).tolist()\n", + "out2[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "Solution" + ] + }, + "source": [ + "Take home msgs: \n", + "- a) parallelization has overhead, simple calculations on the data are often slower than their single threaded counterparts.\n", + "- b) dask bag is a flexible but slow container that is appropriate for parallel processing of file lists or text but not for numeric calculations on lists\n", + "- c) creating data structures can be expensive (`dask.bag.from_sequence` needs almost 500ms to convert the list to a dask bag." ] }, { @@ -585,7 +603,7 @@ "source": [ "## dask.array\n", "\n", - "**`dask.dataframe`** is the distributed equivalent of numpy ndarray." + "**`dask.array`** is the distributed equivalent of numpy ndarray." ] }, { @@ -681,7 +699,7 @@ "outputs": [], "source": [ "x_dask = da.random.normal(10, 0.1, size=(10000,3000), chunks=(5000,3000)) # using as many chunks as CPU cores is good for random number calculation\n", - "x_rechunked=x_dask.rechunk((1000,3000)) # larger chunks are no longer better for dot product calculation\n", + "x_rechunked=x_dask.rechunk((2500,3000)) # larger chunks are no longer better for dot product calculation\n", "y_dask = x_rechunked.transpose()\n", "result=x_dask.dot(y_dask)\n", "#with ProgressBar():\n", @@ -709,7 +727,7 @@ } }, "source": [ - "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`. See **Dashboard** when you started the client above that shows the address and port of the web server." + "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`." 
] }, { @@ -775,9 +793,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy21" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -789,7 +807,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/17_Debugging.ipynb b/solutions/17_Debugging.ipynb index 22117b2..dbd87d3 100644 --- a/solutions/17_Debugging.ipynb +++ b/solutions/17_Debugging.ipynb @@ -6,17 +6,13 @@ "source": [ "# Debugging Python\n", "<div class=\"dateauthor\">\n", - "07 June 2021 | Jan H. Meinke\n", + "06 June 2023 | Jan H. Meinke, Olav Zimmermann\n", "</div>" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ "What do you do if a program doesn't produce the results you want? You can stare at the code and try to figure out the mistake. You can add lots of print statements to your code. Or you can use a debugger.\n", "\n", @@ -25,221 +21,184 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Debugging has its own terminology: You step in and out of functions. You move up and down the stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger I know does) support.\n", - "\n", - "In this notebook, we'll look at debugging a program with PDB in the notebook and pudb in a terminal window. You'll learn how to start a debugging session and do all the things, I talked about in the previous paragraph." + "### _\"Debuggers don't remove bugs. 
They only show them in slow motion.\"_ (Unknown)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## PDB" + "Debugging has its own terminology: You step in and out of functions. You move up and down the call stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger we know does) support.\n", + "\n", + "In this notebook, we'll introduce several different debuggers. We'll debug code within a notebook cell with the builtin debugger of JupyterLab as well as with PDB. Then we will use pudb to debug a program in a terminal window. You'll learn how to start a debugging session and do all the things, described in the previous paragraph." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is available from within a notebook, but it's not very convenient to use." + "## Runtime debugging with the JupyterLab builtin debugger" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Let's take the following function, which contains a bug and throws an exception." + "Before running the following cell try to guess what will happen: will it throw an error or a warning or will it execute normally? \n", + "If it is one of the latter two cases, what will it print?" 
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "#%%writefile buggy.py\n", - "def imabuggyincrement(i,a):\n", - " \"\"\"Increment a[i] by 1.\"\"\"\n", - " if ii < len(a):\n", - " a[i] += 1;\n", + "a,b,c,d,e=range(5)\n", + "from numpy import *\n", + "f=array([a,b,c,d,e], dtype=int)\n", + "def doubleme(input_array):\n", + " result=input_array*2\n", + " return result\n", + "def doublesummer(input_vec):\n", + " result=doubleme(input_vec)\n", + " result=result.sum()\n", + " return result\n", + "print(f'The result is {doublesummer(f)}.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using a debugger to execute a code (or part of it) step by step is also called **runtime debugging**. \n", "\n", - "a = list(range(10))\n", - "ii = 4\n", - "imabuggyincrement(10, a)" + "You can switch on JupyterLab's internal debugger by clicking on the small bug icon at the top right of the notebook, next to the kernel name. You will see several panels appear in the right sidebar. In addition, each code cell of the notebook now got line numbers.\n", + "\n", + "Click on the line number of line 11 in the code cell above. A red dot appearing in front of the line number indicates that you just set a **break point**. At a break point the debugger will stop, allowing you to inspect the state of each variable that is defined at this point. To start the debugger and let it execute the code up to the break point just re-execute the cell [Shift-Return].\n", + "\n", + "The navigation symbols at the top of the CallStack panel will now no longer be grayed out and allow you to execute the code line by line. With \"next\" you step over function calls within the line. 
With \"step in\" you can jump into the python functions called in this line of code (but not into any C library functions).\n", + "\n", + "The \"Variables\" panel allows you to view either the global or the local variables and to switch between tree and table view. (for arrays the table view is preferable)\n", + "\n", + "**Exercise:** Try to find the bug in the code above. You can set a break point at any line. In case that you want to reset the kernel use the circle arrow button at the top of the notebook.\n", + "\n", + "**Note:** The builtin debugger interface is a very recent addition to JupyterHub and only provides very limited functionality and convenience.\n" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ - "## Debug magic" + "## Post mortem debugging with PDB" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "If a cell has just thrown an exception, you can inspect it with the %debug magic. Try `help` to see the available commands. Type `exit` to leave the debugger." + "If a program fails, you can no longer execute the code step by step. Nevertheless, the debugger can help you to inspect the state of the code at the time of failure. This usage is also called **post mortem debugging**. Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is also available from within a notebook, but it's not very convenient to use." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Use `p i` to print the value of `i`. You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" + "Let's take the following function, which contains a bug and throws an exception. 
**(Please switch off the internal debugger before executing the cell!)**" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "%debug" + "#%%writefile buggy.py\n", + "def imabuggyincrement(i,a):\n", + " \"\"\"Increment a[i] by 1.\"\"\"\n", + " if ii < len(a):\n", + " a[i] += 1;\n", + "\n", + "a = list(range(10))\n", + "ii = 4\n", + "imabuggyincrement(10, a)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Available debuggers" + "### The %debug magic of pdb for notebooks" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "* pdb (builtin)\n", - "* pudb\n", - "* IDEs (All the IDEs we mentioned have debugging support)" + "The cell above has just thrown an exception and within a notebook you can use the `%debug` magic provided by pdb to inspect it. Try `help` to see the available commands. Type `exit` to leave the debugger." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Uncomment the ``%%writefile`` magic before the function defintion of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" + "Use `p i` to print the value of `i`. You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" ] }, { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "Next start pudb in a terminal with the script name as an argument. 
If you haven't done this in this terminal shell before, you need to source hpcpy20:" + "%debug" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "```bash\n", - "source hpcpy21\n", - "pudb3 buggy.py\n", - "```" + "## Debugging a program with pudb" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ - "We'll give you a short demonstration and then you can play with it for a little while." + "Uncomment the ``%%writefile`` magic before the function definition of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Remote debugging" + "Next start pudb in a terminal with the script name as an argument. If you haven't done this in this terminal shell before, you need to source hpcpy23:" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." + "```bash\n", + "source $PROJECT_training2318/hpcpy23\n", + "pudb buggy.py\n", + "```" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Debugging Python extensions\n", - "We'll talk about this more tomorrow (maybe)." + "We'll give you a short demonstration and then you can play with it for a little while." 
] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ "## Note\n", "\n", @@ -248,21 +207,15 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "A better way to check for bounds as I did in `iambuggyincrement` is not to do it at all but use a try...except statement instead:" + "Another way to check for bounds as the one in `imabuggyincrement` is not to do it at all but use a try...except statement instead:" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "slide" - } + "tags": [] }, "source": [ "```python\n", @@ -274,7 +227,7 @@ " pass\n", " \n", "def main(arg=[]):\n", - " a = list(range(10)\n", + " a = list(range(10))\n", " ii = 4 # Now this is limited to the scope of main()\n", " imabuggyincrement(10, a)\n", " \n", @@ -283,13 +236,63 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that you should only use the `except` statement together with `pass` in cases where you expect a certain type of error but can't control the circumstances that lead to that error. This pattern effectively hides an error state of the program and could lead to unwanted side effects if used carelessly." 
+ ] + }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, + "source": [ + "## Overview: debuggers for Python" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* [pdb][] (builtin)\n", + "* [pudb][]\n", + "* IDEs (All the IDEs we mentioned have debugging support)\n", + "* [Linaro DDT][], former name ARMForge DDT (commercial, support for debugging parallel codes and C/C++ code, only rudimentary Python support)\n", + "* [TotalView][] (commercial, support for debugging parallel codes and C/C++ code, requires debug version of CPython, supports mixed language debugging, aware of cython, pybind11 and other bindings)\n", + "\n", + "[pdb]: https://docs.python.org/3/library/pdb.html\n", + "[pudb]: https://github.com/inducer/pudb\n", + "[Linaro DDT]: https://www.linaroforge.com/linaroDdt/\n", + "[ARMForge DDT]: https://developer.arm.com/tools-and-software/server-and-hpc/debug-and-profile/arm-forge/arm-ddt\n", + "[TotalView]: https://help.totalview.io/current/HTML/index.html#page/TotalView/totalviewlhug-python.13.01.html#ww1893192" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote debugging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debugging Python extensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ "The following video shows how to debug mixed Python and C++ code using Visual Studio.\n", "\n", @@ -299,35 +302,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import YouTubeVideo\n", "\n", - "YouTubeVideo(\"D9RlT06a1EI\", start=300)" + "YouTubeVideo(\"KhuMRDY4BeU\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "hpcpy21" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -339,7 +334,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/solutions/build.sh b/solutions/build.sh index bd4b6a6..df60a47 100755 --- a/solutions/build.sh +++ b/solutions/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -source $PROJECT_training2219/hpcpy22 +source $PROJECT_training2318/hpcpy23 # Build points pushd code/point rm -rf build diff --git a/solutions/code b/solutions/code index 2edff26..c787d1e 120000 --- a/solutions/code +++ b/solutions/code @@ -1 +1 @@ -../code \ No newline at end of file +../code/ \ No newline at end of file diff --git a/solutions/data b/solutions/data index 4909e06..eed2d0b 120000 --- a/solutions/data +++ b/solutions/data @@ -1 +1 @@ -../data \ No newline at end of file +../data/ \ No newline at end of file diff --git a/solutions/hpcpy23 b/solutions/hpcpy23 new file mode 100755 index 0000000..447fd63 --- /dev/null +++ b/solutions/hpcpy23 @@ -0,0 +1,26 @@ +#!/bin/bash +module purge 
+module load Stages/2023 +module load GCC +module load ParaStationMPI +module load CMake +module load Graphviz +module load SciPy-Stack +module load numba +module load dask +module load mpi4py +module load h5py +#module load Jupyter +module load CUDA +module load cuTENSOR +module load NCCL +module load cuDNN +#export NUMBAPRO_NVVM=$CUDA_HOME/nvvm/lib64/libnvvm.so +#export NUMBAPRO_LIBDEVICE=$CUDA_HOME/nvvm/libdevice +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/text_stats/build:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/point/build:$LD_LIBRARY_PATH +export PYTHONPATH=/p/project/training2318/packages/lib/python3.10/site-packages:$PYTHONPATH +export PATH=/p/project/training2318/packages/bin:$PATH +export HPCPY2023=1 +#exec $(which python) -m ipykernel $@ + -- GitLab