diff --git a/00_Introduction to IPython.ipynb b/00_Introduction to IPython.ipynb index 7d3b2c3fd9ab2a40d4d0560f485f80f9a2fb30f4..f98177502321cf4127543a0724b5ca2b4862731a 100644 --- a/00_Introduction to IPython.ipynb +++ b/00_Introduction to IPython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -169,9 +169,7 @@ "tags": [] }, "outputs": [], - "source": [ - "import random" - ] + "source": [] }, { "cell_type": "markdown", @@ -549,47 +547,35 @@ }, "outputs": [], "source": [ - "# %load http://matplotlib.org/mpl_examples/mplot3d/surface3d_demo.py\n", - "'''\n", - "======================\n", - "3D surface (color map)\n", - "======================\n", + "# %load https://matplotlib.org/stable/_downloads/0c69e8950c767c2d95108979a24ace2f/surface3d_simple.py\n", "\n", - "Demonstrates plotting a 3D surface colored with the coolwarm color map.\n", - "The surface is made opaque by using antialiased=False.\n", + "\"\"\"\n", + "=====================\n", + "3D surface\n", + "=====================\n", "\n", - "Also demonstrates using the LinearLocator and custom formatting for the\n", - "z axis tick labels.\n", - "'''\n", - "\n", - "from mpl_toolkits.mplot3d import Axes3D\n", + "See `~mpl_toolkits.mplot3d.axes3d.Axes3D.plot_surface`.\n", + "\"\"\"\n", "import matplotlib.pyplot as plt\n", "from matplotlib import cm\n", - "from matplotlib.ticker import LinearLocator, FormatStrFormatter\n", "import numpy as np\n", "\n", + "# plt.style.use('_mpl-gallery')\n", "\n", - "fig = plt.figure()\n", - "ax = fig.gca(projection='3d')\n", - "\n", - "# Make data.\n", + "# Make data\n", "X = np.arange(-5, 5, 0.25)\n", "Y = np.arange(-5, 5, 0.25)\n", "X, Y = np.meshgrid(X, Y)\n", "R = np.sqrt(X**2 + Y**2)\n", "Z = np.sin(R)\n", "\n", - "# Plot the surface.\n", - "surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,\n", - " linewidth=0, antialiased=False)\n", - "\n", - "# Customize the z 
axis.\n", - "ax.set_zlim(-1.01, 1.01)\n", - "ax.zaxis.set_major_locator(LinearLocator(10))\n", - "ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))\n", + "# Plot the surface\n", + "fig, ax = plt.subplots(subplot_kw={\"projection\": \"3d\"})\n", + "ax.plot_surface(X, Y, Z, vmin=Z.min() * 2, cmap=cm.Blues)\n", "\n", - "# Add a color bar which maps values to colors.\n", - "fig.colorbar(surf, shrink=0.5, aspect=5)\n", + "ax.set(xticklabels=[],\n", + " yticklabels=[],\n", + " zticklabels=[])\n", "\n", "plt.show()\n" ] @@ -625,9 +611,9 @@ } }, "source": [ - "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory and ``%cd --foo`` switches to the next directory in the history than contains ``foo``. You can also set ``%bookmark``s and use them with ``%cd``.\n", + "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory. You can also set ``%bookmark``s and use them with ``%cd``.\n", "\n", - "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. Go two levels up using ``%cd ..`` twice. Look at the history using ``%dhist``. Change into mandelbrot using ``%cd --brot``. Finally use ``%cd -0`` to get back to where you started from." + "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. 
Go one level up using ``%cd ..``. Look at the history using ``%dhist``. Finally use ``%cd -0`` to get back to where you started from." ] }, { @@ -772,7 +758,8 @@ }, "outputs": [], "source": [ - "a = Out[13] # Assign Out[?] to a (replace with index from two cells above)" + "a = Out[13] # Assign Out[?] to a (replace with index from two cells above)\n", + "a" ] }, { @@ -916,9 +903,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -930,7 +917,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/01_Bottlenecks.ipynb b/01_Bottlenecks.ipynb index d8ebdc8955b440b4cd8aede0d469d154ec2e1984..742c473076d110c9458a601cfa6992196aba7511 100644 --- a/01_Bottlenecks.ipynb +++ b/01_Bottlenecks.ipynb @@ -11,7 +11,7 @@ "# Bottlenecks\n", "\n", "<div class=\"dateauthor\">\n", - "20 Jun 2022 | Jan H. Meinke\n", + "12 Jun 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -443,7 +443,7 @@ }, "outputs": [], "source": [ - "from numba import jit\n", + "from numba import njit as jit\n", "jdot = jit(dot)" ] }, @@ -511,7 +511,7 @@ }, "outputs": [], "source": [ - "import numpy; from numba import jit\n", + "import numpy; from numba import njit as jit\n", "\n", "@jit\n", "def dot2(a, b):\n", @@ -545,7 +545,7 @@ } }, "source": [ - "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s)." + "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. 
This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s) making this compute bound on a single core." ] }, { @@ -716,6 +716,13 @@ "print(2e-9 * n**3 / t_numpy_single.best, \"GFLOP/s (single core).\") " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The maximum clock frequency of the processor is 3.4 GHz, which corresponds to a peak performance of about 54 GFLOP/s. This is pretty close." + ] + }, { "cell_type": "code", "execution_count": null, @@ -871,9 +878,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -885,7 +892,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/02_NumPy_concepts.ipynb b/02_NumPy_concepts.ipynb index 0a0138d7aef76a0295373327e4ff55a487c64546..743b74bb52b32d5fc47f18f8e4c60abf0222d974 100644 --- a/02_NumPy_concepts.ipynb +++ b/02_NumPy_concepts.ipynb @@ -11,7 +11,7 @@ "# NumPy - an HPC perspective\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Olav Zimmermann\n", + "12 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -320,7 +320,7 @@ " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask array: only subset of ndarray functionality</td><td>tiled ndarrays larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask dataframe: only subset of pandas dataframe functionality</td><td>tiled dataframes larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://docs.rapids.ai/api/cudf/nightly/user_guide/10min.html\">dask-cuDF</a></b></code></td><td>cuDF dataframe: subset of pandas dataframe functionality</td><td>tiled dataframes 
on multiple GPUs and multiple nodes</td></tr>\n", - " <tr><td><code><b><a href=\"https://sparse.pydata.org/en/0.13.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse arrays (COO layout)</td><td></td></tr>\n", + " <tr><td><code><b><a href=\"https://sparse.pydata.org/en/0.14.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse arrays (COO layout)</td><td></td></tr>\n", " <tr><td><code><b><a href=\"https://docs.scipy.org/doc/scipy/reference/sparse.html\">SciPy.sparse</a></b></code></td><td>ndarray functionality on sparse arrays (all layouts)</td><td></td></tr>\n", " </table>" ] @@ -355,9 +355,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -369,7 +369,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/03_ThinkVector.ipynb b/03_ThinkVector.ipynb index 06b96d5abad224f10f4a6f9cafb9a495610ef609..1c5fc8141855d69d78bba5be977d8940f9e707be 100644 --- a/03_ThinkVector.ipynb +++ b/03_ThinkVector.ipynb @@ -5,13 +5,14 @@ "metadata": { "slideshow": { "slide_type": "slide" - } + }, + "tags": [] }, "source": [ "# Think Vector\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -374,7 +375,7 @@ } }, "source": [ - "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. Functions, like ``dot`` that have a different output shape than input shape are called generalized ufuncs." + "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. 
Functions, like ``dot`` that have a different output shape than input shape are called ``generalized ufuncs``." ] }, { @@ -530,7 +531,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))" ] }, @@ -636,7 +637,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))\n" ] }, @@ -758,9 +759,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, "outputs": [], @@ -981,9 +979,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -995,7 +993,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/04_Particle Dynamics.ipynb b/04_Particle Dynamics.ipynb index 6ef39a8ae536679a318dda928a57c4f3902d73b8..2d6c356f8a912f14cd9d9ac19f858846a3c3fc8a 100644 --- a/04_Particle Dynamics.ipynb +++ b/04_Particle Dynamics.ipynb @@ -3,11 +3,16 @@ { "cell_type": "markdown", "id": "5451ef11-f683-4995-bda8-c9d87abaec49", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "# Particle Dynamics with Python\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -15,7 +20,12 @@ "cell_type": "code", "execution_count": null, "id": "5822f3b3-bc03-4e2f-85f1-57cb246e3a05", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "import math\n", @@ -27,7 +37,12 @@ "cell_type": "code", "execution_count": null, "id": "f7d1939b-7d73-4c0c-9d8a-d6ea39d48b49", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "%matplotlib inline" @@ -35,59 +50,183 @@ }, { "cell_type": "markdown", - "id": "19819e70-b42c-405a-958f-70c05a972ee6", - "metadata": {}, + "id": "b6798959-bbef-4f71-b696-e1069554c403", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ "Particle dynamics simulations are common in various scientific fields. They are used to simulate \n", "the formation of galaxies and the movements of molecules in a cell. Particles can have different\n", - "properties such as mass and charge and interact in different ways.\n", - "\n", + "properties such as mass and charge and interact in different ways." + ] + }, + { + "cell_type": "markdown", + "id": "9f9b8f9d-c834-4b86-9ef1-e385694d4b8c", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Equations of motion\n", "A classical particle dynamics code solves Newton's equation of motion:\n", "\n", - "$$\\mathbf F = m \\mathbf a,$$\n", - "\n", + "$$\\mathbf F = m \\mathbf a \\ \\ \\ \\ [\\mathtt{1}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "2c250750-32b7-4a74-8c3e-5c3eb6c4a13d", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F$ is the force, $m$ the mass, and $\\mathbf a$ the acceleration. $\\mathbf F$ and \n", "$\\mathbf a$ are vectors.\n", "\n", - "In general, this problem is solvable analytically for two particles only . 
If there are more \n", + "In general, this problem is solvable analytically for two particles only. If there are more \n", "particles, we have to look for a numerical solution.\n", "\n", - "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as\n", - "\n", - "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt$$\n", - "\n", - "and the position $\\mathbf r$ as\n", - "\n", - "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2.$$\n", - "\n", + "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as" + ] + }, + { + "cell_type": "markdown", + "id": "00ee5853-283f-4786-bd4c-81ca9ab7b3b2", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt \\ \\ \\ \\ [\\mathtt{2}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "a6e75808-f266-4a57-9837-5b9aa69ee436", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "and the position $\\mathbf r$ as" + ] + }, + { + "cell_type": "markdown", + "id": "27adecd9-7499-4a86-bb62-15dd40377c72", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2 \\ \\ \\ \\ [\\mathtt{3}].$$" + ] + }, + { + "cell_type": "markdown", + "id": "35260044-1b70-46c5-8bfd-8475566037b4", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "If we know all the positions, velocities and masses at time $t$ and can calculate the forces, we \n", "can follow the motion of the particles over time." 
] }, { "cell_type": "markdown", - "id": "50ad1731-c5b0-4922-adc8-14e507a7b6b8", - "metadata": {}, + "id": "0167c3d7-4abc-4635-b53d-aa38072ff922", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Gravitational force" + ] + }, + { + "cell_type": "markdown", + "id": "96292513-eaee-4617-bacd-4d13a1f6f8ab", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "## Gravitational force\n", "Let's assume our particles only interact via gravity. Then the force between two particles is given \n", - "by\n", - "\n", - "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t),$$\n", - "\n", + "by" + ] + }, + { + "cell_type": "markdown", + "id": "cbab8258-28f9-41db-9dda-7f4a5be57603", + "metadata": { + "tags": [] + }, + "source": [ + "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t) \\ \\ \\ \\ [\\mathtt{4}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "c55acb8e-6cb4-459c-9241-9e42eb364b72", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F_{ij}(t)$ is the force on particle $i$ due to particle $j$. 
$r_{ij}(t)$ is the \n", "distance between particles $i$ and $j$, and $\\mathbf {\\hat r}_{ij}(t)$ is the unit vector pointing\n", "from $i$ to $j$.\n", "\n", - "To get the total force on particle $i$, we need to sum over all $j \\neq i$:\n", - "\n", - "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t).$$" + "To get the total force on particle $i$, we need to sum over all $j \\neq i$:" + ] + }, + { + "cell_type": "markdown", + "id": "d36faa34-7345-4e94-b19b-62e4419417e0", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t) \\ \\ \\ \\ [\\mathtt{5}].$$" ] }, { "cell_type": "markdown", "id": "32f7c975-ed21-4c70-9168-5b7bfa5ca276", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## The algorithm" ] @@ -107,8 +246,13 @@ }, { "cell_type": "markdown", - "id": "539c2d60-df7b-471b-a438-d9b4efb51781", - "metadata": {}, + "id": "efba7cbf-301a-4e5c-81d4-1394c5ec3c9f", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## (Parallel) Patterns\n", "In Think Vector, we got to know some patterns. 
Let's see how we can apply them here:\n", @@ -124,7 +268,19 @@ " \n", "Calculate the new position:\n", " This is a map, too.\n", - " \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "76d2db76-3bac-4465-9512-babcef5e721b", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "Now, let's try to express this in code.\n", " " ] @@ -133,7 +289,12 @@ "cell_type": "code", "execution_count": null, "id": "b4525c8a-378a-45b7-b1e2-b67f5f07d397", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "# Initialize positions and velocities\n", @@ -143,9 +304,11 @@ "dt = 0.1 # time step\n", "G = 1 # For simplicity we set the universal graviational constant to 1\n", "m = 1 # This corresponds to 150 x 10^9 kg\n", + "# random initial positions\n", "x = [random.uniform(-L2, L2) for i in range(N)]\n", "y = [random.uniform(-L2, L2) for i in range(N)]\n", "z = [random.uniform(-L2, L2) for i in range(N)]\n", + "# zero initial velocities\n", "vx = [0 for i in range(N)]\n", "vy = [0 for i in range(N)]\n", "vz = [0 for i in range(N)]" @@ -154,24 +317,62 @@ { "cell_type": "markdown", "id": "8fd053d2-8c88-4666-82ed-0316fe21ac34", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "### Calculating forces" ] }, { "cell_type": "markdown", - "id": "41861767-e08d-45b8-802a-28b269e3f7ee", - "metadata": {}, + "id": "ac5e70be-cafd-41cd-b866-5b98ee28fb0a", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "To calculate the forces (see eq. 4), we need a distance matrix, i.e. the distance $d_{ij}$ between each pair of particles ($r_{ij}$ in eq. 4). First we calculate the distance vector for each component, x, y, and z separately, this requires 3 map operations. 
Then we calculate from these vectors the pairwise distances in three dimensional space:" + ] + }, + { + "cell_type": "markdown", + "id": "c1d0d68d-23a4-45e1-a431-91e575056e21", + "metadata": { + "tags": [] + }, + "source": [ + "$$d=\\sqrt{dx^2+dy^2+dz^2} \\ \\ \\ \\ [\\mathtt{6}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "0b29d4d1-b6ef-4615-ab11-0bed26267252", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "To calculate the force, we need the distance vector first. These are actually 3 maps (one for each component). The result is a distance matrix. As mentioned before maps are expressed as list generators:" + "(another map operation). As mentioned before maps can be expressed as list comprehensions:" ] }, { "cell_type": "code", "execution_count": null, "id": "338142b6-f973-4f7a-b5a4-77e76f3b758f", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "Dxx = [(i - j) for j in x for i in x]\n", @@ -183,21 +384,31 @@ { "cell_type": "markdown", "id": "d0156a2d-13ae-46dd-b3a8-cb7eb1aca0bf", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "Now that we have the vector components and the magnitude of the vector, we can calculate the forces." + "Now that we have the vector components and the magnitudes of the vectors, we can calculate the forces (see eq. 4). We then sum all the forces acting on one particle for each particle (see eq. 5). Note that we also calculate the forces separately for each component." 
] }, { "cell_type": "code", "execution_count": null, "id": "e841a076-504d-445b-b006-b931e3cb0bc2", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ - "Fxx = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dxx, D)] # epsilon prevents a zero in the dominator.\n", - "Fyy = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dyy, D)]\n", - "Fzz = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dzz, D)]\n", + "Fxx = [G * m * m * dxx / (d * d * d + epsilon) for dxx, d in zip(Dxx, D)] # epsilon prevents a zero in the denominator.\n", + "Fyy = [G * m * m * dyy / (d * d * d + epsilon) for dyy, d in zip(Dyy, D)]\n", + "Fzz = [G * m * m * dzz / (d * d * d + epsilon) for dzz, d in zip(Dzz, D)]\n", "Fx = [sum(Fxx[i * N: (i + 1) * N]) for i in range(N)]\n", "Fy = [sum(Fyy[i * N: (i + 1) * N]) for i in range(N)]\n", "Fz = [sum(Fzz[i * N: (i + 1) * N]) for i in range(N)]" @@ -206,19 +417,39 @@ { "cell_type": "markdown", "id": "3de052ac-7591-4477-8285-cc15c0019a7a", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's visualize the forces on the particles:" ] }, + { + "cell_type": "markdown", + "id": "235e1971-24e0-4cf8-ac27-779e5ae37684", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize forces" + ] + }, { "cell_type": "code", "execution_count": null, "id": "1133b4bb-111b-4aca-9326-22a7c29c8522", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, Fx, Fy, Fz)" ] @@ -226,7 +457,12 @@ { "cell_type": "markdown", "id": "ccea23e5-4f4b-4ff6-b379-8d45e3fe15f4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": 
[ "### Integrating the equation of motion" ] @@ -234,9 +470,14 @@ { "cell_type": "markdown", "id": "dba27f9b-350e-4e65-9f42-e3615ee30a84", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "We are ready to update the positions and velocities of our particles:" + "We are ready to update the positions and velocities of our particles. First we use eq. 3 to calculate the new positions. Note that we substituted $\\bf{a}$ in eq. 3 by $\\frac{\\mathbf{F}}{m}$ using eq. 1. " ] }, { @@ -251,11 +492,29 @@ "z = [i + v * dt + 0.5 * f / m * dt * dt for i, v, f in zip(z, vz, Fz)]" ] }, + { + "cell_type": "markdown", + "id": "52959ed7-d454-40fb-98f1-9df161873c87", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "Using the same expression for $\\bf{a}$ as above we now use eq. 2 to calculate the new velocities:" + ] + }, { "cell_type": "code", "execution_count": null, "id": "2266d4e8-8f67-4979-ae47-abf8508673a4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "vx = [v + f / m * dt for v, f in zip(vx, Fx)]\n", @@ -266,11 +525,29 @@ { "cell_type": "markdown", "id": "e4cff076-759c-477c-9758-41bb730cd606", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's take a look at the particle positions and velocities:" ] }, + { + "cell_type": "markdown", + "id": "92a88a32-4ee1-44ce-b371-afd412359a3b", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize velocities" + ] + }, { "cell_type": "code", "execution_count": null, @@ -278,7 +555,7 @@ "metadata": {}, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, vx, vy, vz)" ] @@ 
-286,15 +563,25 @@ { "cell_type": "markdown", "id": "65984f53-4b54-4f6d-aaa1-6de391150539", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ - "That's it. By going back to the [calculation of the forces](#Calculating-forces), we can follow the motion of the particles over time." + "That's it. By going back to the [calculation of the forces](#Calculating-forces) and iterating over the steps again, we can follow the motion of the particles over time." ] }, { "cell_type": "markdown", "id": "f1f30004-a9c3-4499-84e0-976937b9f8a8", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "## Exercise\n", "Rewrite the program in a vectorized manner using `ndarray`s." @@ -304,32 +591,34 @@ "cell_type": "code", "execution_count": null, "id": "039819a6-698f-43a6-a4f0-4f7b8852fbb1", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [] }, { "cell_type": "markdown", - "id": "8cb45f43-29e2-49df-a976-bf7790fe5a44", - "metadata": {}, + "id": "5a141c1e-22b6-40be-80d5-25ad2648972c", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ - "### Solution:" + "Tip: if your velocities are too small to see the directions of the velocity arrows, just scale vx, vy, and vz in the ax.quiver lines. Where do they point to? Can you do the same scaling in the list version above?" 
] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f236119-af8c-499d-86cf-1d6b98f9e5fd", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -341,7 +630,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/05_Profiling a simple md code.ipynb b/05_Profiling a simple md code.ipynb index 1590da7ae51deee65bfd51925a7a7b08a42a2b60..33a7b5601c19ff31668884fb152a7747ba3de164 100644 --- a/05_Profiling a simple md code.ipynb +++ b/05_Profiling a simple md code.ipynb @@ -10,7 +10,7 @@ "source": [ "# Profiling\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -639,20 +639,13 @@ }, "outputs": [], "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -664,7 +657,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/06_LocalParallel.ipynb b/06_LocalParallel.ipynb index 7124fa5dbc527c59c923e7160e7b57962ce69792..e716e9c43cba437fc60fb2df5e3c49a0642bcbdc 100644 --- a/06_LocalParallel.ipynb +++ b/06_LocalParallel.ipynb @@ -11,7 +11,7 @@ "# Interactive Parallel Computing with IPython Parallel\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -27,7 +27,7 @@ "\n", "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`.\n", + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`.\n", "\n", "In the terminal type ``ipcluster``. You'll see the help message telling you that you need to give it subcommand. Take a look at the message and then enter \n", "\n", @@ -110,7 +110,7 @@ } }, "source": [ - "Now let's see how we access the \"Cluster\". [IPython][IP] comes with a module [ipyparallel][IPp] that is used to access the engines, we just started. We first need to import Client.\n", + "Now let's see how we access the \"Cluster\". Originally, [ipyparallel][IPp] was developed as a part of [IPython][IP]. In the meantime it's developed separately. It is used to access the engines, we just started. We first need to import Client.\n", "\n", "[IPp]: https://ipyparallel.readthedocs.io/en/latest/\n", "[IP]: http://www.ipython.org" @@ -369,7 +369,7 @@ "outputs": [], "source": [ "with rc[:].sync_imports():\n", - " import matplotlib.pyplot" + " import numpy.linalg" ] }, { @@ -377,7 +377,8 @@ "metadata": { "slideshow": { "slide_type": "notes" - } + }, + "tags": [] }, "source": [ "Unfortunately mapping of namespaces does not work that way." 
@@ -388,7 +389,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "## Using the Direct View" @@ -473,7 +475,7 @@ }, "outputs": [], "source": [ - "%%px\n", + "%%px --local\n", "import threadpoolctl\n", "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" ] @@ -523,8 +525,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Execute and Apply" @@ -545,9 +548,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -559,8 +560,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -572,8 +574,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -585,8 +588,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -699,8 +703,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Remote functions" @@ -736,9 +741,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -767,7 +770,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -779,7 +783,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "A `remote` function, on the other hand just runs on each engine with the full set of data." 
@@ -820,8 +825,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Moving data around" @@ -830,9 +836,7 @@ { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ "So far the runtime has taken care of moving data to and from the engines, but we can do this explicitely. There are 4 commands to do that:\n", @@ -1029,6 +1033,20 @@ "y" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "type(y)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1048,7 +1066,7 @@ } }, "source": [ - "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timit`` and its sibbling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 3 times). For short running routines many loops of 3 executions are performed and the minimum time measured is then displayed. The number of loops and the number of executions can be adjusted. Take a look at the documentation. Give it a try." + "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timeit`` and its sibbling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 7 repeats). 
For short running routines a loop of many executions is performed per repeat and the mean and standard deviation of the time per loop are then displayed. The number of loops and the number of repeats can be adjusted. Take a look at the documentation. Give it a try." ] }, { @@ -1124,7 +1142,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1148,7 +1166,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1209,7 +1227,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1222,7 +1240,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1238,6 +1256,32 @@ "%timeit -n 1 -r 4 rc[0].execute('c = a.dot(b)')" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "source": [ + "Note that the previous call was non-blocking since this is the default for ``execute`` and we have not specified anything else for the view rc[0]. The next line shows the blocking variant:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%timeit -n 10 -r 7 rc[0].execute('c = a.dot(b)', block=True)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1248,7 +1292,7 @@ }, "outputs": [], "source": [ - "%timeit a.dot(b)" + "%timeit -n 10 -r 7 a.dot(b)" ] }, { @@ -1272,7 +1316,7 @@ } }, "source": [ - "We can start about 500 parallel tasks per second and finish about a quarter as many. This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." + "We can start about 2000 parallel tasks per second and finish about a tenth as many. 
This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." ] }, { @@ -1322,15 +1366,15 @@ }, "outputs": [], "source": [ - "%timeit dview.push(dict(a=a))\n", - "%timeit dview.push(dict(a=a[:128*1024]))\n", - "%timeit dview.push(dict(a=a[:64*1024]))\n", - "%timeit dview.push(dict(a=a[:32*1024]))\n", - "%timeit dview.push(dict(a=a[:16*1024]))\n", - "%timeit dview.push(dict(a=a[:8*1024]))\n", - "%timeit dview.push(dict(a=a[:4*1024]))\n", - "%timeit dview.push(dict(a=a[:2*1024]))\n", - "%timeit dview.push(dict(a=a[:1024]))" + "%timeit -n 20 dview.push(dict(a=a))\n", + "%timeit -n 20 dview.push(dict(a=a[:128*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:64*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:32*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:16*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:8*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:4*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:2*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:1024]))" ] }, { @@ -1341,7 +1385,7 @@ } }, "source": [ - "Calculate the bandwidth for the largest array and the smallest array." + "Calculate the bandwidth for the largest array and the smallest array. Replace the numbers below with the times you measured." ] }, { @@ -1354,8 +1398,8 @@ }, "outputs": [], "source": [ - "bwmax = len(rc) * 256 * 8 / 9.8e-3\n", - "bwmin = len(rc) * 8 / 6.1e-3\n", + "bwmax = len(rc) * 256 * 8 / 9.83e-3\n", + "bwmin = len(rc) * 8 / 4.25e-3\n", "print(\"The bandwidth is between %.2f kB/s and %.2f kB/s.\" %( bwmin, bwmax))" ] }, @@ -1478,25 +1522,26 @@ } }, "source": [ - "There are different ways to parallelize a matrix-matrix multiplication. Each element of the matrix can be calculated independently." + "There are different ways to parallelize a matrix-matrix multiplication. 
Each element of the matrix can be calculated independently, but this currently seems to crash the ipcluster, so we'll skip the execution." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, - "outputs": [], "source": [ - "%%timeit \n", + "```ipython\n", + "%%timeit\n", "p = len(rc)\n", "# Distribute the elements of the result viewmatrix round robin.\n", - "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) for j in range(n)] for i in range(n)]\n", + "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) \n", + " for j in range(n)] for i in range(n)]\n", "# Wait until the calculation is done\n", - "dview.wait()\n" + "dview.wait()\n", + "```" ] }, { @@ -1507,7 +1552,7 @@ } }, "source": [ - "This, however, produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", + "It produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", "\n", "We want to calculate\n", "\n", @@ -1798,7 +1843,7 @@ "source": [ "Nothing says, we have to stop at 4 tiles nor do we have to use square tiles. We could also recursively subdivide our tiles.\n", "\n", - "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle." + "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle. Also, remember that we are transferring the data to the engines in every call!" 
] }, { @@ -1812,9 +1857,9 @@ "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1826,7 +1871,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/07_LocalTaskParallel.ipynb b/07_LocalTaskParallel.ipynb index 06251ec4881e8d47934b2a36085ca02f223552ef..bdd868d7b777474ff12dc906f73536d85a982960 100644 --- a/07_LocalTaskParallel.ipynb +++ b/07_LocalTaskParallel.ipynb @@ -60,6 +60,17 @@ "import numpy as np" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%px --local\n", + "import threadpoolctl\n", + "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -326,6 +337,55 @@ "BlockMatrixMultiply?" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's increase the size of the matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = 16384\n", + "A = np.random.random([n, n])\n", + "B = np.random.random([n, n])\n", + "C = np.dot(A, B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit C=np.dot(A,B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiply(A, B, n // 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiplyLB(A, B, lview, n)\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 2) # 4 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 4) # 16 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 8) # 64 tasks" + ] + }, { "cell_type": "code", "execution_count": null, @@ -336,9 +396,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -350,7 +410,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/08_Numba vectorize.ipynb b/08_Numba vectorize.ipynb index 41f7143fde273cc296ad18de19c5b8d7afb81e0a..7a3da648d7ba77c8356e15dd321ed2583dc9fb52 100644 --- a/08_Numba vectorize.ipynb +++ b/08_Numba vectorize.ipynb @@ -11,7 +11,7 @@ "# Numba vectorize\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -540,9 +540,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -554,7 +554,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/09_NumbaIntro.ipynb b/09_NumbaIntro.ipynb index 58c653aa7798aff41d0e8d6f59ce713b72a9ee78..084c6f9233c9a1a994a2b4632e124286b804162a 100644 --- a/09_NumbaIntro.ipynb +++ b/09_NumbaIntro.ipynb @@ -11,7 +11,7 @@ "# Introduction to Numba's jit compiler\n", "\n", "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -23,7 +23,7 @@ } }, "source": [ - "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write CUDA kernels in Python! In this notebook, we'll focus on the jit compiler." + "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write GPU kernels in Python! In this notebook, we'll focus on the jit compiler." ] }, { @@ -54,13 +54,14 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy\n", - "from numba import jit\n", + "from numba import njit as jit\n", "from matplotlib import pyplot as plt " ] }, @@ -82,7 +83,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -104,13 +106,26 @@ "When we call `python_sum`, the interpreter goes through it line by line. 
For each item it has to interpret `res += x` and execute it, i.e., call apropriate C routines that have been compiled for the processor. The only requirements for `a` in this function are that it is iterable and its elements support the `+` operator. For the following little benchmark, we'll use an `ndarray` of random numbers." ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "The Faster CPython project is working on performance improvements for CPython. This includes inlining function calls and choosing special paths if the interpreter detects that types and objects are stable [PEP659](https://peps.python.org/pep-0659/)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -123,7 +138,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -136,7 +152,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -151,7 +168,7 @@ } }, "source": [ - "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUWELS is about 40 GFLOP/s." + "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUSUF is about 36 GFLOP/s." ] }, { @@ -253,7 +270,7 @@ } }, "source": [ - "Yes, there are good reasons to love Python (and other higher programming languages).\n", + "Yes, there are good reasons to love higher programming languages.\n", "\n", "Let's run the code:\n", "```\n", @@ -261,7 +278,7 @@ "Sum: 5033.24 in 0.717281 µs. 13941.5 MFLOP. \n", "```\n", "\n", - "The function takes about 0.7 µs. This is about 2000 times faster than the interpreted Python loop. \n", + "The function takes about 0.7 µs. 
This is more than 10,000 times faster than the interpreted Python loop. \n", "Wouldn't it be great if we could take the Python code in `python_sum` and compile it to machine \n", "code to get some of this speedup?" ] @@ -322,7 +339,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "numba_sum = jit(python_sum)" @@ -334,7 +353,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -358,7 +378,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -405,7 +426,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -429,7 +451,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -454,7 +477,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -473,7 +497,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -487,7 +512,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -500,7 +526,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -524,7 +551,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -544,7 +572,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -557,7 +586,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -570,7 +600,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -584,7 +615,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], 
"source": [ @@ -597,7 +629,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -610,7 +643,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -628,7 +662,9 @@ }, "source": [ "### Exercise: prange\n", - "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the arguments ``nopython=True`` and ``parallel=True`` to the jit decorator.\n", + "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the ``parallel=True`` to the jit decorator.\n", + "\n", + "We imported ``njit`` as ``jit`` at the beginning of the notebooks since ``nopython=True`` will soon become the default. If you use ```from numba import jit``` you need to explicitly write ``nopython=True`` below.\n", "\n", "Rerun and compare.\n", "\n", @@ -654,7 +690,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -673,7 +710,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -686,7 +724,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -710,11 +749,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython=True)\n", + "@jit\n", "def numba_mm3(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in range(a.shape[0]):\n", @@ -729,7 +769,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -742,11 +783,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython = True)\n", + "@jit\n", "def numba_mm4(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " 
for row in range(a.shape[0]):\n", @@ -762,7 +804,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -775,7 +818,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -1125,11 +1169,7 @@ "source": [ "This is much better. The `ps` at the end of `vaddps` stands for *packed single precision* indicating \n", "a SIMD instruction. The `ymm` registers used are 256 bits wide, which corresponds to 8 single precision\n", - "numbers at a time.\n", - "\n", - "Skylake-X also has `zmm` registers with a width of 512 bit or 16 single precision numbers, but when\n", - "they are used the maximum frequency of the processor is reduced. It can happen that the performance \n", - "using `ymm` registers at higher frequency is actually better." + "numbers at a time." ] }, { @@ -1184,9 +1224,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1198,7 +1238,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/10_Speeding up your code with Cython.ipynb b/10_Speeding up your code with Cython.ipynb index 571852bda8784701e96472f0d6b3b6e26d334f01..f3b666962390f340416fd0781135126104bb9d25 100644 --- a/10_Speeding up your code with Cython.ipynb +++ b/10_Speeding up your code with Cython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -65,7 +65,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -78,7 +79,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -103,7 +105,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -121,7 +124,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -135,7 +139,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -149,7 +154,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -164,7 +170,7 @@ } }, "source": [ - "Elementwise access to NumPy arrays can in the meantime be just as fast as access for lists.\n", + "Elementwise access to NumPy arrays is often slower than elementwise access to lists.\n", "\n", "Now let us invoke Cython" ] @@ -175,7 +181,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -194,7 +201,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -208,7 +216,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -232,7 +241,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -277,7 +287,7 @@ } }, "source": [ - "The arguments `v` and `w` are very general. If we know, however, that we are only going to pass ndarrays of integers, we can be more specific:" + "The arguments `v` and `w` are very general. 
If we know that we are only going to pass ndarrays of integers, we can be more specific:" ] }, { @@ -286,7 +296,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -320,7 +331,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -344,7 +356,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -368,7 +381,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -403,7 +417,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -430,7 +445,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -454,7 +470,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -482,7 +499,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -507,7 +525,8 @@ "metadata": { "slideshow": { "slide_type": "-" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -548,7 +567,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -576,7 +596,7 @@ "source": [ "Since Cython generates compiled Python extensions, we can release the GIL and run things in parallel if we don't make calls to the Python API.\n", "\n", - "As we've seen our inner loop is free of any Python calls (the annotated code is white). Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitely release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." + "As we've seen our inner loop is free of any Python calls (the annotated code is white). 
Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitly release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." ] }, { @@ -585,7 +605,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -615,7 +636,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -728,9 +750,9 @@ } }, "source": [ - "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use `distutils` and a `setup.py` file.\n", + "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use the `setuptools` provided with Cython and a `setup.py` file. For details see the [documentation](https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#basic-setup-py).\n", "\n", - "Note that distutils has been marked as deprecated as of Python 3.10, but we are still using Python 3.9.x on our systems." + "Note that distutils has been marked as deprecated as of Python 3.10." 
] }, { @@ -749,7 +771,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", + "from setuptools import setup\n", "from Cython.Build import cythonize\n", "setup(name=\"Sum of integers\",\n", " ext_modules=cythonize(\"sum.pyx\"),\n", @@ -785,8 +807,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", - "from distutils.extension import Extension\n", + "from setuptools import Extension, setup\n", "from Cython.Build import cythonize\n", "\n", "ext_modules = [\n", @@ -1270,9 +1291,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1284,7 +1305,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/11_Writing your own Python bindings.ipynb b/11_Writing your own Python bindings.ipynb index 78dabd21f28ab14d38e57b7b7f51a772a6f310e4..96beaf4ce4b378d1374c460ca1cf7cc266919ff9 100644 --- a/11_Writing your own Python bindings.ipynb +++ b/11_Writing your own Python bindings.ipynb @@ -16,7 +16,7 @@ "metadata": {}, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -92,7 +92,7 @@ "\n", "Wait until the build has finished and then continue with this notebook.\n", "\n", - "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2119/hpcpy22`." + "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2318/hpcpy23`." ] }, { @@ -222,7 +222,7 @@ } }, "source": [ - "What if word_frequency had been written Fortran?" 
+ "What if word_frequency had been written in Fortran?" ] }, { @@ -268,7 +268,7 @@ "source": [ "### Exercise\n", "Use the terminal that you used earlier to run `build.sh` or open a new one. Make sure you are in the \n", - "tutorial directory. Source `hpcpy22` using `source $PROJECT/hpcpy22`. Change into code/textstats/ and compile \n", + "tutorial directory. Source `hpcpy23` using `source $PROJECT/hpcpy23`. Change into code/textstats/ and compile \n", "the file word_frequency.F90 with the following command:\n", "\n", "```bash\n", @@ -368,7 +368,7 @@ "source": [ "Now, the name of the function will always be `word_frequency`. `bind` takes as optional argument the name under which the function should be known to C: bind(c, name=\"wf\") would let us call the function as `wf(filename, word)` from C (and Python).\n", "\n", - "To learn more about [CFFI](https://bitbucket.org/cffi/cffi) look at it's [documentation](https://cffi.readthedocs.io/en/latest/)." + "To learn more about CFFI look at its [documentation](https://cffi.readthedocs.io/en/latest/)." ] }, { @@ -1481,9 +1481,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1495,7 +1495,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/12_Introduction to MPI.ipynb b/12_Introduction to MPI.ipynb index 8a0c0e3f212bce3290952a2e5ebd789be3218588..dd9cf88fce022a9e30469bc08cacd06ba6ee6b46 100644 --- a/12_Introduction to MPI.ipynb +++ b/12_Introduction to MPI.ipynb @@ -11,7 +11,7 @@ "# Introduction to MPI\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -218,7 +218,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A slbio python3 hello_mpi.py " + "!srun --pty -n 4 -p batch -A training2318 --reservation tr2318-20230615-cpu python3 hello_mpi.py " ] }, { @@ -315,7 +315,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_ptp.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_ptp.py" ] }, { @@ -348,7 +348,7 @@ } }, "source": [ - "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` command is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." + "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` commands is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." ] }, { @@ -405,7 +405,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_sendrecv.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_sendrecv.py" ] }, { @@ -458,7 +458,7 @@ "tags": [] }, "source": [ - "Next, we'll sum up the partial results and then use sum up (`reduce`) the partial results:" + "Next, we'll sum up `a_partial` and then use `reduce` to sum up the partial results:" ] }, { @@ -558,7 +558,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_reduction.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_reduction.py" ] }, { @@ -580,7 +580,7 @@ } }, "source": [ - "`mpi4py` offers two version of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `np.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python object. 
The result is given as the return value. Note, that for the uppercase versions all `a_partial` must have the same size!" + "`mpi4py` offers two versions of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `numpy.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python objects. The result is given as the return value. Note, that for the uppercase versions all `a_partial` must have the same size!" ] }, { @@ -665,7 +665,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_upper.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_upper.py" ] }, { @@ -676,7 +676,7 @@ } }, "source": [ - "The following works independent of the size of a_partial:" + "The following code uses the lowercase versions of the calls and works independent of the size of a_partial:" ] }, { @@ -751,7 +751,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_lower.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_lower.py" ] }, { @@ -1038,7 +1038,7 @@ "3. Time the execution of the program from the second part of the exercise.\n", "\n", " a) Keep the size of the system constant and increase the number of ranks/domain, e.g., using 2, \n", - " 4, 8, and 16 ranks. How \n", + " 4, 8, and 16 ranks. How does the timing change?\n", " \n", " b) Keep the size of the domains constant, i.e., the total size is a multiple of the number of \n", " ranks. Again increase the number of ranks\n", @@ -1344,7 +1344,7 @@ "source": [ "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. 
Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`." + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`." ] }, { @@ -1362,7 +1362,7 @@ "\n", "```bash\n", "export OMP_NUM_THREADS=32\n", - "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2219 ipengine start\n", + "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2318 --reservation tr2318-20230615-cpu ipengine start\n", "```\n", "\n", "**Note**, you can can start the controller and the engines in separate terminals. That will keep the output separate." @@ -1736,13 +1736,20 @@ "source": [ "sum(sum_partial)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1754,7 +1761,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/13_Introduction to CuPy.ipynb b/13_Introduction to CuPy.ipynb index 4d5438baa512763eaa12a2dae871265457063f52..e2cafd82e34cb46fdf02a52671fb6913d4869f42 100644 --- a/13_Introduction to CuPy.ipynb +++ b/13_Introduction to CuPy.ipynb @@ -10,7 +10,7 @@ "source": [ "# Introduction to CuPy\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. 
Meinke\n", "</div>\n", "<img src=\"images/cupy.png\" style=\"float:right\">" ] @@ -134,7 +134,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul.py" ] }, { @@ -214,7 +214,21 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!srun --pty -N 1 -p develgpus -A training2318 --time 00:10:00 python3 cupy_matrix_mul_w_timing.py" ] }, { @@ -375,7 +389,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing2.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing2.py" ] }, { @@ -434,7 +448,7 @@ }, "outputs": [], "source": [ - "!srun -p batch -n 1 -c 256 -A training2219 python numpy_matrix_mul_w_timing2.py" + "!srun -p batch -n 1 -c 256 -A training2318 --pty --reservation tr2318-20230615-cpu python3 numpy_matrix_mul_w_timing2.py" ] }, { @@ -568,7 +582,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_to_and_fro.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cupy_to_and_fro.py" ] }, { @@ -656,9 +670,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -670,7 +684,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/14_CUDA for Python.ipynb 
b/14_CUDA for Python.ipynb index d9241f89e307859bb0f172b24c7d121346eea1ed..4964eb38d57825c9a645595c8bc66a1a93e61f73 100644 --- a/14_CUDA for Python.ipynb +++ b/14_CUDA for Python.ipynb @@ -11,7 +11,7 @@ "# Numba and GPUs\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -158,7 +158,7 @@ " return i\n", " return maxtime\n", "\n", - "if __name__ == \"__main__:\n", + "if __name__ == \"__main__\":\n", " import numpy\n", " x = numpy.linspace(-2, 2, 500)\n", " y = numpy.linspace(-1.5, 1.5, 375)\n", @@ -182,7 +182,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython mandelbrot_vectorize_cuda.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython mandelbrot_vectorize_cuda.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean():.3f}±{t_gpu.std():.3f} s.\")" ] @@ -305,7 +305,7 @@ "source": [ "GPUs were (and are) made to display graphics on your screen. It doesn't matter how quickly a GPU can update a single pixel. It's important how quickly it can update all of the pixels on the screen (more than 2 million on an HD display). In addition it often must perform the same operation on a lot of vertices or pixels. \n", "\n", - "These two conditions let to a different execution model." + "These two conditions led to a different execution model." 
] }, { @@ -833,7 +833,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot1.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot1.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -907,7 +907,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot2.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot2.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -966,7 +966,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot3.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot3.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1147,7 +1147,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot4.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot4.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1207,7 +1207,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cuda_matrixmul.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cuda_matrixmul.py" ] }, { @@ -1378,9 +1378,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1392,7 +1392,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" 
} }, "nbformat": 4, diff --git a/15_CUDA and MPI.ipynb b/15_CUDA and MPI.ipynb index ae23563e7b1a35ff5b015a36bfdb2fb0b3d2659c..8f0eafbe5f7eab32910ddac524e2601f144eb1fb 100644 --- a/15_CUDA and MPI.ipynb +++ b/15_CUDA and MPI.ipynb @@ -11,7 +11,7 @@ "# CUDA for Python and MPI4Py\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -344,12 +344,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - }, - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -419,7 +414,7 @@ "import cupy\n", "# Create an array with N * number_of_ranks elements\n", "N = 1000\n", - "a_partial = cup.empty(N)\n", + "a_partial = cupy.empty(N)\n", "if my_rank == 0:\n", " a = cupy.random.random(N * number_of_ranks)\n", "else:\n", @@ -532,12 +527,12 @@ " block = 256\n", " grid = N // block if N % block == 0 else N // block + 1 \n", " shift[grid, block](-0.75, a_partial)\n", - " print(f\"[{my_rank}] The average of a_partial is {cupy.mean(a_partial):.3f}\")\n", + " print(f\"[{my_rank}] The average of a_partial after shifting is {cupy.mean(a_partial):.3f}\")\n", " # Collect the data again on rank 0\n", " comm.Gather(a_partial, a, root = 0) \n", "\n", " if my_rank == 0:\n", - " print(\"The average of a is %.2f\" % cupy.mean(a)) # Result should be near zero.\n", + " print(\"The average of a after shifting is %.2f\" % cupy.mean(a)) # Result should be near zero.\n", " \n", " \n", "if __name__ == \"__main__\":\n", @@ -555,15 +550,22 @@ }, "outputs": [], "source": [ - "!srun -p gpus -n 4 -A training2219 xenv -L mpi-settings/CUDA python cuda_aware_mpi_shift.py" + "!srun -p gpus -n 4 -A training2318 --reservation tr2318-20230615-gpu xenv -L mpi-settings/CUDA python3 cuda_aware_mpi_shift.py" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC 
Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -575,7 +577,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/16_Introduction to Dask.ipynb b/16_Introduction to Dask.ipynb index 62c461d7664e956409580fa8b84c993ac8e4f709..8df4e8f0f98a604af0c256340176fa79f9b21f2e 100644 --- a/16_Introduction to Dask.ipynb +++ b/16_Introduction to Dask.ipynb @@ -11,7 +11,7 @@ "# Introduction to Dask\n", "\n", "<div class=\"dateauthor\">\n", - "10 June 2021 | Olav Zimmermann\n", + "16 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -165,7 +165,7 @@ } }, "source": [ - "The task graph generated by `dask` can be visualized (don't try this for large graphs!)." + "The task graph generated by `dask` can be visualized (don't try this for large graphs, i.e. more input tasks!)." ] }, { @@ -211,22 +211,6 @@ "- Change the program in a way that enables you to estimate how much overhead per task is incurred by Dask." 
] }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - }, - "tags": [ - "Poll" - ] - }, - "source": [ - "Poll: \n", - "- largest number of inputs you can process under 8 seconds: A) 12 B) 24 C) 48 D) 96\n", - "- task overhead per task: A) 100µs B) 200µs C) 1ms D) 2ms" - ] - }, { "cell_type": "markdown", "metadata": { @@ -283,7 +267,8 @@ "source": [ "l=[x for x in range(1000000)]\n", "s= db.from_sequence(l,npartitions=4) # you can manually set the number of partitions\n", - "mysum=s.fold(add) # fold performs a parallel reduction " + "mysum=s.fold(add) # fold performs a parallel reduction \n", + "mysum.dask # another inspection method for task graphs in dask" ] }, { @@ -306,7 +291,7 @@ "outputs": [], "source": [ "%time result=mysum.compute()\n", - "result=mysum.compute\n", + "result=mysum.compute()\n", "result" ] }, @@ -320,8 +305,7 @@ }, "outputs": [], "source": [ - "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))\n", - "r[:5] #note: apparently no type coercion!" + "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))" ] }, { @@ -345,7 +329,8 @@ "source": [ "**Exercise:**\n", "\n", - "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations. \n", + "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations.\n", + "Make sure to return a list in all cases.\n", "\n", "Conclusions? " ] @@ -393,7 +378,7 @@ "source": [ "## dask.array\n", "\n", - "**`dask.dataframe`** is the distributed equivalent of numpy ndarray." + "**`dask.array`** is the distributed equivalent of numpy ndarray." 
] }, { @@ -489,7 +474,7 @@ "outputs": [], "source": [ "x_dask = da.random.normal(10, 0.1, size=(10000,3000), chunks=(5000,3000)) # using as many chunks as CPU cores is good for random number calculation\n", - "x_rechunked=x_dask.rechunk((1000,3000)) # larger chunks are no longer better for dot product calculation\n", + "x_rechunked=x_dask.rechunk((2500,3000)) # larger chunks are no longer better for dot product calculation\n", "y_dask = x_rechunked.transpose()\n", "result=x_dask.dot(y_dask)\n", "#with ProgressBar():\n", @@ -517,7 +502,7 @@ } }, "source": [ - "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`. See **Dashboard** when you started the client above that shows the address and port of the web server." + "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`." ] }, { @@ -583,9 +568,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy21" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -597,7 +582,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/17_Debugging.ipynb b/17_Debugging.ipynb index 22117b23f48eb2426597982c144d0aadf923c7de..dbd87d3b7b918b6cec67cd9dd28d88982d5a3f9e 100644 --- a/17_Debugging.ipynb +++ b/17_Debugging.ipynb @@ -6,17 +6,13 @@ "source": [ "# Debugging Python\n", "<div class=\"dateauthor\">\n", - "07 June 2021 | Jan H. Meinke\n", + "06 June 2023 | Jan H. Meinke, Olav Zimmermann\n", "</div>" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ "What do you do if a program doesn't produce the results you want? You can stare at the code and try to figure out the mistake. You can add lots of print statements to your code. 
Or you can use a debugger.\n", "\n", @@ -25,221 +21,184 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Debugging has its own terminology: You step in and out of functions. You move up and down the stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger I know does) support.\n", - "\n", - "In this notebook, we'll look at debugging a program with PDB in the notebook and pudb in a terminal window. You'll learn how to start a debugging session and do all the things, I talked about in the previous paragraph." + "### _\"Debuggers don't remove bugs. They only show them in slow motion.\"_ (Unknown)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## PDB" + "Debugging has its own terminology: You step in and out of functions. You move up and down the call stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger we know does) support.\n", + "\n", + "In this notebook, we'll introduce several different debuggers. We'll debug code within a notebook cell with the builtin debugger of JupyterLab as well as with PDB. Then we will use pudb to debug a program in a terminal window. You'll learn how to start a debugging session and do all the things, described in the previous paragraph." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is available from within a notebook, but it's not very convenient to use." 
+ "## Runtime debugging with the JupyterLab builtin debugger" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Let's take the following function, which contains a bug and throws an exception." + "Before running the following cell try to guess what will happen: will it throw an error or a warning or will it execute normally? \n", + "If it is one of the latter two cases, what will it print?" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "#%%writefile buggy.py\n", - "def imabuggyincrement(i,a):\n", - " \"\"\"Increment a[i] by 1.\"\"\"\n", - " if ii < len(a):\n", - " a[i] += 1;\n", + "a,b,c,d,e=range(5)\n", + "from numpy import *\n", + "f=array([a,b,c,d,e], dtype=int)\n", + "def doubleme(input_array):\n", + " result=input_array*2\n", + " return result\n", + "def doublesummer(input_vec):\n", + " result=doubleme(input_vec)\n", + " result=result.sum()\n", + " return result\n", + "print(f'The result is {doublesummer(f)}.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using a debugger to execute a code (or part of it) step by step is also called **runtime debugging**. \n", "\n", - "a = list(range(10))\n", - "ii = 4\n", - "imabuggyincrement(10, a)" + "You can switch on JupyterLab's internal debugger by clicking on the small bug icon at the top right of the notebook, next to the kernel name. You will see several panels appear in the right sidebar. In addition, each code cell of the notebook now got line numbers.\n", + "\n", + "Click on the line number of line 11 in the code cell above. A red dot appearing in front of the line number indicates that you just set a **break point**. At a break point the debugger will stop, allowing you to inspect the state of each variable that is defined at this point. 
To start the debugger and let it execute the code up to the break point just re-execute the cell [Shift-Return].\n", + "\n", + "The navigation symbols at the top of the CallStack panel will now no longer be grayed out and allow you to execute the code line by line. With \"next\" you step over function calls within the line. With \"step in\" you can jump into the python functions called in this line of code (but not into any C library functions).\n", + "\n", + "The \"Variables\" panel allows you to view either the global or the local variables and to switch between tree and table view. (for arrays the table view is preferable)\n", + "\n", + "**Exercise:** Try to find the bug in the code above. You can set a break point at any line. In case that you want to reset the kernel use the circle arrow button at the top of the notebook.\n", + "\n", + "**Note:** The builtin debugger interface is a very recent addition to JupyterHub and only provides very limited functionality and convenience.\n" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ - "## Debug magic" + "## Post mortem debugging with PDB" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "If a cell has just thrown an exception, you can inspect it with the %debug magic. Try `help` to see the available commands. Type `exit` to leave the debugger." + "If a program fails, you can no longer execute the code step by step. Nevertheless, the debugger can help you to inspect the state of the code at the time of failure. This usage is also called **post mortem debugging**. Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is also available from within a notebook, but it's not very convenient to use." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Use `p i` to print the value of `i`. 
You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" + "Let's take the following function, which contains a bug and throws an exception. **(Please switch off the internal debugger before executing the cell!)**" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "%debug" + "#%%writefile buggy.py\n", + "def imabuggyincrement(i,a):\n", + " \"\"\"Increment a[i] by 1.\"\"\"\n", + " if ii < len(a):\n", + " a[i] += 1;\n", + "\n", + "a = list(range(10))\n", + "ii = 4\n", + "imabuggyincrement(10, a)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Available debuggers" + "### The %debug magic of pdb for notebooks" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "* pdb (builtin)\n", - "* pudb\n", - "* IDEs (All the IDEs we mentioned have debugging support)" + "The cell above has just thrown an exception and within a notebook you can use the `%debug` magic provided by pdb to inspect it. Try `help` to see the available commands. Type `exit` to leave the debugger." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Uncomment the ``%%writefile`` magic before the function defintion of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" + "Use `p i` to print the value of `i`. You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" 
] }, { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "Next start pudb in a terminal with the script name as an argument. If you haven't done this in this terminal shell before, you need to source hpcpy20:" + "%debug" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "```bash\n", - "source hpcpy21\n", - "pudb3 buggy.py\n", - "```" + "## Debugging a program with pudb" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ - "We'll give you a short demonstration and then you can play with it for a little while." + "Uncomment the ``%%writefile`` magic before the function definition of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Remote debugging" + "Next start pudb in a terminal with the script name as an argument. If you haven't done this in this terminal shell before, you need to source hpcpy23:" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." + "```bash\n", + "source $PROJECT_training2318/hpcpy23\n", + "pudb buggy.py\n", + "```" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Debugging Python extensions\n", - "We'll talk about this more tomorrow (maybe)." + "We'll give you a short demonstration and then you can play with it for a little while." 
] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ "## Note\n", "\n", @@ -248,21 +207,15 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "A better way to check for bounds as I did in `iambuggyincrement` is not to do it at all but use a try...except statement instead:" + "Another way to check for bounds as the one in `imabuggyincrement` is not to do it at all but use a try...except statement instead:" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "slide" - } + "tags": [] }, "source": [ "```python\n", @@ -274,7 +227,7 @@ " pass\n", " \n", "def main(arg=[]):\n", - " a = list(range(10)\n", + " a = list(range(10))\n", " ii = 4 # Now this is limited to the scope of main()\n", " imabuggyincrement(10, a)\n", " \n", @@ -283,13 +236,63 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that you should only use the `except` statement together with `pass` in cases where you expect a certain type of error but can't control the circumstances that lead to that error. This pattern effectively hides an error state of the program and could lead to unwanted side effects if used carelessly." 
+ ] + }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, + "source": [ + "## Overview: debuggers for Python" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* [pdb][] (builtin)\n", + "* [pudb][]\n", + "* IDEs (All the IDEs we mentioned have debugging support)\n", + "* [Linaro DDT][], former name ARMForge DDT (commercial, support for debugging parallel codes and C/C++ code, only rudimentary Python support)\n", + "* [TotalView][] (commercial, support for debugging parallel codes and C/C++ code, requires debug version of CPython, supports mixed language debugging, aware of cython, pybind11 and other bindings)\n", + "\n", + "[pdb]: https://docs.python.org/3/library/pdb.html\n", + "[pudb]: https://github.com/inducer/pudb\n", + "[Linaro DDT]: https://www.linaroforge.com/linaroDdt/\n", + "[ARMForge DDT]: https://developer.arm.com/tools-and-software/server-and-hpc/debug-and-profile/arm-forge/arm-ddt\n", + "[TotalView]: https://help.totalview.io/current/HTML/index.html#page/TotalView/totalviewlhug-python.13.01.html#ww1893192" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote debugging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debugging Python extensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ "The following video shows how to debug mixed Python and C++ code using Visual Studio.\n", "\n", @@ -299,35 +302,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import YouTubeVideo\n", "\n", - "YouTubeVideo(\"D9RlT06a1EI\", start=300)" + "YouTubeVideo(\"KhuMRDY4BeU\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "hpcpy21" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -339,7 +334,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/build.sh b/build.sh index bd4b6a6bae24b285395a0c5c1b7968f5d8d4de6e..df60a479ee86c74ff1e5991b1a70d6f7ea579276 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -source $PROJECT_training2219/hpcpy22 +source $PROJECT_training2318/hpcpy23 # Build points pushd code/point rm -rf build diff --git a/hpcpy22 b/hpcpy22 deleted file mode 100644 index b29c14c38d0fc115942a5ce5bcc1eafaab192434..0000000000000000000000000000000000000000 --- a/hpcpy22 +++ /dev/null @@ -1,19 +0,0 @@ -module purge --force -module load Stages/2022 -module load GCC -module load ParaStationMPI -module load Graphviz -module load SciPy-Stack -module load numba -module load dask -module load mpi4py -module load Jupyter -module load CUDA -module load CMake -#export NUMBAPRO_NVVM=$CUDA_HOME/nvvm/lib64/libnvvm.so -#export NUMBAPRO_LIBDEVICE=$CUDA_HOME/nvvm/libdevice -export 
LD_LIBRARY_PATH=/p/project/training2219/resources/code/text_stats/build:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=/p/project/training2219/resources/code/point/build:$LD_LIBRARY_PATH -export PYTHONPATH=/p/project/training2219/packages/lib/python3.9/site-packages:$PYTHONPATH -export PATH=/p/project/training2219/packages/bin:$PATH -export HPCPY2022=1 diff --git a/hpcpy23 b/hpcpy23 new file mode 100755 index 0000000000000000000000000000000000000000..447fd63d8484bbd3ef267c5ba2d0015066b785b9 --- /dev/null +++ b/hpcpy23 @@ -0,0 +1,26 @@ +#!/bin/bash +module purge +module load Stages/2023 +module load GCC +module load ParaStationMPI +module load CMake +module load Graphviz +module load SciPy-Stack +module load numba +module load dask +module load mpi4py +module load h5py +#module load Jupyter +module load CUDA +module load cuTENSOR +module load NCCL +module load cuDNN +#export NUMBAPRO_NVVM=$CUDA_HOME/nvvm/lib64/libnvvm.so +#export NUMBAPRO_LIBDEVICE=$CUDA_HOME/nvvm/libdevice +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/text_stats/build:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/point/build:$LD_LIBRARY_PATH +export PYTHONPATH=/p/project/training2318/packages/lib/python3.10/site-packages:$PYTHONPATH +export PATH=/p/project/training2318/packages/bin:$PATH +export HPCPY2023=1 +#exec $(which python) -m ipykernel $@ + diff --git a/solutions/00_Introduction to IPython.ipynb b/solutions/00_Introduction to IPython.ipynb index 7d3b2c3fd9ab2a40d4d0560f485f80f9a2fb30f4..6f3cf8c9dea11c5f171ec78714bf7a649ad4cdef 100644 --- a/solutions/00_Introduction to IPython.ipynb +++ b/solutions/00_Introduction to IPython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "06 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -169,9 +169,7 @@ "tags": [] }, "outputs": [], - "source": [ - "import random" - ] + "source": [] }, { "cell_type": "markdown", @@ -549,47 +547,35 @@ }, "outputs": [], "source": [ - "# %load http://matplotlib.org/mpl_examples/mplot3d/surface3d_demo.py\n", - "'''\n", - "======================\n", - "3D surface (color map)\n", - "======================\n", + "# %load https://matplotlib.org/stable/_downloads/0c69e8950c767c2d95108979a24ace2f/surface3d_simple.py\n", "\n", - "Demonstrates plotting a 3D surface colored with the coolwarm color map.\n", - "The surface is made opaque by using antialiased=False.\n", + "\"\"\"\n", + "=====================\n", + "3D surface\n", + "=====================\n", "\n", - "Also demonstrates using the LinearLocator and custom formatting for the\n", - "z axis tick labels.\n", - "'''\n", - "\n", - "from mpl_toolkits.mplot3d import Axes3D\n", + "See `~mpl_toolkits.mplot3d.axes3d.Axes3D.plot_surface`.\n", + "\"\"\"\n", "import matplotlib.pyplot as plt\n", "from matplotlib import cm\n", - "from matplotlib.ticker import LinearLocator, FormatStrFormatter\n", "import numpy as np\n", "\n", + "# plt.style.use('_mpl-gallery')\n", "\n", - "fig = plt.figure()\n", - "ax = fig.gca(projection='3d')\n", - "\n", - "# Make data.\n", + "# Make data\n", "X = np.arange(-5, 5, 0.25)\n", "Y = np.arange(-5, 5, 0.25)\n", "X, Y = np.meshgrid(X, Y)\n", "R = np.sqrt(X**2 + Y**2)\n", "Z = np.sin(R)\n", "\n", - "# Plot the surface.\n", - "surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,\n", - " linewidth=0, antialiased=False)\n", - "\n", - "# Customize the z axis.\n", - "ax.set_zlim(-1.01, 1.01)\n", - "ax.zaxis.set_major_locator(LinearLocator(10))\n", - "ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))\n", + "# Plot the surface\n", + "fig, ax = plt.subplots(subplot_kw={\"projection\": \"3d\"})\n", + "ax.plot_surface(X, Y, Z, vmin=Z.min() * 2, cmap=cm.Blues)\n", "\n", - "# Add a color bar which maps values to colors.\n", - 
"fig.colorbar(surf, shrink=0.5, aspect=5)\n", + "ax.set(xticklabels=[],\n", + " yticklabels=[],\n", + " zticklabels=[])\n", "\n", "plt.show()\n" ] @@ -625,9 +611,9 @@ } }, "source": [ - "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory and ``%cd --foo`` switches to the next directory in the history than contains ``foo``. You can also set ``%bookmark``s and use them with ``%cd``.\n", + "IPython has two ways of moving around in the directory tree: ``%cd`` and ``%pushd/%popd``. Both retain their history. ``%cd``'s history is available through ``%dhist`` whereas ``%dirs`` shows the directory stack of ``%pushd/%popd``. The ``%cd `` command has some nifty options, for example, ``%cd -2`` gets you to the second to last visited directory. You can also set ``%bookmark``s and use them with ``%cd``.\n", "\n", - "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. Go two levels up using ``%cd ..`` twice. Look at the history using ``%dhist``. Change into mandelbrot using ``%cd --brot``. Finally use ``%cd -0`` to get back to where you started from." + "Make a new sub directory called scripts/mandelbrot using ``%mkdir -p scripts/mandelbrot``. Change into the directory scripts/mandelbrot using ``%cd``. Go one level up using ``%cd ..``. Look at the history using ``%dhist``. Finally use ``%cd -0`` to get back to where you started from." ] }, { @@ -772,7 +758,8 @@ }, "outputs": [], "source": [ - "a = Out[13] # Assign Out[?] to a (replace with index from two cells above)" + "a = Out[13] # Assign Out[?] 
to a (replace with index from two cells above)\n", + "a" ] }, { @@ -916,9 +903,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -930,7 +917,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/01_Bottlenecks.ipynb b/solutions/01_Bottlenecks.ipynb index d8ebdc8955b440b4cd8aede0d469d154ec2e1984..742c473076d110c9458a601cfa6992196aba7511 100644 --- a/solutions/01_Bottlenecks.ipynb +++ b/solutions/01_Bottlenecks.ipynb @@ -11,7 +11,7 @@ "# Bottlenecks\n", "\n", "<div class=\"dateauthor\">\n", - "20 Jun 2022 | Jan H. Meinke\n", + "12 Jun 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -443,7 +443,7 @@ }, "outputs": [], "source": [ - "from numba import jit\n", + "from numba import njit as jit\n", "jdot = jit(dot)" ] }, @@ -511,7 +511,7 @@ }, "outputs": [], "source": [ - "import numpy; from numba import jit\n", + "import numpy; from numba import njit as jit\n", "\n", "@jit\n", "def dot2(a, b):\n", @@ -545,7 +545,7 @@ } }, "source": [ - "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s)." + "Now, elements in b are accessed in the proper order and a[i, k] is constant for the loop. This changes our estimate, because, now we read 8 bytes/op in the innermost loop. This gives us a maximum of 190 GB/s / 8 bytes/op = 24 Gop/s (48 GFLOP/s) making this compute bound on a single core." 
] }, { @@ -716,6 +716,13 @@ "print(2e-9 * n**3 / t_numpy_single.best, \"GFLOP/s (single core).\") " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The maximum clock frequency of the processor is 3.4 GHz, which corresponds to a peak performance of about 54 GFLOP/s. This is pretty close." + ] + }, { "cell_type": "code", "execution_count": null, @@ -871,9 +878,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -885,7 +892,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/02_NumPy_concepts.ipynb b/solutions/02_NumPy_concepts.ipynb index 0a0138d7aef76a0295373327e4ff55a487c64546..743b74bb52b32d5fc47f18f8e4c60abf0222d974 100644 --- a/solutions/02_NumPy_concepts.ipynb +++ b/solutions/02_NumPy_concepts.ipynb @@ -11,7 +11,7 @@ "# NumPy - an HPC perspective\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Olav Zimmermann\n", + "12 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -320,7 +320,7 @@ " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask array: only subset of ndarray functionality</td><td>tiled ndarrays larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://www.dask.org/\">dask</a></b></code></td><td>dask dataframe: only subset of pandas dataframe functionality</td><td>tiled dataframes larger than main memory, distributed processing on multiple nodes</td></tr>\n", " <tr><td><code><b><a href=\"https://docs.rapids.ai/api/cudf/nightly/user_guide/10min.html\">dask-cuDF</a></b></code></td><td>cuDF dataframe: subset of pandas dataframe functionality</td><td>tiled dataframes on multiple GPUs and multiple nodes</td></tr>\n", - " <tr><td><code><b><a 
href=\"https://sparse.pydata.org/en/0.13.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse arrays (COO layout)</td><td></td></tr>\n", + " <tr><td><code><b><a href=\"https://sparse.pydata.org/en/0.14.0/\">sparse</a></b></code></td><td>ndarray functionality on sparse arrays (COO layout)</td><td></td></tr>\n", " <tr><td><code><b><a href=\"https://docs.scipy.org/doc/scipy/reference/sparse.html\">SciPy.sparse</a></b></code></td><td>ndarray functionality on sparse arrays (all layouts)</td><td></td></tr>\n", " </table>" ] @@ -355,9 +355,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -369,7 +369,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/03_ThinkVector.ipynb b/solutions/03_ThinkVector.ipynb index 00ff200399dc2bb0a6f99d493d6d98a2d501cd03..404514f480e4e0e8538a0bb2cd85c8925a0833e1 100644 --- a/solutions/03_ThinkVector.ipynb +++ b/solutions/03_ThinkVector.ipynb @@ -5,13 +5,14 @@ "metadata": { "slideshow": { "slide_type": "slide" - } + }, + "tags": [] }, "source": [ "# Think Vector\n", "\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -409,7 +410,7 @@ } }, "source": [ - "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. Functions, like ``dot`` that have a different output shape than input shape are called generalized ufuncs." + "Functions that act on one array (or several arrays of the same shape) and return a vector of the same shape are called ``ufuncs``. When we wrote vw = v * w, we executed the ufunc \\__mul\\__. 
Functions, like ``dot`` that have a different output shape than input shape are called ``generalized ufuncs``." ] }, { @@ -565,7 +566,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))" ] }, @@ -671,7 +672,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))\n" ] }, @@ -719,7 +720,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")\n", + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")\n", "print(\"|A-B| = %.3f\" % numpy.linalg.norm(A-B))\n", "A = B.copy()" ] @@ -759,7 +760,7 @@ "plt.subplot(1, 3, 2)\n", "plt.imshow(B, interpolation=\"nearest\")\n", "plt.subplot(1, 3, 3)\n", - "plt.imshow(A-B, interpolation=\"nearest\")" + "plt.imshow(numpy.abs(A-B), interpolation=\"nearest\")" ] }, { @@ -861,9 +862,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, "outputs": [], @@ -1280,9 +1278,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1294,7 +1292,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/04_Particle Dynamics.ipynb b/solutions/04_Particle Dynamics.ipynb index 86a786046d4382a4d489d1f66226bdc1cf036f73..76c01d9ea8865e6d176d9538c88a6e0bdaf6db37 100644 --- a/solutions/04_Particle 
Dynamics.ipynb +++ b/solutions/04_Particle Dynamics.ipynb @@ -3,11 +3,16 @@ { "cell_type": "markdown", "id": "5451ef11-f683-4995-bda8-c9d87abaec49", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "# Particle Dynamics with Python\n", "<div class=\"dateauthor\">\n", - "20 June 2022 | Jan H. Meinke\n", + "12 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -15,7 +20,12 @@ "cell_type": "code", "execution_count": null, "id": "5822f3b3-bc03-4e2f-85f1-57cb246e3a05", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "import math\n", @@ -27,7 +37,12 @@ "cell_type": "code", "execution_count": null, "id": "f7d1939b-7d73-4c0c-9d8a-d6ea39d48b49", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "%matplotlib inline" @@ -35,59 +50,183 @@ }, { "cell_type": "markdown", - "id": "19819e70-b42c-405a-958f-70c05a972ee6", - "metadata": {}, + "id": "b6798959-bbef-4f71-b696-e1069554c403", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ "Particle dynamics simulations are common in various scientific fields. They are used to simulate \n", "the formation of galaxies and the movements of molecules in a cell. Particles can have different\n", - "properties such as mass and charge and interact in different ways.\n", - "\n", + "properties such as mass and charge and interact in different ways." 
+ ] + }, + { + "cell_type": "markdown", + "id": "9f9b8f9d-c834-4b86-9ef1-e385694d4b8c", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Equations of motion\n", "A classical particle dynamics code solves Newton's equation of motion:\n", "\n", - "$$\\mathbf F = m \\mathbf a,$$\n", - "\n", + "$$\\mathbf F = m \\mathbf a \\ \\ \\ \\ [\\mathtt{1}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "2c250750-32b7-4a74-8c3e-5c3eb6c4a13d", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F$ is the force, $m$ the mass, and $\\mathbf a$ the acceleration. $\\mathbf F$ and \n", "$\\mathbf a$ are vectors.\n", "\n", - "In general, this problem is solvable analytically for two particles only . If there are more \n", + "In general, this problem is solvable analytically for two particles only. If there are more \n", "particles, we have to look for a numerical solution.\n", "\n", - "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as\n", - "\n", - "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt$$\n", - "\n", - "and the position $\\mathbf r$ as\n", - "\n", - "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2.$$\n", - "\n", + "You may remember that you can calculate the velocity $\\mathbf v$ of a particle as" + ] + }, + { + "cell_type": "markdown", + "id": "00ee5853-283f-4786-bd4c-81ca9ab7b3b2", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf v(t + dt) = \\mathbf v(t) + \\mathbf a(t) dt \\ \\ \\ \\ [\\mathtt{2}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "a6e75808-f266-4a57-9837-5b9aa69ee436", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "and the position $\\mathbf r$ as" + ] + }, + { + "cell_type": "markdown", + "id": "27adecd9-7499-4a86-bb62-15dd40377c72", + "metadata": { + 
"slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf r(t + dt) = \\mathbf r(t) + \\mathbf v(t)dt + \\frac 1 2 \\mathbf a(t) dt^2 \\ \\ \\ \\ [\\mathtt{3}].$$" + ] + }, + { + "cell_type": "markdown", + "id": "35260044-1b70-46c5-8bfd-8475566037b4", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "If we know all the positions, velocities and masses at time $t$ and can calculate the forces, we \n", "can follow the motion of the particles over time." ] }, { "cell_type": "markdown", - "id": "50ad1731-c5b0-4922-adc8-14e507a7b6b8", - "metadata": {}, + "id": "0167c3d7-4abc-4635-b53d-aa38072ff922", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Gravitational force" + ] + }, + { + "cell_type": "markdown", + "id": "96292513-eaee-4617-bacd-4d13a1f6f8ab", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "## Gravitational force\n", "Let's assume our particles only interact via gravity. Then the force between two particles is given \n", - "by\n", - "\n", - "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t),$$\n", - "\n", + "by" + ] + }, + { + "cell_type": "markdown", + "id": "cbab8258-28f9-41db-9dda-7f4a5be57603", + "metadata": { + "tags": [] + }, + "source": [ + "$$\\mathbf F_{ij}(t) = G\\frac{m_i m_j}{r_{ij}^2(t)} \\mathbf {\\hat r}_{ij}(t) \\ \\ \\ \\ [\\mathtt{4}],$$" + ] + }, + { + "cell_type": "markdown", + "id": "c55acb8e-6cb4-459c-9241-9e42eb364b72", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "where $\\mathbf F_{ij}(t)$ is the force on particle $i$ due to particle $j$. 
$r_{ij}(t)$ is the \n", "distance between particles $i$ and $j$, and $\\mathbf {\\hat r}_{ij}(t)$ is the unit vector pointing\n", "from $i$ to $j$.\n", "\n", - "To get the total force on particle $i$, we need to sum over all $j \\neq i$:\n", - "\n", - "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t).$$" + "To get the total force on particle $i$, we need to sum over all $j \\neq i$:" + ] + }, + { + "cell_type": "markdown", + "id": "d36faa34-7345-4e94-b19b-62e4419417e0", + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, + "source": [ + "$$\\mathbf F_{i}(t) = \\sum_{j\\neq i} \\mathbf F_{ij}(t) \\ \\ \\ \\ [\\mathtt{5}].$$" ] }, { "cell_type": "markdown", "id": "32f7c975-ed21-4c70-9168-5b7bfa5ca276", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## The algorithm" ] @@ -107,8 +246,13 @@ }, { "cell_type": "markdown", - "id": "539c2d60-df7b-471b-a438-d9b4efb51781", - "metadata": {}, + "id": "efba7cbf-301a-4e5c-81d4-1394c5ec3c9f", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "## (Parallel) Patterns\n", "In Think Vector, we got to know some patterns. 
Let's see how we can apply them here:\n", @@ -124,7 +268,19 @@ " \n", "Calculate the new position:\n", " This is a map, too.\n", - " \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "76d2db76-3bac-4465-9512-babcef5e721b", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ "Now, let's try to express this in code.\n", " " ] @@ -133,7 +289,12 @@ "cell_type": "code", "execution_count": null, "id": "b4525c8a-378a-45b7-b1e2-b67f5f07d397", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [ "# Initialize positions and velocities\n", @@ -143,9 +304,11 @@ "dt = 0.1 # time step\n", "G = 1 # For simplicity we set the universal graviational constant to 1\n", "m = 1 # This corresponds to 150 x 10^9 kg\n", + "# random initial positions\n", "x = [random.uniform(-L2, L2) for i in range(N)]\n", "y = [random.uniform(-L2, L2) for i in range(N)]\n", "z = [random.uniform(-L2, L2) for i in range(N)]\n", + "# zero initial velocities\n", "vx = [0 for i in range(N)]\n", "vy = [0 for i in range(N)]\n", "vz = [0 for i in range(N)]" @@ -154,24 +317,62 @@ { "cell_type": "markdown", "id": "8fd053d2-8c88-4666-82ed-0316fe21ac34", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "source": [ "### Calculating forces" ] }, { "cell_type": "markdown", - "id": "41861767-e08d-45b8-802a-28b269e3f7ee", - "metadata": {}, + "id": "ac5e70be-cafd-41cd-b866-5b98ee28fb0a", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "To calculate the forces (see eq. 4), we need a distance matrix, i.e. the distance $d_{ij}$ between each pair of particles ($r_{ij}$ in eq. 4). First we calculate the distance vector for each component, x, y, and z separately, this requires 3 map operations. 
Then we calculate from these vectors the pairwise distances in three dimensional space:" + ] + }, + { + "cell_type": "markdown", + "id": "c1d0d68d-23a4-45e1-a431-91e575056e21", + "metadata": { + "tags": [] + }, + "source": [ + "$$d=\\sqrt{dx^2+dy^2+dz^2} \\ \\ \\ \\ [\\mathtt{6}]$$" + ] + }, + { + "cell_type": "markdown", + "id": "0b29d4d1-b6ef-4615-ab11-0bed26267252", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "To calculate the force, we need the distance vector first. These are actually 3 maps (one for each component). The result is a distance matrix. As mentioned before maps are expressed as list generators:" + "(another map operation). As mentioned before maps can be expressed as list comprehensions:" ] }, { "cell_type": "code", "execution_count": null, "id": "338142b6-f973-4f7a-b5a4-77e76f3b758f", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "Dxx = [(i - j) for j in x for i in x]\n", @@ -183,21 +384,31 @@ { "cell_type": "markdown", "id": "d0156a2d-13ae-46dd-b3a8-cb7eb1aca0bf", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "Now that we have the vector components and the magnitude of the vector, we can calculate the forces." + "Now that we have the vector components and the magnitudes of the vectors, we can calculate the forces (see eq. 4). We then sum all the forces acting on one particle for each particle (see eq. 5). Note that we also calculate the forces separately for each component." 
] }, { "cell_type": "code", "execution_count": null, "id": "e841a076-504d-445b-b006-b931e3cb0bc2", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ - "Fxx = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dxx, D)] # epsilon prevents a zero in the dominator.\n", - "Fyy = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dyy, D)]\n", - "Fzz = [G * m * m * i / (d * d * d + epsilon) for i, d in zip(Dzz, D)]\n", + "Fxx = [G * m * m * dxx / (d * d * d + epsilon) for dxx, d in zip(Dxx, D)] # epsilon prevents a zero in the denominator.\n", + "Fyy = [G * m * m * dyy / (d * d * d + epsilon) for dyy, d in zip(Dyy, D)]\n", + "Fzz = [G * m * m * dzz / (d * d * d + epsilon) for dzz, d in zip(Dzz, D)]\n", "Fx = [sum(Fxx[i * N: (i + 1) * N]) for i in range(N)]\n", "Fy = [sum(Fyy[i * N: (i + 1) * N]) for i in range(N)]\n", "Fz = [sum(Fzz[i * N: (i + 1) * N]) for i in range(N)]" @@ -206,19 +417,39 @@ { "cell_type": "markdown", "id": "3de052ac-7591-4477-8285-cc15c0019a7a", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's visualize the forces on the particles:" ] }, + { + "cell_type": "markdown", + "id": "235e1971-24e0-4cf8-ac27-779e5ae37684", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize forces" + ] + }, { "cell_type": "code", "execution_count": null, "id": "1133b4bb-111b-4aca-9326-22a7c29c8522", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, Fx, Fy, Fz)" ] @@ -226,7 +457,12 @@ { "cell_type": "markdown", "id": "ccea23e5-4f4b-4ff6-b379-8d45e3fe15f4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + 
}, "source": 
[ "### Integrating the equation of motion" ] @@ -234,9 +470,14 @@ { "cell_type": "markdown", "id": "dba27f9b-350e-4e65-9f42-e3615ee30a84", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, "source": [ - "We are ready to update the positions and velocities of our particles:" + "We are ready to update the positions and velocities of our particles. First we use eq. 3 to calculate the new positions. Note that we substituted $\\bf{a}$ in eq. 3 by $\\frac{\\mathbf{F}}{m}$ using eq. 1. " ] }, { @@ -251,11 +492,29 @@ "z = [i + v * dt + 0.5 * f / m * dt * dt for i, v, f in zip(z, vz, Fz)]" ] }, + { + "cell_type": "markdown", + "id": "52959ed7-d454-40fb-98f1-9df161873c87", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "Using the same expression for $\\bf{a}$ as above we now use eq. 2 to calculate the new velocities:" + ] + }, { "cell_type": "code", "execution_count": null, "id": "2266d4e8-8f67-4979-ae47-abf8508673a4", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + }, + "tags": [] + }, "outputs": [], "source": [ "vx = [v + f / m * dt for v, f in zip(vx, Fx)]\n", @@ -266,11 +525,29 @@ { "cell_type": "markdown", "id": "e4cff076-759c-477c-9758-41bb730cd606", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "Let's take a look at the particle positions and velocities:" ] }, + { + "cell_type": "markdown", + "id": "92a88a32-4ee1-44ce-b371-afd412359a3b", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "### Visualize velocities" + ] + }, { "cell_type": "code", "execution_count": null, @@ -278,7 +555,7 @@ "metadata": {}, "outputs": [], "source": [ - "ax = plt.figure(figsize=(6, 6)).add_subplot(projection='3d')\n", + "ax = plt.figure(figsize=(5, 5)).add_subplot(projection='3d')\n", "ax.scatter3D(x, y, z)\n", "ax.quiver(x, y, z, vx, vy, vz)" ] @@ 
-286,15 +563,25 @@ { "cell_type": "markdown", "id": "65984f53-4b54-4f6d-aaa1-6de391150539", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ - "That's it. By going back to the [calculation of the forces](#Calculating-forces), we can follow the motion of the particles over time." + "That's it. By going back to the [calculation of the forces](#Calculating-forces) and iterating over the steps again, we can follow the motion of the particles over time." ] }, { "cell_type": "markdown", "id": "f1f30004-a9c3-4499-84e0-976937b9f8a8", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "source": [ "## Exercise\n", "Rewrite the program in a vectorized manner using `ndarray`s." @@ -304,14 +591,23 @@ "cell_type": "code", "execution_count": null, "id": "039819a6-698f-43a6-a4f0-4f7b8852fbb1", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "8cb45f43-29e2-49df-a976-bf7790fe5a44", - "metadata": {}, + "metadata": { + "tags": [ + "Solution" + ] + }, "source": [ "### Solution:" ] @@ -321,6 +617,9 @@ "execution_count": null, "id": "ccfc4eca-c09e-448f-93db-2122be7b484c", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -331,11 +630,32 @@ "rng = numpy.random.Generator(numpy.random.MT19937())" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "599a1169-0356-4841-a495-2b113021c652", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [ + "Solution" + ] + }, + "outputs": [], + "source": [ + "L2=5\n", + "N=50" + ] + }, { "cell_type": "code", "execution_count": null, "id": "405b8627-e887-4ec8-85cf-c391877c0b19", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -354,6 +674,9 @@ "cell_type": "markdown", "id": "0e3c0e11-dc33-46e0-a6da-05cb42ecfd9a", "metadata": { + 
"slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -371,6 +694,9 @@ "execution_count": null, "id": "8d8cad39-dd89-4379-b549-64a67465db3f", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -388,6 +714,9 @@ "execution_count": null, "id": "ba5403cc-d3d0-46d8-a1bb-01f5056d0963", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -404,6 +733,9 @@ "execution_count": null, "id": "500347dc-2dfd-4481-a81b-4276cbc00863", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -420,6 +752,9 @@ "execution_count": null, "id": "e0745105-fa07-4054-8ddc-274de4a510f8", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -436,6 +771,9 @@ "execution_count": null, "id": "699fb1a4-349b-46ad-acfc-177465aade2a", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -452,6 +790,9 @@ "execution_count": null, "id": "3fe70091-fffd-4613-a798-1f9c54bfbfa4", "metadata": { + "slideshow": { + "slide_type": "skip" + }, "tags": [ "Solution" ] @@ -464,19 +805,24 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "1f236119-af8c-499d-86cf-1d6b98f9e5fd", - "metadata": {}, - "outputs": [], - "source": [] + "cell_type": "markdown", + "id": "5a141c1e-22b6-40be-80d5-25ad2648972c", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "source": [ + "Tip: if your velocities are too small to see the directions of the velocity arrows, just scale vx, vy, and vz in the ax.quiver lines. Where do they point to? Can you do the same scaling in the list version above?" 
+ ] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -488,7 +834,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/05_Profiling a simple md code.ipynb b/solutions/05_Profiling a simple md code.ipynb index a4f9897eb842b622244308be3ba74805d26aaf06..eeca5b2485065930e1290fa83e0fd6272cd1dd78 100644 --- a/solutions/05_Profiling a simple md code.ipynb +++ b/solutions/05_Profiling a simple md code.ipynb @@ -10,7 +10,7 @@ "source": [ "# Profiling\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -660,16 +660,23 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [ + "Solution" + ] + }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -681,7 +688,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/06_LocalParallel.ipynb b/solutions/06_LocalParallel.ipynb index 7124fa5dbc527c59c923e7160e7b57962ce69792..e716e9c43cba437fc60fb2df5e3c49a0642bcbdc 100644 --- a/solutions/06_LocalParallel.ipynb +++ b/solutions/06_LocalParallel.ipynb @@ -11,7 +11,7 @@ "# Interactive Parallel Computing with IPython Parallel\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -27,7 +27,7 @@ "\n", "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`.\n", + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`.\n", "\n", "In the terminal type ``ipcluster``. You'll see the help message telling you that you need to give it subcommand. Take a look at the message and then enter \n", "\n", @@ -110,7 +110,7 @@ } }, "source": [ - "Now let's see how we access the \"Cluster\". [IPython][IP] comes with a module [ipyparallel][IPp] that is used to access the engines, we just started. We first need to import Client.\n", + "Now let's see how we access the \"Cluster\". Originally, [ipyparallel][IPp] was developed as a part of [IPython][IP]. In the meantime it's developed separately. It is used to access the engines, we just started. We first need to import Client.\n", "\n", "[IPp]: https://ipyparallel.readthedocs.io/en/latest/\n", "[IP]: http://www.ipython.org" @@ -369,7 +369,7 @@ "outputs": [], "source": [ "with rc[:].sync_imports():\n", - " import matplotlib.pyplot" + " import numpy.linalg" ] }, { @@ -377,7 +377,8 @@ "metadata": { "slideshow": { "slide_type": "notes" - } + }, + "tags": [] }, "source": [ "Unfortunately mapping of namespaces does not work that way." 
@@ -388,7 +389,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "## Using the Direct View" @@ -473,7 +475,7 @@ }, "outputs": [], "source": [ - "%%px\n", + "%%px --local\n", "import threadpoolctl\n", "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" ] @@ -523,8 +525,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Execute and Apply" @@ -545,9 +548,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -559,8 +560,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -572,8 +574,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -585,8 +588,9 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "fragment" + }, + "tags": [] }, "outputs": [], "source": [ @@ -699,8 +703,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Remote functions" @@ -736,9 +741,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "outputs": [], "source": [ @@ -767,7 +770,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -779,7 +783,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "source": [ "A `remote` function, on the other hand just runs on each engine with the full set of data." 
@@ -820,8 +825,9 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "skip" - } + "slide_type": "slide" + }, + "tags": [] }, "source": [ "## Moving data around" @@ -830,9 +836,7 @@ { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ "So far the runtime has taken care of moving data to and from the engines, but we can do this explicitely. There are 4 commands to do that:\n", @@ -1029,6 +1033,20 @@ "y" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "type(y)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1048,7 +1066,7 @@ } }, "source": [ - "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timit`` and its sibbling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 3 times). For short running routines many loops of 3 executions are performed and the minimum time measured is then displayed. The number of loops and the number of executions can be adjusted. Take a look at the documentation. Give it a try." + "Latency (the time until something happens) and bandwidth (the amount of data we get through the network) are two important properties of your parallel system that define what is practical and what is not. We will use the ``%timeit`` magic to measure these properties. ``%timeit`` and its sibling ``%%timeit`` measure the run time of a statement (cell in the case of ``%%timeit``) by executing the statement multiple times (by default at least 7 repeats). 
For short running routines a loop of many executions is performed per repeat and the minimum time measured is then displayed. The number of loops and the number of repeats can be adjusted. Take a look at the documentation. Give it a try." ] }, { @@ -1124,7 +1142,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1148,7 +1166,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1209,7 +1227,7 @@ }, "outputs": [], "source": [ - "%timeit dview.execute('')" + "%timeit -n 10 dview.execute('')" ] }, { @@ -1222,7 +1240,7 @@ }, "outputs": [], "source": [ - "%timeit dview.apply(lambda x : x, '')" + "%timeit -n 10 dview.apply(lambda x : x, '')" ] }, { @@ -1238,6 +1256,32 @@ "%timeit -n 1 -r 4 rc[0].execute('c = a.dot(b)')" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "source": [ + "Note, that the previous call was non-blocking since this is the default for ``execute`` and we have not specified anything else for the view rc[0]. The next line shows the blocking variant:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%timeit -n 10 -r 7 rc[0].execute('c = a.dot(b)', block=True)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1248,7 +1292,7 @@ }, "outputs": [], "source": [ - "%timeit a.dot(b)" + "%timeit -n 10 -r 7 a.dot(b)" ] }, { @@ -1272,7 +1316,7 @@ } }, "source": [ - "We can start about 500 parallel tasks per second and finish about a quarter as many. This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." + "We can start about 2000 parallel tasks per second and finish about a tenth as many. 
This gives an estimate of the granularity we need to use this model for efficient parallelization. Any task that takes less time than this will be dominated by the overhead." ] }, { @@ -1322,15 +1366,15 @@ }, "outputs": [], "source": [ - "%timeit dview.push(dict(a=a))\n", - "%timeit dview.push(dict(a=a[:128*1024]))\n", - "%timeit dview.push(dict(a=a[:64*1024]))\n", - "%timeit dview.push(dict(a=a[:32*1024]))\n", - "%timeit dview.push(dict(a=a[:16*1024]))\n", - "%timeit dview.push(dict(a=a[:8*1024]))\n", - "%timeit dview.push(dict(a=a[:4*1024]))\n", - "%timeit dview.push(dict(a=a[:2*1024]))\n", - "%timeit dview.push(dict(a=a[:1024]))" + "%timeit -n 20 dview.push(dict(a=a))\n", + "%timeit -n 20 dview.push(dict(a=a[:128*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:64*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:32*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:16*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:8*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:4*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:2*1024]))\n", + "%timeit -n 20 dview.push(dict(a=a[:1024]))" ] }, { @@ -1341,7 +1385,7 @@ } }, "source": [ - "Calculate the bandwidth for the largest array and the smallest array." + "Calculate the bandwidth for the largest array and the smallest array. Replace the numbers below with the time you measured." ] }, { @@ -1354,8 +1398,8 @@ }, "outputs": [], "source": [ - "bwmax = len(rc) * 256 * 8 / 9.8e-3\n", - "bwmin = len(rc) * 8 / 6.1e-3\n", + "bwmax = len(rc) * 256 * 8 / 9.83e-3\n", + "bwmin = len(rc) * 8 / 4.25e-3\n", "print(\"The bandwidth is between %.2f kB/s and %.2f kB/s.\" %( bwmin, bwmax))" ] }, @@ -1478,25 +1522,26 @@ } }, "source": [ - "There are different ways to parallelize a matrix-matrix multiplication. Each element of the matrix can be calculated independently." + "There are different ways to parallelize a matrix-matrix multiplication. 
Each element of the matrix can be calculated independently, but this currently seems to crash the ipcluster, so we'll skip the execution." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, - "outputs": [], "source": [ - "%%timeit \n", + "```ipython\n", + "%%timeit\n", "p = len(rc)\n", "# Distribute the elements of the result viewmatrix round robin.\n", - "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) for j in range(n)] for i in range(n)]\n", + "C1h = [[rc[(i * n + j) % p].apply(np.dot, A[i,:], B[:,j]) \n", + " for j in range(n)] for i in range(n)]\n", "# Wait until the calculation is done\n", - "dview.wait()\n" + "dview.wait()\n", + "```" ] }, { @@ -1507,7 +1552,7 @@ } }, "source": [ - "This, however, produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", + "It produces $n^2$ short tasks and the overhead (latency) is just overwhelming.\n", "\n", "We want to calculate\n", "\n", @@ -1798,7 +1843,7 @@ "source": [ "Nothing says, we have to stop at 4 tiles nor do we have to use square tiles. We could also recursively subdivide our tiles.\n", "\n", - "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle." + "The code is not any faster, because our implementation of numpy already blocks the matrices and uses all cores, but it shows the principle. Also, remember that we are transferring the data to the engines in every call!" 
] }, { @@ -1812,9 +1857,9 @@ "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1826,7 +1871,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/07_LocalTaskParallel.ipynb b/solutions/07_LocalTaskParallel.ipynb index 06251ec4881e8d47934b2a36085ca02f223552ef..bdd868d7b777474ff12dc906f73536d85a982960 100644 --- a/solutions/07_LocalTaskParallel.ipynb +++ b/solutions/07_LocalTaskParallel.ipynb @@ -60,6 +60,17 @@ "import numpy as np" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%px --local\n", + "import threadpoolctl\n", + "threadpoolctl.threadpool_limits(limits=32, user_api='blas')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -326,6 +337,55 @@ "BlockMatrixMultiply?" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's increase the size of the matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = 16384\n", + "A = np.random.random([n, n])\n", + "B = np.random.random([n, n])\n", + "C = np.dot(A, B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit C=np.dot(A,B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiply(A, B, n // 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%timeit BlockMatrixMultiplyLB(A, B, lview, n)\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 2) # 4 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 4) # 16 tasks\n", + "%timeit BlockMatrixMultiplyLB(A, B, lview, n // 8) # 64 tasks" + ] + }, { "cell_type": "code", "execution_count": null, @@ -336,9 +396,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -350,7 +410,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/08_Numba vectorize.ipynb b/solutions/08_Numba vectorize.ipynb index 4661df7bc88f11458af9ca861e1db6aaccc79d90..85f5eaa9aae3df49f0e0c72347379684e96627cf 100644 --- a/solutions/08_Numba vectorize.ipynb +++ b/solutions/08_Numba vectorize.ipynb @@ -11,7 +11,7 @@ "# Numba vectorize\n", "\n", "<div class=\"dateauthor\">\n", - "21 June 2022 | Jan H. Meinke\n", + "13 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -597,9 +597,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -611,7 +611,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/09_NumbaIntro.ipynb b/solutions/09_NumbaIntro.ipynb index 01d6aca2adbb101da4135b4bcd904a1053fa304d..f2f784c0331c9060cea40da0fe8923c415c8a9c6 100644 --- a/solutions/09_NumbaIntro.ipynb +++ b/solutions/09_NumbaIntro.ipynb @@ -11,7 +11,7 @@ "# Introduction to Numba's jit compiler\n", "\n", "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -23,7 +23,7 @@ } }, "source": [ - "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write CUDA kernels in Python! In this notebook, we'll focus on the jit compiler." + "Numba provides a just-in-time (jit) compiler, a decorator `vectorize` that we can use to define `ufunc`s that are fast and flexible, and an interface to CUDA- and ROCm-capable GPUs that allows us to write GPU kernels in Python! In this notebook, we'll focus on the jit compiler." ] }, { @@ -54,13 +54,14 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy\n", - "from numba import jit\n", + "from numba import njit as jit\n", "from matplotlib import pyplot as plt " ] }, @@ -82,7 +83,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -104,13 +106,26 @@ "When we call `python_sum`, the interpreter goes through it line by line. 
For each item it has to interpret `res += x` and execute it, i.e., call apropriate C routines that have been compiled for the processor. The only requirements for `a` in this function are that it is iterable and its elements support the `+` operator. For the following little benchmark, we'll use an `ndarray` of random numbers." ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + }, + "tags": [] + }, + "source": [ + "The Faster CPython project is working on performance improvements for CPython. This includes inlining function calls and choosing special paths if the interpreter detects that types and objects are stable [PEP659](https://peps.python.org/pep-0659/)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -123,7 +138,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -136,7 +152,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -151,7 +168,7 @@ } }, "source": [ - "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUWELS is about 40 GFLOP/s." + "Please calculate the floating point operations per second for `python_sum`. Btw., remember the peak performance of a single core on JUSUF is about 36 GFLOP/s." ] }, { @@ -270,7 +287,7 @@ } }, "source": [ - "Yes, there are good reasons to love Python (and other higher programming languages).\n", + "Yes, there are good reasons to love higher programming languages.\n", "\n", "Let's run the code:\n", "```\n", @@ -278,7 +295,7 @@ "Sum: 5033.24 in 0.717281 µs. 13941.5 MFLOP. \n", "```\n", "\n", - "The function takes about 0.7 µs. This is about 2000 times faster than the interpreted Python loop. \n", + "The function takes about 0.7 µs. 
This is more than 10,000 times faster than the interpreted Python loop. \n", "Wouldn't it be great if we could take the Python code in `python_sum` and compile it to machine \n", "code to get some of this speedup?" ] @@ -339,7 +356,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "numba_sum = jit(python_sum)" @@ -351,7 +370,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -375,7 +395,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -439,7 +460,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -463,7 +485,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -488,7 +511,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -507,7 +531,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -521,7 +546,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -534,7 +560,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -558,7 +585,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -578,7 +606,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -591,7 +620,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -604,7 +634,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -618,7 +649,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], 
"source": [ @@ -631,7 +663,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -644,7 +677,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -662,7 +696,9 @@ }, "source": [ "### Exercise: prange\n", - "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the arguments ``nopython=True`` and ``parallel=True`` to the jit decorator.\n", + "Numba can parallelize loops with ``prange``. Import ``prange`` from numba and change the range in row into a prange. You also need to add the ``parallel=True`` to the jit decorator.\n", + "\n", + "We imported ``njit`` as ``jit`` at the beginning of the notebooks since ``nopython=True`` will soon become the default. If you use ```from numba import jit``` you need to explicitly write ``nopython=True`` below.\n", "\n", "Rerun and compare.\n", "\n", @@ -683,9 +719,10 @@ "outputs": [], "source": [ "# Solution for the first part\n", + "from numba import njit as jit # use nopython=True as default\n", "from numba import prange\n", "\n", - "@jit(nopython=True, parallel=True)\n", + "@jit(parallel=True)\n", "def numba_mm_par(a,b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in prange(a.shape[0]):\n", @@ -725,7 +762,10 @@ "outputs": [], "source": [ "# Solution for the extra credit:\n", - "@jit(nopython=True, parallel=True)\n", + "from numba import njit as jit # use nopython=True as default\n", + "from numba import prange\n", + "\n", + "@jit(parallel=True)\n", "def numba_mm_kj_par(a,b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in prange(a.shape[0]):\n", @@ -770,7 +810,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -789,7 +830,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -802,7 
+844,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -826,11 +869,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython=True)\n", + "@jit\n", "def numba_mm3(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in range(a.shape[0]):\n", @@ -845,7 +889,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -858,11 +903,12 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ - "@jit(nopython = True)\n", + "@jit\n", "def numba_mm4(a, b):\n", " res = numpy.zeros((a.shape[0], b.shape[1]))\n", " for row in range(a.shape[0]):\n", @@ -878,7 +924,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -891,7 +938,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -1365,11 +1413,7 @@ "source": [ "This is much better. The `ps` at the end of `vaddps` stands for *packed single precision* indicating \n", "a SIMD instruction. The `ymm` registers used are 256 bits wide, which corresponds to 8 single precision\n", - "numbers at a time.\n", - "\n", - "Skylake-X also has `zmm` registers with a width of 512 bit or 16 single precision numbers, but when\n", - "they are used the maximum frequency of the processor is reduced. It can happen that the performance \n", - "using `ymm` registers at higher frequency is actually better." + "numbers at a time." 
] }, { @@ -1458,9 +1502,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1472,7 +1516,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/10_Speeding up your code with Cython.ipynb b/solutions/10_Speeding up your code with Cython.ipynb index 67810811ca061dd644c5218b0429c434674a0bca..b7f8e44e14834de7f5035697086424801d8c0984 100644 --- a/solutions/10_Speeding up your code with Cython.ipynb +++ b/solutions/10_Speeding up your code with Cython.ipynb @@ -20,7 +20,7 @@ }, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -65,7 +65,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -78,7 +79,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -103,7 +105,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -121,7 +124,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -135,7 +139,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -149,7 +154,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -164,7 +170,7 @@ } }, "source": [ - "Elementwise access to NumPy arrays can in the meantime be just as fast as access for lists.\n", + "Elementwise access to NumPy arrays is often slower than elementwise access to lists.\n", "\n", "Now let us invoke Cython" ] @@ -175,7 +181,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": 
[], "source": [ @@ -194,7 +201,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -208,7 +216,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -232,7 +241,8 @@ "metadata": { "slideshow": { "slide_type": "subslide" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -277,7 +287,7 @@ } }, "source": [ - "The arguments `v` and `w` are very general. If we know, however, that we are only going to pass ndarrays of integers, we can be more specific:" + "The arguments `v` and `w` are very general. If we know that we are only going to pass ndarrays of integers, we can be more specific:" ] }, { @@ -286,7 +296,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -320,7 +331,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -344,7 +356,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -368,7 +381,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -403,7 +417,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -430,7 +445,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -454,7 +470,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -482,7 +499,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -507,7 +525,8 @@ "metadata": { "slideshow": { "slide_type": "-" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -548,7 +567,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -576,7 +596,7 @@ "source": [ "Since Cython generates 
compiled Python extensions, we can release the GIL and run things in parallel if we don't make calls to the Python API.\n", "\n", - "As we've seen our inner loop is free of any Python calls (the annotated code is white). Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitely release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." + "As we've seen our inner loop is free of any Python calls (the annotated code is white). Since OpenMP supports reductions, we can parallelize the loop using Cython's ``prange``. Within ``prange`` we have to explicitly release the GIL by setting ``nogil=True``. We also need to pass the compiler and linker flags for OpenMP." ] }, { @@ -585,7 +605,8 @@ "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -615,7 +636,8 @@ "metadata": { "slideshow": { "slide_type": "skip" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -798,9 +820,9 @@ } }, "source": [ - "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use `distutils` and a `setup.py` file.\n", + "So far we have used IPython and the Cython magic to build and test our extension within a notebook. Once we are satisfied and want to put our extension in production, we want to be able to build the extension without IPython. The recommended way to do that is to use the `setuptools` provided with Cython and a `setup.py` file. For details see the [documentation](https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#basic-setup-py).\n", "\n", - "Note that distutils has been marked as deprecated as of Python 3.10, but we are still using Python 3.9.x on our systems." 
+ "Note that distutils has been marked as deprecated as of Python 3.10." ] }, { @@ -819,7 +841,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", + "from setuptools import setup\n", "from Cython.Build import cythonize\n", "setup(name=\"Sum of integers\",\n", " ext_modules=cythonize(\"sum.pyx\"),\n", @@ -855,8 +877,7 @@ "metadata": {}, "source": [ "```python\n", - "from distutils.core import setup\n", - "from distutils.extension import Extension\n", + "from setuptools import Extension, setup\n", "from Cython.Build import cythonize\n", "\n", "ext_modules = [\n", @@ -1358,9 +1379,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022 (local)", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1372,7 +1393,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/11_Writing your own Python bindings.ipynb b/solutions/11_Writing your own Python bindings.ipynb index 79ec3b60839100d956dd0970538cb1fbfb02eeeb..ec69f6d7cc68e7e07b456d106fc0bed6f2f921ae 100644 --- a/solutions/11_Writing your own Python bindings.ipynb +++ b/solutions/11_Writing your own Python bindings.ipynb @@ -16,7 +16,7 @@ "metadata": {}, "source": [ "<div class=\"dateauthor\">\n", - "22 June 2022 | Jan H. Meinke\n", + "14 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -92,7 +92,7 @@ "\n", "Wait until the build has finished and then continue with this notebook.\n", "\n", - "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2119/hpcpy22`." + "**Tip:** You can open a terminal from within JupyterLab by going to File->New->Terminal. To get the right environment in a terminal `source $PROJECT_training2318/hpcpy23`." 
] }, { @@ -222,7 +222,7 @@ } }, "source": [ - "What if word_frequency had been written Fortran?" + "What if word_frequency had been written in Fortran?" ] }, { @@ -268,7 +268,7 @@ "source": [ "### Exercise\n", "Use the terminal that you used earlier to run `build.sh` or open a new one. Make sure you are in the \n", - "tutorial directory. Source `hpcpy22` using `source $PROJECT/hpcpy22`. Change into code/textstats/ and compile \n", + "tutorial directory. Source `hpcpy23` using `source $PROJECT/hpcpy23`. Change into code/textstats/ and compile \n", "the file word_frequency.F90 with the following command:\n", "\n", "```bash\n", @@ -414,7 +414,7 @@ "source": [ "Now, the name of the function will always be `word_frequency`. `bind` takes as optional argument the name under which the function should be known to C: bind(c, name=\"wf\") would let us call the function as `wf(filename, word)` from C (and Python).\n", "\n", - "To learn more about [CFFI](https://bitbucket.org/cffi/cffi) look at it's [documentation](https://cffi.readthedocs.io/en/latest/)." + "To learn more about CFFI look at its [documentation](https://cffi.readthedocs.io/en/latest/)." ] }, { @@ -1527,9 +1527,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1541,7 +1541,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/12_Introduction to MPI.ipynb b/solutions/12_Introduction to MPI.ipynb index 924eff5820635e0df31df578cd7b4227043969a2..dc37c65ac761011aa573c411bd5ece7e15778983 100644 --- a/solutions/12_Introduction to MPI.ipynb +++ b/solutions/12_Introduction to MPI.ipynb @@ -11,7 +11,7 @@ "# Introduction to MPI\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -218,7 +218,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A slbio python3 hello_mpi.py " + "!srun --pty -n 4 -p batch -A training2318 --reservation tr2318-20230615-cpu python3 hello_mpi.py " ] }, { @@ -315,7 +315,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_ptp.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_ptp.py" ] }, { @@ -348,7 +348,7 @@ } }, "source": [ - "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` command is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." + "If you need to send data to another rank and receive data from the same rank, combining `Send` and `Recv` commands is dangerous and easily leads to deadlocks. Use `Sendrecv` instead." ] }, { @@ -405,7 +405,7 @@ }, "outputs": [], "source": [ - "!srun --pty -n 4 -p batch -A training2219 --time 00:10:00 python3 hello_sendrecv.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 hello_sendrecv.py" ] }, { @@ -458,7 +458,7 @@ "tags": [] }, "source": [ - "Next, we'll sum up the partial results and then use sum up (`reduce`) the partial results:" + "Next, we'll sum up `a_partial` and then use `reduce` to sum up the partial results:" ] }, { @@ -558,7 +558,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_reduction.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_reduction.py" ] }, { @@ -580,7 +580,7 @@ } }, "source": [ - "`mpi4py` offers two version of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `np.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python object. 
The result is given as the return value. Note, that for the uppercase versions all `a_partial` must have the same size!" + "`mpi4py` offers two versions of many calls. The first one is written in uppercase. It uses memory buffers, e.g., `numpy.array`, and maps the call directly to the appropriate C call. The second version is written in lower case and takes arbitrary Python objects. The result is given as the return value. Note, that for the uppercase versions all `a_partial` must have the same size!" ] }, { @@ -665,7 +665,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_upper.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_upper.py" ] }, { @@ -676,7 +676,7 @@ } }, "source": [ - "The following works independent of the size of a_partial:" + "The following code uses the lowercase versions of the calls and works independent of the size of a_partial:" ] }, { @@ -751,7 +751,7 @@ }, "outputs": [], "source": [ - "!srun -n 4 -p batch -A training2219 --time 00:10:00 python3 mpi_lower.py" + "!srun --pty -n 4 -p batch -A training2318 --time 00:10:00 --reservation tr2318-20230615-cpu python3 mpi_lower.py" ] }, { @@ -866,7 +866,7 @@ "%%writefile mpi_ptp2.py\n", "\n", "from mpi4py import MPI\n", - "import numpy as np\n", + "import numpy\n", "\n", "comm=MPI.COMM_WORLD\n", "rank = comm.Get_rank()\n", @@ -876,15 +876,29 @@ " exit(1)\n", "\n", "nRnd=10000\n", - "part = np.random.default_rng().normal(0.0, 1.0, nRnd)\n", + "part = numpy.random.default_rng().normal(0.0, 1.0, nRnd)\n", "minval=part.min()\n", "maxval=part.max()\n", - "globmin=np.zeros(1)\n", - "globmax=np.zeros(1)\n", + "globmin=numpy.zeros(1)\n", + "globmax=numpy.zeros(1)\n", "comm.Reduce(minval,globmin,MPI.MIN)\n", "comm.Reduce(maxval,globmax,MPI.MAX)\n", "if rank == 0:\n", - " print(\"np.ptp=n.d., mpi_ptp=%f\"%(globmax-globmin))" + " print(\"numpy.ptp=n.d., mpi_ptp=%f\"%(globmax-globmin))" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Solution" + ] + }, + "outputs": [], + "source": [ + "!srun --pty -n 4 -A training2318 --time 00:10:00 python3 mpi_ptp1.py\n", + "!srun --pty -n 4 -A training2318 --time 00:10:00 python3 mpi_ptp2.py" ] }, { @@ -1128,7 +1142,7 @@ "3. Time the execution of the program from the second part of the exercise.\n", "\n", " a) Keep the size of the system constant and increase the number of ranks/domain, e.g., using 2, \n", - " 4, 8, and 16 ranks. How \n", + " 4, 8, and 16 ranks. How does the timing change?\n", " \n", " b) Keep the size of the domains constant, i.e., the total size is a multiple of the number of \n", " ranks. Again increase the number of ranks\n", @@ -1542,7 +1556,7 @@ "source": [ "Click on the ``+``-sign at the top of the Files tab on the left to start a new launcher. In the launcher click on Terminal. A terminal will open as a new tab. Grab the tab and pull it to the right to have the terminal next to your notebook.\n", "\n", - "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2219/hpcpy22`." + "**Note**: The terminal does not have the same modules loaded as the notebook. To fix that type `source $PROJECT_training2318/hpcpy23`." ] }, { @@ -1560,7 +1574,7 @@ "\n", "```bash\n", "export OMP_NUM_THREADS=32\n", - "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2219 ipengine start\n", + "srun -n 4 -c 32 --ntasks-per-node 4 --time 00:30:00 -A training2318 --reservation tr2318-20230615-cpu ipengine start\n", "```\n", "\n", "**Note**, you can can start the controller and the engines in separate terminals. That will keep the output separate." 
@@ -1934,13 +1948,20 @@ "source": [ "sum(sum_partial)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1952,7 +1973,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/13_Introduction to CuPy.ipynb b/solutions/13_Introduction to CuPy.ipynb index f28ade4ac79fefe6f3abf9fcd4d1131bebbd6762..807eb348f8c01c4c666a8ccbf9504315882b0e40 100644 --- a/solutions/13_Introduction to CuPy.ipynb +++ b/solutions/13_Introduction to CuPy.ipynb @@ -10,7 +10,7 @@ "source": [ "# Introduction to CuPy\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>\n", "<img src=\"images/cupy.png\" style=\"float:right\">" ] @@ -134,7 +134,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul.py" ] }, { @@ -214,7 +214,21 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing.py" + "!srun --pty -N 1 -p gpus -A training2318 --time 00:10:00 --reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!srun --pty -N 1 -p develgpus -A training2318 --time 00:10:00 python3 cupy_matrix_mul_w_timing.py" ] }, { @@ -433,7 +447,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_matrix_mul_w_timing2.py" + "!srun -p gpus -A training2318 
--reservation tr2318-20230615-gpu python3 cupy_matrix_mul_w_timing2.py" ] }, { @@ -492,7 +506,7 @@ }, "outputs": [], "source": [ - "!srun -p batch -n 1 -c 256 -A training2219 python numpy_matrix_mul_w_timing2.py" + "!srun -p batch -n 1 -c 256 -A training2318 --pty --reservation tr2318-20230615-cpu python3 numpy_matrix_mul_w_timing2.py" ] }, { @@ -651,7 +665,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cupy_to_and_fro.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cupy_to_and_fro.py" ] }, { @@ -739,9 +753,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -753,7 +767,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/solutions/14_CUDA for Python.ipynb b/solutions/14_CUDA for Python.ipynb index 3d2e5646d0169cc2ff72775de13c02834df14bd5..6c82362a4f7ef4e45c727abbf832ae7aa32b30ca 100644 --- a/solutions/14_CUDA for Python.ipynb +++ b/solutions/14_CUDA for Python.ipynb @@ -11,7 +11,7 @@ "# Numba and GPUs\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. 
Meinke\n", "</div>" ] }, @@ -158,7 +158,7 @@ " return i\n", " return maxtime\n", "\n", - "if __name__ == \"__main__:\n", + "if __name__ == \"__main__\":\n", " import numpy\n", " x = numpy.linspace(-2, 2, 500)\n", " y = numpy.linspace(-1.5, 1.5, 375)\n", @@ -221,7 +221,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython mandelbrot_vectorize_cuda.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython mandelbrot_vectorize_cuda.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean():.3f}±{t_gpu.std():.3f} s.\")" ] @@ -344,7 +344,7 @@ "source": [ "GPUs were (and are) made to display graphics on your screen. It doesn't matter how quickly a GPU can update a single pixel. It's important how quickly it can update all of the pixels on the screen (more than 2 million on an HD display). In addition it often must perform the same operation on a lot of vertices or pixels. \n", "\n", - "These two conditions let to a different execution model." + "These two conditions led to a different execution model." 
] }, { @@ -937,7 +937,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot1.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot1.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1078,7 +1078,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot2.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot2.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1207,7 +1207,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot3.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot3.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1459,7 +1459,7 @@ }, "outputs": [], "source": [ - "res = !srun -p gpus -A training2219 ipython cuda_mandelbrot4.ipy\n", + "res = !srun -p gpus -A training2318 --reservation tr2318-20230615-gpu ipython cuda_mandelbrot4.ipy\n", "t_gpu = numpy.array(eval(res[-1]))\n", "print(f\"Runtime: {t_gpu.mean() * 1000:.3f}±{t_gpu.std() * 1000:.3f} ms.\")" ] @@ -1581,7 +1581,7 @@ }, "outputs": [], "source": [ - "!srun -p gpus -A training2219 python cuda_matrixmul.py" + "!srun -p gpus -A training2318 --reservation tr2318-20230615-gpu python3 cuda_matrixmul.py" ] }, { @@ -1752,9 +1752,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -1766,7 +1766,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": 
"3.11.3" } }, "nbformat": 4, diff --git a/solutions/15_CUDA and MPI.ipynb b/solutions/15_CUDA and MPI.ipynb index c8fde4a57b6c91c4a0176090963252740b89b0d1..982c4a3aa2c129fec067be8b1f5ea86a4a85bd13 100644 --- a/solutions/15_CUDA and MPI.ipynb +++ b/solutions/15_CUDA and MPI.ipynb @@ -11,7 +11,7 @@ "# CUDA for Python and MPI4Py\n", "\n", "<div class=\"dateauthor\">\n", - "23 June 2022 | Jan H. Meinke\n", + "15 June 2023 | Jan H. Meinke\n", "</div>" ] }, @@ -338,6 +338,57 @@ " main()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "Solution" + ] + }, + "outputs": [], + "source": [ + "!srun -n 4 -c 32 -p gpus -A training2318 python3 parallel_shift.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Picking a device" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "notes" + } + }, + "source": [ + "As you see from the example above, there's nothing special when using MPI with GPUs. The one thing that might bite you is using *multiple* MPI ranks for *multiple* GPUs on a *single* node. 
In this case, you might have to tell your MPI rank which GPU to use.\n", + "\n", + "If you have, for example, 4 GPUs and you know that your scheduler chooses a compact configuration, i.e., rank 0, 1, 2, 3 are on the first node, rank 4, 5, 6, 7 are on the second node, etc., you can use your rank to assign a GPU to your process:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "```python\n", + "\n", + "cuda.select_device(my_rank % number_of_gpus_per_node)\n", + "```" + ] + }, { "cell_type": "code", "execution_count": null, @@ -409,44 +460,6 @@ " main()" ] }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "## Picking a device" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, - "source": [ - "As you see from the example above, there's nothing special when using MPI with GPUs. The one thing that might bite you is using *multiple* MPI ranks for *multiple* GPUs on a *single* node.
In this case, you might have to tell your MPI rank which GPU to use.\n", - "\n", - "If you have, for example, 4 GPUs and you know that your scheduler chooses a compact configuration, i.e., rank 0, 1, 2, 3 are on the first node, rank 4, 5, 6, 7 are on the second node, etc., you can use you rank to assign a GPU to your process:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "```python\n", - "\n", - "cuda.select_device(my_rank % number_of_gpus_per_node)\n", - "```" - ] - }, { "cell_type": "markdown", "metadata": { @@ -493,6 +506,7 @@ }, "outputs": [], "source": [ + "%%writefile cuda_mpi_mandelbrot.py\n", "# Solution for calculating the Mandelbrot set on 4 GPUs/node\n", "import numpy\n", "import mpi4py.MPI as MPI\n", @@ -584,9 +598,20 @@ "slideshow": { "slide_type": "skip" }, - "tags": [] + "tags": [ + "Solution" + ] }, "outputs": [], + "source": [ + "!srun -n 4 -c 32 -p gpus -A training2318 python3 cuda_mpi_mandelbrot.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] }, { @@ -655,7 +680,7 @@ "import cupy\n", "# Create an array with N * number_of_ranks elements\n", "N = 1000\n", - "a_partial = cup.empty(N)\n", + "a_partial = cupy.empty(N)\n", "if my_rank == 0:\n", " a = cupy.random.random(N * number_of_ranks)\n", "else:\n", @@ -768,12 +793,12 @@ " block = 256\n", " grid = N // block if N % block == 0 else N // block + 1 \n", " shift[grid, block](-0.75, a_partial)\n", - " print(f\"[{my_rank}] The average of a_partial is {cupy.mean(a_partial):.3f}\")\n", + " print(f\"[{my_rank}] The average of a_partial after shifting is {cupy.mean(a_partial):.3f}\")\n", " # Collect the data again on rank 0\n", " comm.Gather(a_partial, a, root = 0) \n", "\n", " if my_rank == 0:\n", - " print(\"The average of a is %.2f\" % cupy.mean(a)) # Result should be near zero.\n", + " print(\"The average of a after shifting is %.2f\" % cupy.mean(a)) # Result should 
be near zero.\n", " \n", " \n", "if __name__ == \"__main__\":\n", @@ -791,15 +816,22 @@ }, "outputs": [], "source": [ - "!srun -p gpus -n 4 -A training2219 xenv -L mpi-settings/CUDA python cuda_aware_mpi_shift.py" + "!srun -p gpus -n 4 -A training2318 --reservation tr2318-20230615-gpu xenv -L mpi-settings/CUDA python3 cuda_aware_mpi_shift.py" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2022", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy22" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -811,7 +843,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/16_Introduction to Dask.ipynb b/solutions/16_Introduction to Dask.ipynb index 2aedd855d9692c467d2ece96eead2af9728c17c6..c57ff9a764d016a54105b794cb165416b0e3c87e 100644 --- a/solutions/16_Introduction to Dask.ipynb +++ b/solutions/16_Introduction to Dask.ipynb @@ -11,7 +11,7 @@ "# Introduction to Dask\n", "\n", "<div class=\"dateauthor\">\n", - "10 June 2021 | Olav Zimmermann\n", + "16 June 2023 | Olav Zimmermann\n", "</div>" ] }, @@ -170,9 +170,9 @@ }, "source": [ "Poll:\n", - "- minimal walltime: A) 3s B) 4s C) 6s D) 6.5s\n", - "- how many tasks in task graph: A) 1 B) 8 C) 16 D) 18 \n", - "- max tasks in the same time as 8 tasks: A) 8 B) 24 C) 48 D) 96" + "- minimal walltime: A) 3s B) 4.5s C) 6s D) 6.5s\n", + "- how many tasks in task graph: A) 10 B) 18 C) 25 D) 32 \n", + "- max tasks in the same time as 8 tasks: A) 8 B) 48 C) 96 D) 256" ] }, { @@ -183,7 +183,7 @@ } }, "source": [ - "The task graph generated by `dask` can be visualized (don't try this for large graphs!)." + "The task graph generated by `dask` can be visualized (don't try this for large graphs, i.e. more input tasks!)." 
] }, { @@ -236,7 +236,8 @@ "slide_type": "skip" }, "tags": [ - "Poll" + "Poll", + "Solution" ] }, "source": [ @@ -301,7 +302,8 @@ "source": [ "l=[x for x in range(1000000)]\n", "s= db.from_sequence(l,npartitions=4) # you can manually set the number of partitions\n", - "mysum=s.fold(add) # fold performs a parallel reduction " + "mysum=s.fold(add) # fold performs a parallel reduction \n", + "mysum.dask # another inspection method for task graphs in dask" ] }, { @@ -324,7 +326,7 @@ "outputs": [], "source": [ "%time result=mysum.compute()\n", - "result=mysum.compute\n", + "result=mysum.compute()\n", "result" ] }, @@ -338,8 +340,7 @@ }, "outputs": [], "source": [ - "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))\n", - "r[:5] #note: apparently no type coercion!" + "%time r=list(s.filter(lambda x: x % 2 == 0).map(lambda x: x * 1.2))" ] }, { @@ -363,7 +364,8 @@ "source": [ "**Exercise:**\n", "\n", - "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations. \n", + "Code the same operations without dask, i.e. using a) just python and b) using numpy and measure the runtime of the calculations.\n", + "Make sure to return a list in all cases.\n", "\n", "Conclusions?
" ] @@ -537,9 +539,25 @@ }, "outputs": [], "source": [ - "%%timeit import numpy as np\n", + "# %%timeit \n", + "import numpy as np\n", "arr=np.array(l)\n", - "out2=(arr[arr%2==0]*1.2).tolist()" + "out2=(arr[arr%2==0]*1.2).tolist()\n", + "out2[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "Solution" + ] + }, + "source": [ + "Take home msgs: \n", + "- a) parallelization has overhead, simple calculations on the data are often slower than their single threaded counterparts.\n", + "- b) dask bag is a flexible but slow container that is appropriate for parallel processing of file lists or text but not for numeric calculations on lists\n", + "- c) creating data structures can be expensive (`dask.bag.from_sequence` needs almost 500ms to convert the list to a dask bag." ] }, { @@ -585,7 +603,7 @@ "source": [ "## dask.array\n", "\n", - "**`dask.dataframe`** is the distributed equivalent of numpy ndarray." + "**`dask.array`** is the distributed equivalent of numpy ndarray." ] }, { @@ -681,7 +699,7 @@ "outputs": [], "source": [ "x_dask = da.random.normal(10, 0.1, size=(10000,3000), chunks=(5000,3000)) # using as many chunks as CPU cores is good for random number calculation\n", - "x_rechunked=x_dask.rechunk((1000,3000)) # larger chunks are no longer better for dot product calculation\n", + "x_rechunked=x_dask.rechunk((2500,3000)) # larger chunks are no longer better for dot product calculation\n", "y_dask = x_rechunked.transpose()\n", "result=x_dask.dot(y_dask)\n", "#with ProgressBar():\n", @@ -709,7 +727,7 @@ } }, "source": [ - "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`. See **Dashboard** when you started the client above that shows the address and port of the web server." + "`dask.distributed` features a sophisticated **web-based monitoring** based on the package `bokeh`." 
] }, { @@ -775,9 +793,9 @@ ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "HPC Python 2023 (local)", "language": "python", - "name": "hpcpy21" + "name": "hpcpy23" }, "language_info": { "codemirror_mode": { @@ -789,7 +807,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/solutions/17_Debugging.ipynb b/solutions/17_Debugging.ipynb index 22117b23f48eb2426597982c144d0aadf923c7de..dbd87d3b7b918b6cec67cd9dd28d88982d5a3f9e 100644 --- a/solutions/17_Debugging.ipynb +++ b/solutions/17_Debugging.ipynb @@ -6,17 +6,13 @@ "source": [ "# Debugging Python\n", "<div class=\"dateauthor\">\n", - "07 June 2021 | Jan H. Meinke\n", + "06 June 2023 | Jan H. Meinke, Olav Zimmermann\n", "</div>" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ "What do you do if a program doesn't produce the results you want? You can stare at the code and try to figure out the mistake. You can add lots of print statements to your code. Or you can use a debugger.\n", "\n", @@ -25,221 +21,184 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Debugging has its own terminology: You step in and out of functions. You move up and down the stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger I know does) support.\n", - "\n", - "In this notebook, we'll look at debugging a program with PDB in the notebook and pudb in a terminal window. You'll learn how to start a debugging session and do all the things, I talked about in the previous paragraph." + "### _\"Debuggers don't remove bugs. 
They only show them in slow motion.\"_ (Unknown)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## PDB" + "Debugging has its own terminology: You step in and out of functions. You move up and down the call stack. You set break points, inspect variables, etc. This is the basic functionality that every debugger should (and every debugger we know does) support.\n", + "\n", + "In this notebook, we'll introduce several different debuggers. We'll debug code within a notebook cell with the builtin debugger of JupyterLab as well as with PDB. Then we will use pudb to debug a program in a terminal window. You'll learn how to start a debugging session and do all the things, described in the previous paragraph." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is available from within a notebook, but it's not very convenient to use." + "## Runtime debugging with the JupyterLab builtin debugger" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Let's take the following function, which contains a bug and throws an exception." + "Before running the following cell try to guess what will happen: will it throw an error or a warning or will it execute normally? \n", + "If it is one of the latter two cases, what will it print?" 
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "#%%writefile buggy.py\n", - "def imabuggyincrement(i,a):\n", - " \"\"\"Increment a[i] by 1.\"\"\"\n", - " if ii < len(a):\n", - " a[i] += 1;\n", + "a,b,c,d,e=range(5)\n", + "from numpy import *\n", + "f=array([a,b,c,d,e], dtype=int)\n", + "def doubleme(input_array):\n", + " result=input_array*2\n", + " return result\n", + "def doublesummer(input_vec):\n", + " result=doubleme(input_vec)\n", + " result=result.sum()\n", + " return result\n", + "print(f'The result is {doublesummer(f)}.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using a debugger to execute a code (or part of it) step by step is also called **runtime debugging**. \n", "\n", - "a = list(range(10))\n", - "ii = 4\n", - "imabuggyincrement(10, a)" + "You can switch on JupyterLab's internal debugger by clicking on the small bug icon at the top right of the notebook, next to the kernel name. You will see several panels appear in the right sidebar. In addition, each code cell of the notebook now got line numbers.\n", + "\n", + "Click on the line number of line 11 in the code cell above. A red dot appearing in front of the line number indicates that you just set a **break point**. At a break point the debugger will stop, allowing you to inspect the state of each variable that is defined at this point. To start the debugger and let it execute the code up to the break point just re-execute the cell [Shift-Return].\n", + "\n", + "The navigation symbols at the top of the CallStack panel will now no longer be grayed out and allow you to execute the code line by line. With \"next\" you step over function calls within the line. 
With \"step in\" you can jump into the python functions called in this line of code (but not into any C library functions).\n", + "\n", + "The \"Variables\" panel allows you to view either the global or the local variables and to switch between tree and table view. (for arrays the table view is preferable)\n", + "\n", + "**Exercise:** Try to find the bug in the code above. You can set a break point at any line. In case that you want to reset the kernel use the circle arrow button at the top of the notebook.\n", + "\n", + "**Note:** The builtin debugger interface is a very recent addition to JupyterHub and only provides very limited functionality and convenience.\n" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ - "## Debug magic" + "## Post mortem debugging with PDB" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "If a cell has just thrown an exception, you can inspect it with the %debug magic. Try `help` to see the available commands. Type `exit` to leave the debugger." + "If a program fails, you can no longer execute the code step by step. Nevertheless, the debugger can help you to inspect the state of the code at the time of failure. This usage is also called **post mortem debugging**. Python comes with its own debugger called \"The Python debugger\" (pdb). PDB is also available from within a notebook, but it's not very convenient to use." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Use `p i` to print the value of `i`. You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" + "Let's take the following function, which contains a bug and throws an exception. 
**(Please switch off the internal debugger before executing the cell!)**" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, + "metadata": {}, "outputs": [], "source": [ - "%debug" + "#%%writefile buggy.py\n", + "def imabuggyincrement(i,a):\n", + " \"\"\"Increment a[i] by 1.\"\"\"\n", + " if ii < len(a):\n", + " a[i] += 1;\n", + "\n", + "a = list(range(10))\n", + "ii = 4\n", + "imabuggyincrement(10, a)" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Available debuggers" + "### The %debug magic of pdb for notebooks" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "* pdb (builtin)\n", - "* pudb\n", - "* IDEs (All the IDEs we mentioned have debugging support)" + "The cell above has just thrown an exception and within a notebook you can use the `%debug` magic provided by pdb to inspect it. Try `help` to see the available commands. Type `exit` to leave the debugger." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "source": [ - "Uncomment the ``%%writefile`` magic before the function defintion of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" + "Use `p i` to print the value of `i`. You can also try to print out the value of `a[i]` using `p a[i]`. Inspect the other variables. Do you see what went wrong?" ] }, { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "Next start pudb in a terminal with the script name as an argument. 
If you haven't done this in this terminal shell before, you need to source hpcpy20:" + "%debug" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "```bash\n", - "source hpcpy21\n", - "pudb3 buggy.py\n", - "```" + "## Debugging a program with pudb" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, "source": [ - "We'll give you a short demonstration and then you can play with it for a little while." + "Uncomment the ``%%writefile`` magic before the function definition of ``imabuggyincrement`` and execute the cell again so that it gets written to file buggy.py" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Remote debugging" + "Next start pudb in a terminal with the script name as an argument. If you haven't done this in this terminal shell before, you need to source hpcpy23:" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, + "metadata": {}, "source": [ - "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." + "```bash\n", + "source $PROJECT_training2318/hpcpy23\n", + "pudb buggy.py\n", + "```" ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ - "## Debugging Python extensions\n", - "We'll talk about this more tomorrow (maybe)." + "We'll give you a short demonstration and then you can play with it for a little while."
] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, + "metadata": {}, "source": [ "## Note\n", "\n", @@ -248,21 +207,15 @@ }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "notes" - } - }, + "metadata": {}, "source": [ - "A better way to check for bounds as I did in `iambuggyincrement` is not to do it at all but use a try...except statement instead:" + "An alternative to an explicit bounds check like the one in `imabuggyincrement` is not to check at all but to use a try...except statement instead:" ] }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "slide" - } + "tags": [] }, "source": [ "```python\n", @@ -274,7 +227,7 @@ " pass\n", " \n", "def main(arg=[]):\n", - " a = list(range(10)\n", + " a = list(range(10))\n", " ii = 4 # Now this is limited to the scope of main()\n", " imabuggyincrement(10, a)\n", " \n", @@ -283,13 +236,63 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that you should only use the `except` statement together with `pass` in cases where you expect a certain type of error but can't control the circumstances that lead to that error. This pattern effectively hides an error state of the program and could lead to unwanted side effects if used carelessly."
+ ] + }, { "cell_type": "markdown", "metadata": { - "slideshow": { - "slide_type": "skip" - } + "tags": [] }, + "source": [ + "## Overview: debuggers for Python" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* [pdb][] (builtin)\n", + "* [pudb][]\n", + "* IDEs (All the IDEs we mentioned have debugging support)\n", + "* [Linaro DDT][], former name ARMForge DDT (commercial, support for debugging parallel codes and C/C++ code, only rudimentary Python support)\n", + "* [TotalView][] (commercial, support for debugging parallel codes and C/C++ code, requires debug version of CPython, supports mixed language debugging, aware of cython, pybind11 and other bindings)\n", + "\n", + "[pdb]: https://docs.python.org/3/library/pdb.html\n", + "[pudb]: https://github.com/inducer/pudb\n", + "[Linaro DDT]: https://www.linaroforge.com/linaroDdt/\n", + "[ARMForge DDT]: https://developer.arm.com/tools-and-software/server-and-hpc/debug-and-profile/arm-forge/arm-ddt\n", + "[TotalView]: https://help.totalview.io/current/HTML/index.html#page/TotalView/totalviewlhug-python.13.01.html#ww1893192" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote debugging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For example, PyDev, Wing Personal, Visual Studio, and PyCharm Professional (199 €/a with perpetual fallback license) support remote debugging. It can also be done with the ``ptvsd`` and Visual Studio Code." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debugging Python extensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ "The following video shows how to debug mixed Python and C++ code using Visual Studio.\n", "\n", @@ -299,35 +302,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import YouTubeVideo\n", "\n", - "YouTubeVideo(\"D9RlT06a1EI\", start=300)" + "YouTubeVideo(\"KhuMRDY4BeU\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, + "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "HPC Python 2021", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "hpcpy21" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -339,7 +334,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/solutions/build.sh b/solutions/build.sh index bd4b6a6bae24b285395a0c5c1b7968f5d8d4de6e..df60a479ee86c74ff1e5991b1a70d6f7ea579276 100755 --- a/solutions/build.sh +++ b/solutions/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -source $PROJECT_training2219/hpcpy22 +source $PROJECT_training2318/hpcpy23 # Build points pushd code/point rm -rf build diff --git a/solutions/code b/solutions/code index 2edff2610e81084123a9969fc73223981f6d87b8..c787d1ee6a68815e557245d52ad924db7e184eae 120000 --- a/solutions/code +++ b/solutions/code @@ -1 +1 @@ -../code \ No newline at end of file +../code/ \ No newline at end of file diff --git a/solutions/data b/solutions/data index 4909e06efb479a01e44e67265074c726796f4959..eed2d0bc7940e7ec030de8b8307e476dd01c56ae 120000 --- a/solutions/data +++ b/solutions/data @@ -1 +1 @@ -../data \ No newline at end of file +../data/ \ No 
newline at end of file diff --git a/solutions/hpcpy23 b/solutions/hpcpy23 new file mode 100755 index 0000000000000000000000000000000000000000..447fd63d8484bbd3ef267c5ba2d0015066b785b9 --- /dev/null +++ b/solutions/hpcpy23 @@ -0,0 +1,26 @@ +#!/bin/bash +module purge +module load Stages/2023 +module load GCC +module load ParaStationMPI +module load CMake +module load Graphviz +module load SciPy-Stack +module load numba +module load dask +module load mpi4py +module load h5py +#module load Jupyter +module load CUDA +module load cuTENSOR +module load NCCL +module load cuDNN +#export NUMBAPRO_NVVM=$CUDA_HOME/nvvm/lib64/libnvvm.so +#export NUMBAPRO_LIBDEVICE=$CUDA_HOME/nvvm/libdevice +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/text_stats/build:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/p/project/training2318/resources/code/point/build:$LD_LIBRARY_PATH +export PYTHONPATH=/p/project/training2318/packages/lib/python3.10/site-packages:$PYTHONPATH +export PATH=/p/project/training2318/packages/bin:$PATH +export HPCPY2023=1 +#exec $(which python) -m ipykernel $@ +