diff --git a/01-deep-learning-on-supercomputers.md b/01-deep-learning-on-supercomputers.md
index 79007ce1d07d812090fbe8617266f1db312610b2..915a51841629c639b1fb0180c06e6b87d7ceb911 100644
--- a/01-deep-learning-on-supercomputers.md
+++ b/01-deep-learning-on-supercomputers.md
@@ -394,10 +394,16 @@ git clone https://gitlab.jsc.fz-juelich.de/strube1/2023-nov-intro-to-supercompti
 
 ---
 
-## Again, please access the slides to clone repository:
+## If you haven't done so, please access the slides to clone repository:
 
 ![](images/slides.png)
 
+- ```bash
+git clone https://gitlab.jsc.fz-juelich.de/strube1/2023-nov-intro-to-supercompting-jsc.git
+```
+
+
+
 ---
 
 ## DEMO TIME!
@@ -434,14 +440,14 @@ from fastai.vision.models.xresnet import *
 
 ---
 
-## Bringing your data in
+## Bringing your data in*
 
 ```python
 from fastai.vision.all import *
 from fastai.distributed import *
 from fastai.vision.models.xresnet import *
-
-path = untar_data(URLs.IMAGEWOOF_320)
+# DOWNLOADS DATASET - we need to do this on the login node
+path = untar_data(URLs.IMAGEWOOF_320) 
 
 
 
@@ -457,7 +463,6 @@ path = untar_data(URLs.IMAGEWOOF_320)
 
 ```
 
-
 ---
 
 ## Loading your data
@@ -515,17 +520,21 @@ learn.fine_tune(6)
 - Only add new requirements
 - [Link to gitlab repo](https://gitlab.jsc.fz-juelich.de/kesselheim1/sc_venv_template)
 - ```bash
+cd $HOME/2023-nov-intro-to-supercompting-jsc/src
 git clone https://gitlab.jsc.fz-juelich.de/kesselheim1/sc_venv_template.git
 ```
-- Add this to requirements.txt:
+- Add this to sc_venv_template/requirements.txt:
 - ```python
 fastai
 deepspeed
 accelerate
 ```
-- (the last one will become `accelerate` later this week)
-- Run `./setup.sh`
-- `source activate.sh`
+
+- ```bash
+sc_venv_template/setup.sh
+source sc_venv_template/activate.sh
+```
+
 - Done! You installed everything you need
 
 ---
@@ -533,7 +542,7 @@ accelerate
 ## Submission Script
 
 ```bash
-#!/bin/bash -x
+#!/bin/bash
 #SBATCH --account=training2334
 #SBATCH --nodes=1
 #SBATCH --job-name=ai-serial
@@ -556,6 +565,33 @@ time srun python serial.py
 
 ---
 
+## Download dataset
+
+- Compute nodes have no internet
+- We need to download the dataset beforehand, on the login node
+
+---
+
+## Download dataset
+
+```bash
+cd $HOME/2023-nov-intro-to-supercompting-jsc/src
+source sc_venv_template/activate.sh
+python serial.py
+
+(Some warnings)
+epoch     train_loss  valid_loss  accuracy  top_k_accuracy  time    
+Epoch 1/1 : |-------------------------------------------------------------| 0.71% [1/141 00:07<16:40]
+```
+
+- It started training, on the login node's CPUs (WRONG!!!)
+- That means we have the data!
+- We just cancel with Ctrl+C
+
+
+---
+
+
 ## Running it
 
 - ```bash
@@ -564,12 +600,13 @@ sbatch serial.slurm
 ```
 - On Juwels Booster, should take about 5 minutes
 - On a cpu system this would take half a day
+- Check the out-serial.XXXXXX and err-serial.XXXXXX files
 
 ---
 
 ## Going data parallel
 
-- Same code as before, let's show the differences
+- Almost the same code as before, let's show the differences
 
 ---
 
@@ -630,6 +667,13 @@ with learn.distrib_ctx():
 
 - Please check the course repository: [src/distrib.slurm](https://gitlab.jsc.fz-juelich.de/strube1/2023-nov-intro-to-supercompting-jsc/-/blob/main/src/distrib.slurm)
 
+- Main differences: 
+
+- ```bash
+#SBATCH --cpus-per-task=48
+#SBATCH --gres=gpu:4
+```
+
 ---
 
 ## Let's check the outputs!
@@ -670,6 +714,7 @@ real	1m19.979s
 
 - Distributed run suffered a bit on the accuracy 🎯 and loss 😩
   - In exchange for speed 🏎️
+  - Train a bit longer and you're good!
 - It's more than 4x faster because the library is multi-threaded (and now we use 48 threads)
 - I/O is automatically parallelized / sharded by Fast.AI library
 - Data parallel is a simple and effective way to distribute DL workload 💪
@@ -708,6 +753,7 @@ real	1m15.651s
 - Accuracy and loss suffered
 - This is a very simple model, so it's not surprising
     - It fits into 4gb, we "stretched" it to a 320gb system
+    - It's not a good fit for this system
 - You need bigger models to really exercise the gpu and scaling
 - There's a lot more to that, but for now, let's focus on medium/big sized models
     - For Gigantic and Humongous-sized models, there's a DL scaling course at JSC!
diff --git a/public/01-deep-learning-on-supercomputers.html b/public/01-deep-learning-on-supercomputers.html
index f435715abd5f940bb07312cde5b703756255b00e..aa8997ec5fded7f030aa8e9f9bf46e0a261ef620 100644
--- a/public/01-deep-learning-on-supercomputers.html
+++ b/public/01-deep-learning-on-supercomputers.html
@@ -607,10 +607,16 @@ gpus</li>
 <h2>Are we there yet?</h2>
 <p><img data-src="images/are-we-there-yet-4.gif" /></p>
 </section>
-<section id="again-please-access-the-slides-to-clone-repository"
+<section
+id="if-you-havent-done-so-please-access-the-slides-to-clone-repository"
 class="slide level2">
-<h2>Again, please access the slides to clone repository:</h2>
+<h2>If you haven’t done so, please access the slides to clone
+repository:</h2>
 <p><img data-src="images/slides.png" /></p>
+<ul>
+<li class="fragment"><div class="sourceCode" id="cb2"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">git</span> clone https://gitlab.jsc.fz-juelich.de/strube1/2023-nov-intro-to-supercompting-jsc.git</span></code></pre></div></li>
+</ul>
 </section>
 <section id="demo-time" class="slide level2">
 <h2>DEMO TIME!</h2>
@@ -624,33 +630,12 @@ node</li>
 </section>
 <section id="expected-imports" class="slide level2">
 <h2>Expected imports</h2>
-<div class="sourceCode" id="cb2"><pre
-class="sourceCode python"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.<span class="bu">all</span> <span class="im">import</span> <span class="op">*</span></span>
-<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.distributed <span class="im">import</span> <span class="op">*</span></span>
-<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.models.xresnet <span class="im">import</span> <span class="op">*</span></span>
-<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-14"><a href="#cb2-14" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-15"><a href="#cb2-15" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-16"><a href="#cb2-16" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb2-17"><a href="#cb2-17" aria-hidden="true" tabindex="-1"></a></span></code></pre></div>
-</section>
-<section id="bringing-your-data-in" class="slide level2">
-<h2>Bringing your data in</h2>
 <div class="sourceCode" id="cb3"><pre
 class="sourceCode python"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.<span class="bu">all</span> <span class="im">import</span> <span class="op">*</span></span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.distributed <span class="im">import</span> <span class="op">*</span></span>
 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.models.xresnet <span class="im">import</span> <span class="op">*</span></span>
 <span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> untar_data(URLs.IMAGEWOOF_320)</span>
+<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a></span>
@@ -664,29 +649,29 @@ class="sourceCode python"><code class="sourceCode python"><span id="cb3-1"><a hr
 <span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a></span></code></pre></div>
 </section>
-<section id="loading-your-data" class="slide level2">
-<h2>Loading your data</h2>
+<section id="bringing-your-data-in" class="slide level2">
+<h2>Bringing your data in*</h2>
 <div class="sourceCode" id="cb4"><pre
 class="sourceCode python"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.<span class="bu">all</span> <span class="im">import</span> <span class="op">*</span></span>
 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.distributed <span class="im">import</span> <span class="op">*</span></span>
 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.models.xresnet <span class="im">import</span> <span class="op">*</span></span>
-<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> untar_data(URLs.IMAGEWOOF_320)</span>
-<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a>dls <span class="op">=</span> DataBlock(</span>
-<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a>    blocks<span class="op">=</span>(ImageBlock, CategoryBlock),</span>
-<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a>    splitter<span class="op">=</span>GrandparentSplitter(valid_name<span class="op">=</span><span class="st">&#39;val&#39;</span>),</span>
-<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a>    get_items<span class="op">=</span>get_image_files, get_y<span class="op">=</span>parent_label,</span>
-<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a>    item_tfms<span class="op">=</span>[RandomResizedCrop(<span class="dv">160</span>), FlipItem(<span class="fl">0.5</span>)],</span>
-<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a>    batch_tfms<span class="op">=</span>Normalize.from_stats(<span class="op">*</span>imagenet_stats)</span>
-<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a>).dataloaders(path, path<span class="op">=</span>path, bs<span class="op">=</span><span class="dv">64</span>)</span>
+<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a><span class="co"># DOWNLOADS DATASET - we need to do this on the login node</span></span>
+<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> untar_data(URLs.IMAGEWOOF_320) </span>
+<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a></span></code></pre></div>
 </section>
-<section id="single-gpu-code" class="slide level2">
-<h2>Single-gpu code</h2>
+<section id="loading-your-data" class="slide level2">
+<h2>Loading your data</h2>
 <div class="sourceCode" id="cb5"><pre
 class="sourceCode python"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.<span class="bu">all</span> <span class="im">import</span> <span class="op">*</span></span>
 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.distributed <span class="im">import</span> <span class="op">*</span></span>
@@ -701,9 +686,30 @@ class="sourceCode python"><code class="sourceCode python"><span id="cb5-1"><a hr
 <span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a>    batch_tfms<span class="op">=</span>Normalize.from_stats(<span class="op">*</span>imagenet_stats)</span>
 <span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a>).dataloaders(path, path<span class="op">=</span>path, bs<span class="op">=</span><span class="dv">64</span>)</span>
 <span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a>learn <span class="op">=</span> Learner(dls, xresnet50(n_out<span class="op">=</span><span class="dv">10</span>), metrics<span class="op">=</span>[accuracy,top_k_accuracy]).to_fp16()</span>
+<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a>learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
+<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a></span></code></pre></div>
+</section>
+<section id="single-gpu-code" class="slide level2">
+<h2>Single-gpu code</h2>
+<div class="sourceCode" id="cb6"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.<span class="bu">all</span> <span class="im">import</span> <span class="op">*</span></span>
+<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.distributed <span class="im">import</span> <span class="op">*</span></span>
+<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.models.xresnet <span class="im">import</span> <span class="op">*</span></span>
+<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> untar_data(URLs.IMAGEWOOF_320)</span>
+<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>dls <span class="op">=</span> DataBlock(</span>
+<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>    blocks<span class="op">=</span>(ImageBlock, CategoryBlock),</span>
+<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>    splitter<span class="op">=</span>GrandparentSplitter(valid_name<span class="op">=</span><span class="st">&#39;val&#39;</span>),</span>
+<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a>    get_items<span class="op">=</span>get_image_files, get_y<span class="op">=</span>parent_label,</span>
+<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a>    item_tfms<span class="op">=</span>[RandomResizedCrop(<span class="dv">160</span>), FlipItem(<span class="fl">0.5</span>)],</span>
+<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a>    batch_tfms<span class="op">=</span>Normalize.from_stats(<span class="op">*</span>imagenet_stats)</span>
+<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a>).dataloaders(path, path<span class="op">=</span>path, bs<span class="op">=</span><span class="dv">64</span>)</span>
+<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a>learn <span class="op">=</span> Learner(dls, xresnet50(n_out<span class="op">=</span><span class="dv">10</span>), metrics<span class="op">=</span>[accuracy,top_k_accuracy]).to_fp16()</span>
+<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a>learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
 </section>
 <section id="venv_template" class="slide level2">
 <h2>Venv_template</h2>
@@ -716,130 +722,160 @@ modules</li>
 <li class="fragment"><a
 href="https://gitlab.jsc.fz-juelich.de/kesselheim1/sc_venv_template">Link
 to gitlab repo</a></li>
-<li class="fragment"><div class="sourceCode" id="cb6"><pre
-class="sourceCode bash"><code class="sourceCode bash"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">git</span> clone https://gitlab.jsc.fz-juelich.de/kesselheim1/sc_venv_template.git</span></code></pre></div></li>
-<li class="fragment">Add this to requirements.txt:</li>
 <li class="fragment"><div class="sourceCode" id="cb7"><pre
-class="sourceCode python"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>fastai</span>
-<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>deepspeed</span>
-<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>accelerate</span></code></pre></div></li>
-<li class="fragment">(the last one will become <code>accelerate</code>
-later this week)</li>
-<li class="fragment">Run <code>./setup.sh</code></li>
-<li class="fragment"><code>source activate.sh</code></li>
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="bu">cd</span> <span class="va">$HOME</span>/2023-nov-intro-to-supercompting-jsc/src</span>
+<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="fu">git</span> clone https://gitlab.jsc.fz-juelich.de/kesselheim1/sc_venv_template.git</span></code></pre></div></li>
+<li class="fragment">Add this to sc_venv_template/requirements.txt:</li>
+<li class="fragment"><div class="sourceCode" id="cb8"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>fastai</span>
+<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>deepspeed</span>
+<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>accelerate</span></code></pre></div></li>
+<li class="fragment"><div class="sourceCode" id="cb9"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="ex">sc_venv_template/setup.sh</span></span>
+<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="bu">source</span> sc_venv_template/activate.sh</span></code></pre></div></li>
 <li class="fragment">Done! You installed everything you need</li>
 </ul>
 </section>
 <section id="submission-script" class="slide level2">
 <h2>Submission Script</h2>
-<div class="sourceCode" id="cb8"><pre
-class="sourceCode bash"><code class="sourceCode bash"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co">#!/bin/bash -x</span></span>
-<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --account=training2334</span></span>
-<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --nodes=1</span></span>
-<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --job-name=ai-serial</span></span>
-<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --ntasks-per-node=1</span></span>
-<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --cpus-per-task=1</span></span>
-<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --output=out-serial.%j</span></span>
-<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --error=err-serial.%j</span></span>
-<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --time=00:40:00</span></span>
-<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --partition=develbooster</span></span>
-<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Make sure we are on the right directory</span></span>
-<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a><span class="bu">cd</span> <span class="va">$HOME</span>/2023-nov-intro-to-supercompting-jsc/src</span>
-<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a><span class="co"># This loads modules and python packages</span></span>
-<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a><span class="bu">source</span> sc_venv_template/activate.sh</span>
-<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb8-18"><a href="#cb8-18" aria-hidden="true" tabindex="-1"></a><span class="co"># Run the demo</span></span>
-<span id="cb8-19"><a href="#cb8-19" aria-hidden="true" tabindex="-1"></a><span class="bu">time</span> srun python serial.py</span></code></pre></div>
+<div class="sourceCode" id="cb10"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="co">#!/bin/bash</span></span>
+<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --account=training2334</span></span>
+<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --nodes=1</span></span>
+<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --job-name=ai-serial</span></span>
+<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --ntasks-per-node=1</span></span>
+<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --cpus-per-task=1</span></span>
+<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --output=out-serial.%j</span></span>
+<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --error=err-serial.%j</span></span>
+<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --time=00:40:00</span></span>
+<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --partition=develbooster</span></span>
+<span id="cb10-11"><a href="#cb10-11" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Make sure we are on the right directory</span></span>
+<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a><span class="bu">cd</span> <span class="va">$HOME</span>/2023-nov-intro-to-supercompting-jsc/src</span>
+<span id="cb10-14"><a href="#cb10-14" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb10-15"><a href="#cb10-15" aria-hidden="true" tabindex="-1"></a><span class="co"># This loads modules and python packages</span></span>
+<span id="cb10-16"><a href="#cb10-16" aria-hidden="true" tabindex="-1"></a><span class="bu">source</span> sc_venv_template/activate.sh</span>
+<span id="cb10-17"><a href="#cb10-17" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb10-18"><a href="#cb10-18" aria-hidden="true" tabindex="-1"></a><span class="co"># Run the demo</span></span>
+<span id="cb10-19"><a href="#cb10-19" aria-hidden="true" tabindex="-1"></a><span class="bu">time</span> srun python serial.py</span></code></pre></div>
+</section>
+<section id="download-dataset" class="slide level2">
+<h2>Download dataset</h2>
+<ul>
+<li class="fragment">Compute nodes have no internet</li>
+<li class="fragment">We need to download the dataset beforehand, on the login node</li>
+</ul>
+</section>
+<section id="download-dataset-1" class="slide level2">
+<h2>Download dataset</h2>
+<div class="sourceCode" id="cb11"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="bu">cd</span> <span class="va">$HOME</span>/2023-nov-intro-to-supercompting-jsc/src</span>
+<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="bu">source</span> sc_venv_template/activate.sh</span>
+<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="ex">python</span> serial.py</span>
+<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a><span class="kw">(</span><span class="ex">Some</span> warnings<span class="kw">)</span></span>
+<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
+<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a><span class="ex">Epoch</span> 1/1 : <span class="kw">|</span><span class="ex">-------------------------------------------------------------</span><span class="kw">|</span> <span class="ex">0.71%</span> [1/141 00:07<span class="op">&lt;</span>16:40]</span></code></pre></div>
+<ul>
+<li class="fragment">It started training, on the login node’s CPUs
+(WRONG!!!)</li>
+<li class="fragment">That means we have the data!</li>
+<li class="fragment">We just cancel with Ctrl+C</li>
+</ul>
 </section>
 <section id="running-it" class="slide level2">
 <h2>Running it</h2>
 <ul>
-<li class="fragment"><div class="sourceCode" id="cb9"><pre
-class="sourceCode bash"><code class="sourceCode bash"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="bu">cd</span> <span class="va">$HOME</span>/2023-nov-intro-to-supercompting-jsc/src</span>
-<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="ex">sbatch</span> serial.slurm</span></code></pre></div></li>
+<li class="fragment"><div class="sourceCode" id="cb12"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="bu">cd</span> <span class="va">$HOME</span>/2023-nov-intro-to-supercompting-jsc/src</span>
+<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="ex">sbatch</span> serial.slurm</span></code></pre></div></li>
 <li class="fragment">On Juwels Booster, should take about 5 minutes</li>
 <li class="fragment">On a cpu system this would take half a day</li>
+<li class="fragment">Check the out-serial.XXXXXX and err-serial.XXXXXX
+files</li>
 </ul>
 </section>
 <section id="going-data-parallel" class="slide level2">
 <h2>Going data parallel</h2>
 <ul>
-<li class="fragment">Same code as before, let’s show the
+<li class="fragment">Almost the same code as before, let’s show the
 differences</li>
 </ul>
 </section>
 <section id="data-parallel-4" class="slide level2">
 <h2>Data parallel</h2>
-<div class="sourceCode" id="cb10"><pre
-class="sourceCode python"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.<span class="bu">all</span> <span class="im">import</span> <span class="op">*</span></span>
-<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.distributed <span class="im">import</span> <span class="op">*</span></span>
-<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.models.xresnet <span class="im">import</span> <span class="op">*</span></span>
-<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> rank0_first(untar_data, URLs.IMAGEWOOF_320)</span>
-<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a>dls <span class="op">=</span> DataBlock(</span>
-<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a>    blocks<span class="op">=</span>(ImageBlock, CategoryBlock),</span>
-<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a>    splitter<span class="op">=</span>GrandparentSplitter(valid_name<span class="op">=</span><span class="st">&#39;val&#39;</span>),</span>
-<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a>    get_items<span class="op">=</span>get_image_files, get_y<span class="op">=</span>parent_label,</span>
-<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a>    item_tfms<span class="op">=</span>[RandomResizedCrop(<span class="dv">160</span>), FlipItem(<span class="fl">0.5</span>)],</span>
-<span id="cb10-11"><a href="#cb10-11" aria-hidden="true" tabindex="-1"></a>    batch_tfms<span class="op">=</span>Normalize.from_stats(<span class="op">*</span>imagenet_stats)</span>
-<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a>).dataloaders(path, path<span class="op">=</span>path, bs<span class="op">=</span><span class="dv">64</span>)</span>
-<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb10-14"><a href="#cb10-14" aria-hidden="true" tabindex="-1"></a>learn <span class="op">=</span> Learner(dls, xresnet50(n_out<span class="op">=</span><span class="dv">10</span>), metrics<span class="op">=</span>[accuracy,top_k_accuracy]).to_fp16()</span>
-<span id="cb10-15"><a href="#cb10-15" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> learn.distrib_ctx():</span>
-<span id="cb10-16"><a href="#cb10-16" aria-hidden="true" tabindex="-1"></a>    learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
+<div class="sourceCode" id="cb13"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.<span class="bu">all</span> <span class="im">import</span> <span class="op">*</span></span>
+<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.distributed <span class="im">import</span> <span class="op">*</span></span>
+<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> fastai.vision.models.xresnet <span class="im">import</span> <span class="op">*</span></span>
+<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> rank0_first(untar_data, URLs.IMAGEWOOF_320)</span>
+<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a>dls <span class="op">=</span> DataBlock(</span>
+<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a>    blocks<span class="op">=</span>(ImageBlock, CategoryBlock),</span>
+<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a>    splitter<span class="op">=</span>GrandparentSplitter(valid_name<span class="op">=</span><span class="st">&#39;val&#39;</span>),</span>
+<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a>    get_items<span class="op">=</span>get_image_files, get_y<span class="op">=</span>parent_label,</span>
+<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a>    item_tfms<span class="op">=</span>[RandomResizedCrop(<span class="dv">160</span>), FlipItem(<span class="fl">0.5</span>)],</span>
+<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a>    batch_tfms<span class="op">=</span>Normalize.from_stats(<span class="op">*</span>imagenet_stats)</span>
+<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a>).dataloaders(path, path<span class="op">=</span>path, bs<span class="op">=</span><span class="dv">64</span>)</span>
+<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb13-14"><a href="#cb13-14" aria-hidden="true" tabindex="-1"></a>learn <span class="op">=</span> Learner(dls, xresnet50(n_out<span class="op">=</span><span class="dv">10</span>), metrics<span class="op">=</span>[accuracy,top_k_accuracy]).to_fp16()</span>
+<span id="cb13-15"><a href="#cb13-15" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> learn.distrib_ctx():</span>
+<span id="cb13-16"><a href="#cb13-16" aria-hidden="true" tabindex="-1"></a>    learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
 </section>
 <section id="data-parallel-5" class="slide level2">
 <h2>Data Parallel</h2>
 <h3 id="what-changed">What changed?</h3>
 <p>It was</p>
-<div class="sourceCode" id="cb11"><pre
-class="sourceCode python"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> untar_data(URLs.IMAGEWOOF_320)</span></code></pre></div>
+<div class="sourceCode" id="cb14"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> untar_data(URLs.IMAGEWOOF_320)</span></code></pre></div>
 <p>Became</p>
-<div class="sourceCode" id="cb12"><pre
-class="sourceCode python"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> rank0_first(untar_data, URLs.IMAGEWOOF_320)</span></code></pre></div>
+<div class="sourceCode" id="cb15"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>path <span class="op">=</span> rank0_first(untar_data, URLs.IMAGEWOOF_320)</span></code></pre></div>
 <p>It was</p>
-<div class="sourceCode" id="cb13"><pre
-class="sourceCode python"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
+<div class="sourceCode" id="cb16"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
 <p>Became</p>
-<div class="sourceCode" id="cb14"><pre
-class="sourceCode python"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> learn.distrib_ctx():</span>
-<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>    learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
+<div class="sourceCode" id="cb17"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> learn.distrib_ctx():</span>
+<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>    learn.fine_tune(<span class="dv">6</span>)</span></code></pre></div>
 </section>
 <section id="submission-script-data-parallel" class="slide level2">
 <h2>Submission script: data parallel</h2>
 <ul>
-<li class="fragment">Please check the course repository: <a
-href="https://gitlab.jsc.fz-juelich.de/strube1/2023-nov-intro-to-supercompting-jsc/-/blob/main/src/distrib.slurm">src/distrib.slurm</a></li>
+<li class="fragment"><p>Please check the course repository: <a
+href="https://gitlab.jsc.fz-juelich.de/strube1/2023-nov-intro-to-supercompting-jsc/-/blob/main/src/distrib.slurm">src/distrib.slurm</a></p></li>
+<li class="fragment"><p>Main differences:</p></li>
+<li class="fragment"><div class="sourceCode" id="cb18"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --cpus-per-task=48</span></span>
+<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="co">#SBATCH --gres=gpu:4</span></span></code></pre></div></li>
 </ul>
 </section>
 <section id="lets-check-the-outputs" class="slide level2">
 <h2>Let’s check the outputs!</h2>
 <h4 id="single-gpu">Single gpu:</h4>
-<div class="sourceCode" id="cb15"><pre
-class="sourceCode bash"><code class="sourceCode bash"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
-<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.249933    2.152813    0.225757  0.750573        01:11                          </span>
-<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
-<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         1.882008    1.895813    0.324510  0.832018        00:44                          </span>
-<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a><span class="ex">1</span>         1.837312    1.916380    0.374141  0.845253        00:44                          </span>
-<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a><span class="ex">2</span>         1.717144    1.739026    0.378722  0.869941        00:43                          </span>
-<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a><span class="ex">3</span>         1.594981    1.637526    0.417664  0.891575        00:44                          </span>
-<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a><span class="ex">4</span>         1.460454    1.410519    0.507254  0.920336        00:44                          </span>
-<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a><span class="ex">5</span>         1.389946    1.304924    0.538814  0.935862        00:43  </span>
-<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a><span class="ex">real</span>    5m44.972s</span></code></pre></div>
+<div class="sourceCode" id="cb19"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
+<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.249933    2.152813    0.225757  0.750573        01:11                          </span>
+<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
+<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         1.882008    1.895813    0.324510  0.832018        00:44                          </span>
+<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a><span class="ex">1</span>         1.837312    1.916380    0.374141  0.845253        00:44                          </span>
+<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a><span class="ex">2</span>         1.717144    1.739026    0.378722  0.869941        00:43                          </span>
+<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a><span class="ex">3</span>         1.594981    1.637526    0.417664  0.891575        00:44                          </span>
+<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a><span class="ex">4</span>         1.460454    1.410519    0.507254  0.920336        00:44                          </span>
+<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a><span class="ex">5</span>         1.389946    1.304924    0.538814  0.935862        00:43  </span>
+<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a><span class="ex">real</span>    5m44.972s</span></code></pre></div>
 <h4 id="multi-gpu">Multi gpu:</h4>
-<div class="sourceCode" id="cb16"><pre
-class="sourceCode bash"><code class="sourceCode bash"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
-<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.201540    2.799354    0.202950  0.662513        00:09                        </span>
-<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
-<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         1.951004    2.059517    0.294761  0.781282        00:08                        </span>
-<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a><span class="ex">1</span>         1.929561    1.999069    0.309512  0.792981        00:08                        </span>
-<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="ex">2</span>         1.854629    1.962271    0.314344  0.840285        00:08                        </span>
-<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a><span class="ex">3</span>         1.754019    1.687136    0.404883  0.872330        00:08                        </span>
-<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a><span class="ex">4</span>         1.643759    1.499526    0.482706  0.906409        00:08                        </span>
-<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a><span class="ex">5</span>         1.554356    1.450976    0.502798  0.914547        00:08  </span>
-<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a><span class="ex">real</span>    1m19.979s</span></code></pre></div>
+<div class="sourceCode" id="cb20"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
+<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.201540    2.799354    0.202950  0.662513        00:09                        </span>
+<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
+<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         1.951004    2.059517    0.294761  0.781282        00:08                        </span>
+<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="ex">1</span>         1.929561    1.999069    0.309512  0.792981        00:08                        </span>
+<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a><span class="ex">2</span>         1.854629    1.962271    0.314344  0.840285        00:08                        </span>
+<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="ex">3</span>         1.754019    1.687136    0.404883  0.872330        00:08                        </span>
+<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="ex">4</span>         1.643759    1.499526    0.482706  0.906409        00:08                        </span>
+<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a><span class="ex">5</span>         1.554356    1.450976    0.502798  0.914547        00:08  </span>
+<span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a><span class="ex">real</span>    1m19.979s</span></code></pre></div>
 </section>
 <section id="some-insights" class="slide level2">
 <h2>Some insights</h2>
@@ -848,6 +884,7 @@ class="sourceCode bash"><code class="sourceCode bash"><span id="cb16-1"><a href=
 and loss 😩
 <ul>
 <li class="fragment">In exchange for speed 🏎️</li>
+<li class="fragment">Train a bit longer and you’re good!</li>
 </ul></li>
 <li class="fragment">It’s more than 4x faster because the library is
 multi-threaded (and now we use 48 threads)</li>
@@ -872,17 +909,17 @@ submission file!</li>
 <section id="multi-node-1" class="slide level2">
 <h2>Multi-node</h2>
 <ul>
-<li class="fragment"><div class="sourceCode" id="cb17"><pre
-class="sourceCode bash"><code class="sourceCode bash"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
-<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.242036    2.192690    0.201728  0.681148        00:10                      </span>
-<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
-<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.035004    2.084082    0.246189  0.748984        00:05                      </span>
-<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a><span class="ex">1</span>         1.981432    2.054528    0.247205  0.764482        00:05                      </span>
-<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a><span class="ex">2</span>         1.942930    1.918441    0.316057  0.821138        00:05                      </span>
-<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a><span class="ex">3</span>         1.898426    1.832725    0.370173  0.839431        00:05                      </span>
-<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a><span class="ex">4</span>         1.859066    1.781805    0.375508  0.858740        00:05                      </span>
-<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a><span class="ex">5</span>         1.820968    1.743448    0.394055  0.864583        00:05</span>
-<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a><span class="ex">real</span>    1m15.651s    </span></code></pre></div></li>
+<li class="fragment"><div class="sourceCode" id="cb21"><pre
+class="sourceCode bash"><code class="sourceCode bash"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
+<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.242036    2.192690    0.201728  0.681148        00:10                      </span>
+<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a><span class="ex">epoch</span>     train_loss  valid_loss  accuracy  top_k_accuracy  time    </span>
+<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a><span class="ex">0</span>         2.035004    2.084082    0.246189  0.748984        00:05                      </span>
+<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a><span class="ex">1</span>         1.981432    2.054528    0.247205  0.764482        00:05                      </span>
+<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a><span class="ex">2</span>         1.942930    1.918441    0.316057  0.821138        00:05                      </span>
+<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a><span class="ex">3</span>         1.898426    1.832725    0.370173  0.839431        00:05                      </span>
+<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a><span class="ex">4</span>         1.859066    1.781805    0.375508  0.858740        00:05                      </span>
+<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a><span class="ex">5</span>         1.820968    1.743448    0.394055  0.864583        00:05</span>
+<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a><span class="ex">real</span>    1m15.651s    </span></code></pre></div></li>
 </ul>
 </section>
 <section id="some-insights-1" class="slide level2">
@@ -895,6 +932,7 @@ vs 8 seconds)</li>
 <ul>
 <li class="fragment">It fits into 4gb, we “stretched” it to a 320gb
 system</li>
+<li class="fragment">It’s not a good fit for this system</li>
 </ul></li>
 <li class="fragment">You need bigger models to really exercise the gpu
 and scaling</li>
diff --git a/src/serial.slurm b/src/serial.slurm
index 2068d87f62d657cd29dd5e1ed8b32945d19b5fca..facab0234a31666f5f823843575ef04ce1e17374 100644
--- a/src/serial.slurm
+++ b/src/serial.slurm
@@ -1,4 +1,4 @@
-#!/bin/bash -x
+#!/bin/bash
 #SBATCH --account=training2334
 #SBATCH --nodes=1
 #SBATCH --job-name=ai-serial