diff --git a/Jupyter_Notebooks/conditional_quantile_plot.ipynb b/Jupyter_Notebooks/conditional_quantile_plot.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..01922a48122017c03e1121f4a6bcc4d72884cbdc
--- /dev/null
+++ b/Jupyter_Notebooks/conditional_quantile_plot.ipynb
@@ -0,0 +1,389 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "under-cooler",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, sys\n",
+    "import glob\n",
+    "import datetime as dt\n",
+    "import numpy as np\n",
+    "import xarray as xr\n",
+    "\n",
+    "import time "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "id": "becoming-dover",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "8440\n",
+      "Data variables:\n",
+      "    2t_in                   (in_hour, lat, lon) float32 ...\n",
+      "    tcc_in                  (in_hour, lat, lon) float32 ...\n",
+      "    t_850_in                (in_hour, lat, lon) float32 ...\n",
+      "    2t_ref                  (fcst_hour, lat, lon) float32 ...\n",
+      "    tcc_ref                 (fcst_hour, lat, lon) float32 ...\n",
+      "    t_850_ref               (fcst_hour, lat, lon) float32 ...\n",
+      "    2t_savp_fcst            (fcst_hour, lat, lon) float32 ...\n",
+      "    tcc_savp_fcst           (fcst_hour, lat, lon) float32 ...\n",
+      "    t_850_savp_fcst         (fcst_hour, lat, lon) float32 ...\n",
+      "    2t_persistence_fcst     (fcst_hour, lat, lon) float64 ...\n",
+      "    tcc_persistence_fcst    (fcst_hour, lat, lon) float64 ...\n",
+      "    t_850_persistence_fcst  (fcst_hour, lat, lon) float64 ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "forecast_path = \"/p/home/jusers/langguth1/juwels/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-80x48-3960N0180E-2t_tcc_t_850_langguth1/savp/20210505T131220_mache1_karim_savp_smreg_cv3_3\"\n",
+    "fnames= os.path.join(forecast_path, \"vfp_date_*sample_ind_*.nc\" )\n",
+    "\n",
+    "fnames = glob.glob(fnames)\n",
+    "print(len(fnames))\n",
+    "\n",
+    "dfile = xr.open_dataset(fnames[99])\n",
+    "\n",
+    "print(dfile.data_vars)\n",
+    "#print(dfile[\"init_time\"])\n",
+    "#print(dfile[\"2t_savp_fcst\"][2])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "id": "editorial-bunny",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def non_interst_vars(ds):\n",
+    "    \"\"\"\n",
+    "    Creates list of variables that are not of interest. For this, vars2proc must be defined at global scope\n",
+    "    :param ds: the dataset\n",
+    "    :return: list of variables in dataset that are not of interest\n",
+    "    \"\"\"\n",
+    "    return [v for v in ds.data_vars\n",
+    "            if v not in vars2proc]\n",
+    "#\n",
+    "# ====================================================================================================\n",
+    "\n",
+    "\n",
+    "def get_relevant_vars(ds):\n",
+    "    \"\"\"\n",
+    "    Drops variables that are not of interest from dataset and selects a single forecast step (index 11 along fcst_hour).\n",
+    "    For this, fcst_hour must be a dimension of the dataset and vars2proc must be defined at global scope\n",
+    "    :param ds: the dataset\n",
+    "    :return: dataset with non-interesting variables dropped and data reduced to the selected forecast step\n",
+    "    \"\"\"\n",
+    "    return ds.drop(non_interst_vars(ds)).isel(fcst_hour=11)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "id": "great-metro",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Registering and loading data took 1057.31 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "vars2proc = [\"2t_savp_fcst\", \"2t_ref\"]\n",
+    "\n",
+    "time0 = time.time()\n",
+    "with xr.open_mfdataset(fnames, decode_cf=True, combine=\"nested\", concat_dim=[\"init_time\"], compat=\"broadcast_equals\", preprocess=get_relevant_vars) as dfiles:\n",
+    "    data = dfiles.load()\n",
+    "    #times0 = dfiles[\"time_forecast\"]\n",
+    "    print(\"Registering and loading data took {0:.2f} seconds\".format(time.time()- time0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "id": "hindu-wesley",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<xarray.Dataset>\n",
+      "Dimensions:       (init_time: 8440, lat: 48, lon: 80)\n",
+      "Coordinates:\n",
+      "  * init_time     (init_time) datetime64[ns] 2010-08-20T05:00:00 ... 2010-03-...\n",
+      "  * lat           (lat) float64 53.7 53.4 53.1 52.8 52.5 ... 40.5 40.2 39.9 39.6\n",
+      "  * lon           (lon) float64 1.8 2.1 2.4 2.7 3.0 ... 24.3 24.6 24.9 25.2 25.5\n",
+      "    fcst_hour     int64 12\n",
+      "Data variables:\n",
+      "    2t_savp_fcst  (init_time, lat, lon) float32 291.3 291.8 ... 288.5 288.2\n",
+      "    2t_ref        (init_time, lat, lon) float32 292.2 292.1 ... 288.5 288.6\n"
+     ]
+    }
+   ],
+   "source": [
+    "data_correct = xr.Dataset({\"2t_savp_fcst\": data[\"2t_savp_fcst\"], \"2t_ref\": data[\"2t_ref\"]})\n",
+    "print(data_correct)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "id": "sweet-happening",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313]\n"
+     ]
+    }
+   ],
+   "source": [
+    "data_fcst, data_ref = data_correct[\"2t_savp_fcst\"], data_correct[\"2t_ref\"]\n",
+    "\n",
+    "fcst_min, fcst_max = np.floor(np.min(data_fcst)), np.ceil(np.max(data_fcst))\n",
+    "x_bins = list(np.arange(int(fcst_min), int(fcst_max) + 1))\n",
+    "x_bins_c = 0.5*(np.asarray(x_bins[0:-1]) + np.asarray(x_bins[1:]))\n",
+    "nbins = len(x_bins) - 1\n",
+    "\n",
+    "print(x_bins)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "id": "incorporate-flooring",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "quantiles = [0.05, 0.5, 0.95]\n",
+    "nquantiles = len(quantiles)\n",
+    "quantile_panel = xr.DataArray(np.full((nbins, nquantiles), np.nan), coords={\"bin_center\": x_bins_c, \"quantile\": quantiles},\n",
+    "                              dims=[\"bin_center\", \"quantile\"])\n",
+    "for i in np.arange(nbins):\n",
+    "    data_cropped = data_correct[\"2t_ref\"].where(np.logical_and(data_correct[\"2t_savp_fcst\"] >= x_bins[i],\n",
+    "                                                               data_correct[\"2t_savp_fcst\"] < x_bins[i+1]))\n",
+    "    quantile_panel.loc[dict(bin_center=x_bins_c[i])] = data_cropped.quantile([0.05, 0.5, 0.95])\n",
+    "    \n",
+    "x_bins_c = x_bins_c - 273.15\n",
+    "quantile_panel = quantile_panel - 273.15"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "id": "brilliant-aberdeen",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 648x432 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "fig, ax = plt.subplots(figsize=(9,6))\n",
+    "\n",
+    "ls_all = [\"--\", \"-\", \"--\"]\n",
+    "lw_all = [2., 1.5, 2.]\n",
+    "ax.plot(x_bins_c, x_bins_c, color='k', label='reference 1:1', linewidth=1.)\n",
+    "for i in np.arange(3):\n",
+    "    ax.plot(x_bins_c, quantile_panel.isel(quantile=i), ls=ls_all[i], color=\"k\", lw=lw_all[i])\n",
+    "    \n",
+    "ax.set_ylabel(\"2m temperature from ERA5 [°C]\", fontsize=16)\n",
+    "ax.set_xlabel(\"Predicted 2m temperature from SAVP [°C]\", fontsize=16)\n",
+    "\n",
+    "ax.tick_params(axis=\"both\", labelsize=14)\n",
+    "\n",
+    "fig.savefig(\"./first_cond_quantile.png\")\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "relevant-freight",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#data_grouped = data_correct.groupby_bins(\"2t_savp_fcst\", x_bins)#.groups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "id": "ordered-cambridge",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[22 22 22 ... 18 18 18]\n",
+      "  [22 22 22 ... 18 18 18]\n",
+      "  [22 22 22 ... 19 19 19]\n",
+      "  ...\n",
+      "  [29 29 29 ... 30 30 30]\n",
+      "  [29 30 29 ... 31 31 31]\n",
+      "  [29 30 29 ... 31 31 31]]\n",
+      "\n",
+      " [[21 21 21 ... 20 20 20]\n",
+      "  [20 21 21 ... 20 20 20]\n",
+      "  [20 21 21 ... 20 20 20]\n",
+      "  ...\n",
+      "  [30 30 30 ... 31 31 31]\n",
+      "  [30 30 30 ... 31 31 31]\n",
+      "  [30 30 30 ... 31 31 31]]\n",
+      "\n",
+      " [[21 21 21 ... 21 21 21]\n",
+      "  [21 21 21 ... 21 21 21]\n",
+      "  [21 21 21 ... 21 21 21]\n",
+      "  ...\n",
+      "  [28 28 28 ... 31 31 31]\n",
+      "  [28 28 28 ... 32 32 31]\n",
+      "  [28 29 29 ... 32 32 32]]\n",
+      "\n",
+      " ...\n",
+      "\n",
+      " [[22 22 22 ... 20 20 20]\n",
+      "  [22 22 22 ... 20 20 20]\n",
+      "  [22 21 21 ... 20 20 20]\n",
+      "  ...\n",
+      "  [29 29 29 ... 31 31 31]\n",
+      "  [29 29 29 ... 32 32 32]\n",
+      "  [30 30 29 ... 32 32 32]]\n",
+      "\n",
+      " [[21 21 21 ... 20 20 20]\n",
+      "  [20 21 21 ... 20 20 20]\n",
+      "  [20 20 21 ... 20 20 20]\n",
+      "  ...\n",
+      "  [30 30 30 ... 31 31 31]\n",
+      "  [30 30 29 ... 31 31 31]\n",
+      "  [30 30 30 ... 31 31 31]]\n",
+      "\n",
+      " [[22 22 22 ... 24 24 24]\n",
+      "  [22 22 22 ... 24 23 24]\n",
+      "  [22 22 22 ... 24 24 24]\n",
+      "  ...\n",
+      "  [27 27 27 ... 31 31 31]\n",
+      "  [28 28 28 ... 32 32 32]\n",
+      "  [28 28 28 ... 32 32 32]]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "inds_of_bins = np.digitize(data_fcst, x_bins, right=True)\n",
+    "\n",
+    "print(inds_of_bins)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "id": "furnished-customer",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<xarray.DataArray '2t_ref' (2t_savp_fcst_bins: 37)>\n",
+      "array([259.60351562, 264.13945557, 264.74759033, 265.45030518,\n",
+      "       266.47970703, 267.3628302 , 268.44342804, 269.80157959,\n",
+      "       270.4291217 , 271.22656982, 272.41841827, 274.18320801,\n",
+      "       274.74815369, 275.68839111, 276.3840918 , 277.0491394 ,\n",
+      "       277.99171387, 279.1111615 , 280.24440918, 281.56947693,\n",
+      "       282.817146  , 284.15313873, 285.25139038, 286.46736084,\n",
+      "       287.11281006, 287.56309875, 288.39205322, 289.28383789,\n",
+      "       290.12092529, 291.00213623, 291.93958588, 292.7901001 ,\n",
+      "       294.50114746, 295.28106201, 295.7451416 , 296.17975464,\n",
+      "       295.94475342])\n",
+      "Coordinates:\n",
+      "  * 2t_savp_fcst_bins  (2t_savp_fcst_bins) object (260, 261] ... (296, 297]\n",
+      "    quantile           float64 0.99\n",
+      "<xarray.DataArray '2t_savp_fcst' (2t_savp_fcst_bins: 37)>\n",
+      "array([260.51538086, 261.99571045, 262.96671509, 263.99466095,\n",
+      "       264.98212372, 265.99100769, 266.99321747, 267.99145386,\n",
+      "       268.99003754, 269.9897348 , 270.99363922, 271.99260651,\n",
+      "       272.98925781, 273.99296265, 274.99265747, 275.9934906 ,\n",
+      "       276.99263123, 277.99108887, 278.98980103, 279.99055573,\n",
+      "       280.98829712, 281.99064941, 282.98851074, 283.98887085,\n",
+      "       284.99008545, 285.99044281, 286.99019897, 287.9892334 ,\n",
+      "       288.98918335, 289.98908264, 290.98603363, 291.96720215,\n",
+      "       292.98072205, 293.98917358, 294.99038391, 295.9605835 ,\n",
+      "       296.59046722])\n",
+      "Coordinates:\n",
+      "  * 2t_savp_fcst_bins  (2t_savp_fcst_bins) object (260, 261] ... (296, 297]\n",
+      "    quantile           float64 0.99\n"
+     ]
+    }
+   ],
+   "source": [
+    "def calc_quantile(x, dim =\"init_time\"):\n",
+    "    return x.quantile(0.99)\n",
+    "\n",
+    "cond_quantile1 = data_grouped.map(calc_quantile)\n",
+    "#cond_quantile2 = data_grouped.map(calc_quantile)\n",
+    "\n",
+    "\n",
+    "print(cond_quantile1[\"quantile\"])\n",
+    "print(cond_quantile1[\"2t_savp_fcst\"])\n",
+    "\n",
+    "#print(cond_quantile2[\"2t_ref\"])\n",
+    "#print(cond_quantile2[\"2t_savp_fcst\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "electrical-evening",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Jupyter_Notebooks/first_cond_quantile.png b/Jupyter_Notebooks/first_cond_quantile.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ff3a7a8a081c4a874d2e2a8c8d3d0d2e47d1fb5
Binary files /dev/null and b/Jupyter_Notebooks/first_cond_quantile.png differ
diff --git a/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb b/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..d788742d00cb9054dd90557edc674e481cf1c77b
--- /dev/null
+++ b/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb
@@ -0,0 +1,684 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, glob\n",
+    "import math\n",
+    "import pickle\n",
+    "import numpy as np\n",
+    "import xarray as xr\n",
+    "import matplotlib\n",
+    "matplotlib.use('Agg')\n",
+    "from matplotlib.transforms import Affine2D\n",
+    "from matplotlib.patches import Polygon\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "base = \"/p/project/deepacf/deeprain/video_prediction_shared_folder/models/\"+ \\\n",
+    "       \"era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/\"\n",
+    "fname_timing_train = \"/timing_training_time.pkl\"\n",
+    "fname_timing_total = \"/timing_total_time.pkl\"\n",
+    "\n",
+    "fname_timing_iter = \"timing_per_iteration_time.pkl\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# some auxiliary functions\n",
+    "def orderOfMagnitude(number):\n",
+    "    return np.floor(np.log(number, 10))\n",
+    "\n",
+    "def total_times(infile):\n",
+    "    with open(infile,'rb') as tfile:\n",
+    "        #print(\"Opening pickle time: '{0}'\".format(infile))\n",
+    "        total_time_sec = pickle.load(tfile)\n",
+    "    return np.asarray(total_time_sec/60)\n",
+    "\n",
+    "def log_total_times(infile):\n",
+    "    total_time_min = total_times(infile)\n",
+    "    return np.log(total_time_min)\n",
+    "\n",
+    "\n",
+    "def get_time_dict(base, wildcardspec, tfilename, gpu_id_str=\"gpu\", llog = False):\n",
+    "    time_dict = {}\n",
+    "    flist_hpc = sorted(glob.glob(base + wildcardspec))\n",
+    "    wrapper = total_times\n",
+    "    if llog: wrapper = log_total_times\n",
+    "    for tfile in flist_hpc: \n",
+    "        ngpus = get_ngpus(tfile, gpu_id_str)\n",
+    "        time_dict[\"{0:d} GPU(s)\".format(ngpus)] = wrapper(tfile + tfilename)\n",
+    "    return time_dict\n",
+    "\n",
+    "def get_ngpus(fname, search_str, max_order=3):\n",
+    "    \"\"\"\n",
+    "    Tries to get numbers in the vicinity of search_str which is supposed to be a substring in fname.\n",
+    "    First searches for numbers right before the occurrence of search_str, then afterwards.\n",
+    "    :param fname: file name from which number should be inferred\n",
+    "    :param search_str: search string for which number identification is considered to be possible\n",
+    "    :param max_order: maximum order of retrieved number (default: 3 -> maximum number is 999 then)\n",
+    "    :return num_int: integer of number in the vicinity of search string. \n",
+    "    \"\"\"\n",
+    "    \n",
+    "    ind_gpu_info = fname.lower().find(search_str)\n",
+    "    if ind_gpu_info == -1:\n",
+    "        raise ValueError(\"Unable to find search string '{0}' in file name '{1}'\".format(search_str, fname))\n",
+    "    \n",
+    "    # init loops\n",
+    "    fname_len = len(fname)\n",
+    "    success, flag = False, True\n",
+    "    indm = 1\n",
+    "    ind_sm, ind_sp = 0, 0\n",
+    "\n",
+    "    # check occurrence of numbers in front of search string\n",
+    "    while indm < max_order and flag:\n",
+    "        if ind_gpu_info - indm > 0:\n",
+    "            if fname[ind_gpu_info - indm].isnumeric():\n",
+    "                ind_sm += 1\n",
+    "                success = True\n",
+    "            else:\n",
+    "                flag = False\n",
+    "        else:\n",
+    "            flag = False\n",
+    "        indm += 1\n",
+    "  \n",
+    "\n",
+    "    if not success: # check occurrence of numbers after search string\n",
+    "        ind_gpu_info = ind_gpu_info + len(search_str)\n",
+    "        flag = True\n",
+    "        indm = 0\n",
+    "        while indm < max_order and flag: \n",
+    "            if ind_gpu_info + indm < fname_len:\n",
+    "                if fname[ind_gpu_info + indm].isnumeric():\n",
+    "                    ind_sp += 1\n",
+    "                    success = True\n",
+    "                else:\n",
+    "                    flag = False\n",
+    "            else:\n",
+    "                flag = False\n",
+    "            indm += 1\n",
+    "            \n",
+    "        if success:\n",
+    "            return(int(fname[ind_gpu_info:ind_gpu_info+ind_sp]))\n",
+    "        else:\n",
+    "            raise ValueError(\"Search string found in fname, but unable to infer number of GPUs.\")\n",
+    "\n",
+    "    else:\n",
+    "        return(int(fname[ind_gpu_info-ind_sm:ind_gpu_info]))\n",
+    "        \n",
+    "        \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total computation with 16 GPU(s): 152.50984706878663\n",
+      "Total computation with 32 GPU(s): 81.80640578667322\n",
+      "Total computation with 4 GPU(s): 554.5182513117791\n",
+      "Total computation with 64 GPU(s): 45.01537701288859\n",
+      "Total computation with 8 GPU(s): 287.91878341039023\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Juwels\n",
+    "wildcard_juwels = '20210115T135325_langguth1_test_venv_juwels_container*old'\n",
+    "total_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_total, \"gpus\")\n",
+    "training_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_train, \"gpus\")\n",
+    "for key in training_time_min_juwels.keys():\n",
+    "    print(\"Total computation with {0}: {1}\".format(key, training_time_min_juwels[key]))\n",
+    "\n",
+    "overhead_time_juwels = {}\n",
+    "for key in training_time_min_juwels.keys() & total_time_min_juwels.keys():\n",
+    "    overhead_time_juwels[key] = total_time_min_juwels[key] - training_time_min_juwels[key]\n",
+    "    \n",
+    "#print('Juwels total time in minutes', get_time_d)\n",
+    "#print('Juwels total training time in minutes', training_time_min_juwels)\n",
+    "#overhead_time_juwels = np.array(total_time_min_juwels) - np.array(training_time_min_juwels)\n",
+    "#print('Juwels overhead time in minutes', overhead_time_juwels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total computation with 1 GPU(s): 566.7376739541689\n",
+      "Total computation with 4 GPU(s): 159.4931242307027\n",
+      "Total computation with 8 GPU(s): 92.15467914342881\n",
+      "Total computation with 16 GPU(s): 46.11619712909063\n",
+      "Total computation with 32 GPU(s): 33.09077355464299\n",
+      "Total computation with 64 GPU(s): 23.24405464331309\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Juwels booster\n",
+    "wildcard_booster = '2020*gong1_booster_gpu*'\n",
+    "total_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_total)\n",
+    "training_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_train)\n",
+    "for key in training_time_min_booster.keys():\n",
+    "    print(\"Total computation with {0}: {1}\".format(key, training_time_min_booster[key]))\n",
+    "\n",
+    "#print('Juwels Booster total time in minutes', list_times(base, wildcard_booster, filename_timing_total))\n",
+    "#print('Juwels Booster total training time in minutes', list_times(base, wildcard_booster, filename_timing_train))\n",
+    "overhead_time_booster = {}\n",
+    "for key in training_time_min_booster.keys() & total_time_min_booster.keys():\n",
+    "    overhead_time_booster[key] = total_time_min_booster[key] - training_time_min_booster[key]\n",
+    "#print('Juwels overhead time in minutes', overhead_time_booster)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def time_per_iteration_mean_std(infile):\n",
+    "    with open(infile, 'rb') as tfile:\n",
+    "        time_per_iteration_list = pickle.load(tfile) \n",
+    "        \n",
+    "    time_per_iteration = np.array(time_per_iteration_list)\n",
+    "    return np.mean(time_per_iteration), np.std(time_per_iteration)\n",
+    "\n",
+    "def iter_stat(base, wildcardspec, gpu_id_str=\"gpu\"):\n",
+    "    stat_iter_dict = {}\n",
+    "    flist_hpc = sorted(glob.glob(base + wildcardspec))\n",
+    "    for tdir in flist_hpc: \n",
+    "        ngpus = get_ngpus(tdir, gpu_id_str)\n",
+    "        ftname = os.path.join(tdir, fname_timing_iter)\n",
+    "        mean_loc, std_loc = time_per_iteration_mean_std(ftname)\n",
+    "        stat_iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = {\"mean\": mean_loc , \"std\": std_loc}\n",
+    "    return stat_iter_dict\n",
+    "\n",
+    "def time_per_iteration_all(infile):\n",
+    "    with open(infile,'rb') as tfile:\n",
+    "        time_per_iteration_list = pickle.load(tfile)\n",
+    "    return np.asarray(time_per_iteration_list)\n",
+    "\n",
+    "def all_iter(base, wildcardspec, gpu_id_str=\"gpu\"):\n",
+    "    iter_dict = {}\n",
+    "    flist_hpc = sorted(glob.glob(base + wildcardspec))\n",
+    "    for tdir in flist_hpc: \n",
+    "        ngpus = get_ngpus(tdir, gpu_id_str)\n",
+    "        ftname = os.path.join(tdir, fname_timing_iter)\n",
+    "        iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = time_per_iteration_all(ftname)\n",
+    "    return iter_dict    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "JUWELS (0.6151515198034729, 0.20104178037750603)\n",
+      "Booster (0.3521572324468615, 0.3656996619706779)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Juwels\n",
+    "print('JUWELS', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T140958_stadtler1_comparison_1node_1gpu/timing_per_iteration_time.pkl'))\n",
+    "# Booster\n",
+    "print('Booster', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T141910_gong1_booster_gpu1/timing_per_iteration_time.pkl'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Juwels mean and standart deviation {'16 GPU(s)': {'mean': 0.8209993402058342, 'std': 0.2627643291319852}, '32 GPU(s)': {'mean': 0.8590118098249986, 'std': 0.4078450977768068}, '4 GPU(s)': {'mean': 0.7445914211655112, 'std': 0.13789611351045}, '64 GPU(s)': {'mean': 0.9353915504630987, 'std': 0.6640973670265782}, '8 GPU(s)': {'mean': 0.7804724221628322, 'std': 0.21824334555299446}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Juwels\n",
+    "print('Juwels mean and standart deviation',iter_stat(base, wildcard_juwels))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Booster mean and standart deviation {'1 GPU(s)': {'mean': 0.3521572324468615, 'std': 0.3656996619706779}, '4 GPU(s)': {'mean': 0.41844419631014446, 'std': 0.5273198599590724}, '8 GPU(s)': {'mean': 0.48867375665101026, 'std': 0.4378652997442439}, '16 GPU(s)': {'mean': 0.4786909431320202, 'std': 0.49638173862734053}, '32 GPU(s)': {'mean': 0.6439339113469129, 'std': 1.4395666886291258}, '64 GPU(s)': {'mean': 0.8176603168024377, 'std': 2.1044189535471185}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Booster\n",
+    "print('Booster mean and standart deviation',iter_stat(base, wildcard_booster))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plotting \n",
+    "# Bar plot of total time and training time --> overhead time\n",
+    "\n",
+    "# dictionaries with the total times\n",
+    "tot_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_total)\n",
+    "tot_time_booster_dict= get_time_dict(base, wildcard_booster, fname_timing_total)\n",
+    "\n",
+    "# dictionaries with the training times\n",
+    "train_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_train)\n",
+    "train_time_booster_dict = get_time_dict(base, wildcard_booster, fname_timing_train)\n",
+    "\n",
+    "# get sorted arrays\n",
+    "# Note: The times for Juwels are divided by 2, since the experiments have been performed with an epoch number of 20\n",
+    "#       instead of 10 (as Bing and Scarlet did)\n",
+    "ngpus_sort = sorted([int(ngpu.split()[0]) for ngpu in tot_time_juwels_dict.keys()])\n",
+    "nexps = len(ngpus_sort)\n",
+    "tot_time_juwels = np.array([tot_time_juwels_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])/2.\n",
+    "tot_time_booster = np.array([tot_time_booster_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n",
+    "\n",
+    "train_time_juwels = np.array([train_time_juwels_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])/2.\n",
+    "train_time_booster = np.array([train_time_booster_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n",
+    "\n",
+    "overhead_juwels = tot_time_juwels - train_time_juwels \n",
+    "overhead_booster= tot_time_booster - train_time_booster\n",
+    "\n",
+    "names = [\"Juwels\", \"Juwels Booster\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "400.0\n",
+      "278.0\n",
+      "100.0\n",
+      "2.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "plot_computation_times(tot_time_juwels, tot_time_booster, labels, [\"Juwels\", \"Juwels Booster\"], \\\n",
+    "                       \"./total_computation_time\", log_yvals=False)\n",
+    "\n",
+    "plot_computation_times(overhead_juwels, overhead_booster, labels, [\"Juwels\", \"Juwels Booster\"], \\\n",
+    "                       \"./overhead_time\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#print(labels)\n",
+    "#raise ValueError(\"Stop!\")\n",
+    "#x = np.arange(len(labels))  # the label locations\n",
+    "#width = 0.35  # the width of the bars\n",
+    "\n",
+    "#fig, ax = plt.subplots()\n",
+    "#rects1 = ax.bar(x - width/2, np.round(tot_time_juwels, 2), width, label='Juwels')\n",
+    "#rects2 = ax.bar(x + width/2, np.round(tot_time_booster, 2), width, label='Booster')\n",
+    "\n",
+    "def plot_computation_times(times1, times2, ngpus, names, plt_fname, log_yvals = False):\n",
+    "    \n",
+    "    nlabels = len(ngpus)\n",
+    "    x_pos = np.arange(nlabels)\n",
+    "    \n",
+    "    bar_width = 0.35\n",
+    "    ytitle = \"Time\"\n",
+    "    ymax = np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n",
+    "    print(ymax)    \n",
+    "    if log_yvals: \n",
+    "        times1, times2 = np.log(times1), np.log(times2)\n",
+    "        ytitle = \"LOG(Time) [min]\"\n",
+    "        ymax = np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5))\n",
+    "    \n",
+    "    # create plot object\n",
+    "    fig, ax = plt.subplots()\n",
+    "    # create data bars\n",
+    "    rects1 = ax.bar(x_pos - bar_width/2, np.round(times1, 2), bar_width, label=names[0])\n",
+    "    rects2 = ax.bar(x_pos + bar_width/2, np.round(times2, 2), bar_width, label=names[1])\n",
+    "    # customize plot appearance\n",
+    "    # Add some text for labels, title and custom x-axis tick labels, etc.\n",
+    "    ax.set_ylabel(ytitle)\n",
+    "    ax.set_title('Comparison {0} and {1} with convLSTM model'.format(*names))\n",
+    "    ax.set_xticks(x_pos)\n",
+    "    ax.set_xticklabels(labels)\n",
+    "    ax.set_xlabel('# GPUs')\n",
+    "    print(np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5)))\n",
+    "    ax.set_ylim(0., ymax)\n",
+    "    ax.legend()\n",
+    "                \n",
+    "    # add labels\n",
+    "    autolabel(ax, rects1)\n",
+    "    autolabel(ax, rects2)\n",
+    "    plt.savefig(plt_fname+\".png\")\n",
+    "    plt.close()\n",
+    "    \n",
+    "\n",
+    "def autolabel(ax, rects):\n",
+    "    \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n",
+    "    for rect in rects:\n",
+    "        height = rect.get_height()\n",
+    "        ax.annotate('{}'.format(height),\n",
+    "                    xy=(rect.get_x() + rect.get_width() / 2, height),\n",
+    "                    xytext=(0, 3),  # 3 points vertical offset\n",
+    "                    textcoords=\"offset points\",\n",
+    "                    ha='center', va='bottom')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot mean + std \n",
+    "# Juwels\n",
+    "dict_stat_juwels = iter_stat(base, wildcard_juwels, gpu_id_str=\"gpu\")\n",
+    "#print(dict_stat_juwels)\n",
+    "iter_mean_juwels = np.array([dict_stat_juwels[\"{0:d} GPU(s)\".format(key)][\"mean\"] for key in labels])\n",
+    "iter_std_juwels = np.array([dict_stat_juwels[\"{0:d} GPU(s)\".format(key)][\"std\"] for key in labels])\n",
+    "\n",
+    "dict_stat_booster = iter_stat(base, wildcard_booster, gpu_id_str=\"gpu\")\n",
+    "iter_mean_booster = np.array([dict_stat_booster[\"{0:d} GPU(s)\".format(key)][\"mean\"] for key in labels])\n",
+    "iter_std_booster = np.array([dict_stat_booster[\"{0:d} GPU(s)\".format(key)][\"std\"] for key in labels])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(21225,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "iter_time_juwels = all_iter(base, wildcard_juwels)\n",
+    "iter_time_booster= all_iter(base, wildcard_booster)\n",
+    "# number of iterations per system, taken from the first GPU configuration in labels\n",
+    "max_iter_juwels = np.shape(iter_time_juwels[\"{0:d} GPU(s)\".format(labels[0])])[0]\n",
+    "max_iter_booster = np.shape(iter_time_booster[\"{0:d} GPU(s)\".format(labels[0])])[0]\n",
+    "\n",
+    "arr_iter_juwels = np.full((nexps, max_iter_juwels), np.nan)\n",
+    "arr_iter_booster= np.full((nexps, max_iter_booster), np.nan)\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# box plot instead of errorbar plot\n",
+    "# Juwels\n",
+    "#data_juwels = list_time_per_iteration_all_runs(base, wildcard_juwels)\n",
+    "data_juwels = all_iter(base, wildcard_juwels, gpu_id_str=\"gpu\")\n",
+    "# Booster\n",
+    "#data_booster = list_time_per_iteration_all_runs(base, wildcard_booster)\n",
+    "data_booster = all_iter(base, wildcard_booster, gpu_id_str=\"gpu\")\n",
+    "def simple_boxplot(time_per_iteration_data, title):\n",
+    "    # Multiple box plots on one Axes\n",
+    "    fig, ax = plt.subplots()\n",
+    "    ax.set_title(title)\n",
+    "    ax.boxplot(time_per_iteration_data, showfliers=False) # Outliers for initialization are disturbing \n",
+    "    plt.xticks([1, 2, 3, 4, 5 ,6], ['1', '4', '8', '16', '32', '64'])\n",
+    "    #plt.savefig('boxplot_'+title)\n",
+    "    #plt.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "886\n",
+      "64.08639097213745\n",
+      "31.232596397399902\n",
+      "(1326,)\n",
+      "***********\n",
+      "2100\n",
+      "4.405388832092285\n",
+      "29.095214366912842\n",
+      "(2653,)\n",
+      "***********\n",
+      "36981\n",
+      "7.751298189163208\n",
+      "26.409477949142456\n",
+      "(42450,)\n",
+      "***********\n",
+      "3843\n",
+      "66.00082683563232\n",
+      "29.385547637939453\n",
+      "(21225,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(np.argmax(data_booster[\"64 GPU(s)\"]))\n",
+    "print(np.max(data_booster[\"64 GPU(s)\"]))\n",
+    "print(data_booster[\"64 GPU(s)\"][0])\n",
+    "print(np.shape(data_booster[\"64 GPU(s)\"]))\n",
+    "print(\"***********\")\n",
+    "\n",
+    "print(np.argmax(data_juwels[\"64 GPU(s)\"][1::]))\n",
+    "print(np.max(data_juwels[\"64 GPU(s)\"][1::]))\n",
+    "print(data_juwels[\"64 GPU(s)\"][0])\n",
+    "print(np.shape(data_juwels[\"64 GPU(s)\"]))\n",
+    "print(\"***********\")\n",
+    "\n",
+    "print(np.argmax(data_juwels[\"4 GPU(s)\"][1::]))\n",
+    "print(np.max(data_juwels[\"4 GPU(s)\"][1::]))\n",
+    "print(data_juwels[\"4 GPU(s)\"][0])\n",
+    "print(np.shape(data_juwels[\"4 GPU(s)\"]))\n",
+    " \n",
+    "print(\"***********\")\n",
+    "print(np.argmax(data_booster[\"4 GPU(s)\"][1::]))\n",
+    "print(np.max(data_booster[\"4 GPU(s)\"][1::]))\n",
+    "print(data_booster[\"4 GPU(s)\"][0])\n",
+    "print(np.shape(data_booster[\"4 GPU(s)\"]))\n",
+    "\n",
+    "#simple_boxplot(data_juwels, 'Juwels')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "simple_boxplot(data_booster, 'Booster')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Try more fancy box plot \n",
+    "def more_fancy_boxplot(time_per_iteration_data1, time_per_iteration_data2, ngpu_list, title):\n",
+    "    nexps = len(ngpu_list)\n",
+    "    # Shuffle data: EXPECT JUWELS FIRST FOR THE LEGEND! NOT GENERIC!\n",
+    "    data = []\n",
+    "    for i in np.arange(nexps):\n",
+    "        data.append(time_per_iteration_data1[\"{0} GPU(s)\".format(ngpu_list[i])])\n",
+    "        data.append(time_per_iteration_data2[\"{0} GPU(s)\".format(ngpu_list[i])])\n",
+    "     \n",
+    "    # trick to get list with duplicated entries\n",
+    "    xlabels = [val for val in ngpu_list for _ in (0, 1)]\n",
+    "\n",
+    "    # Multiple box plots on one Axes\n",
+    "    #fig, ax = plt.subplots()\n",
+    "    fig = plt.figure(figsize=(6,4))\n",
+    "    ax = plt.axes([0.1, 0.15, 0.75, 0.75])   \n",
+    "    \n",
+    "    ax.set_title(title)\n",
+    "    bp = ax.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showfliers=False) # Outliers for initialization are disturbing\n",
+    "    plt.xticks(np.arange(1, nexps*2 +1), xlabels)\n",
+    "    ax.set_xlabel('# GPUs')\n",
+    "    ax.set_ylabel('Seconds')\n",
+    "    \n",
+    "    # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n",
+    "    box_colors = ['darkkhaki', 'royalblue']\n",
+    "    num_boxes = len(data)\n",
+    "    medians = np.empty(num_boxes)\n",
+    "    for i in range(num_boxes):\n",
+    "        box = bp['boxes'][i]\n",
+    "        boxX = []\n",
+    "        boxY = []\n",
+    "        for j in range(5):\n",
+    "            boxX.append(box.get_xdata()[j])\n",
+    "            boxY.append(box.get_ydata()[j])\n",
+    "        box_coords = np.column_stack([boxX, boxY])\n",
+    "        # Alternate between Dark Khaki and Royal Blue\n",
+    "        ax.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))\n",
+    "        # Now draw the median lines back over what we just filled in\n",
+    "        med = bp['medians'][i]\n",
+    "        medianX = []\n",
+    "        medianY = []\n",
+    "        for j in range(2):\n",
+    "            medianX.append(med.get_xdata()[j])\n",
+    "            medianY.append(med.get_ydata()[j])\n",
+    "            ax.plot(medianX, medianY, 'k')\n",
+    "        medians[i] = medianY[0]\n",
+    "        # Finally, overplot the sample averages, with horizontal alignment\n",
+    "        # in the center of each box\n",
+    "        ax.plot(np.average(med.get_xdata()), np.average(data[i]),\n",
+    "                color='w', marker='*', markeredgecolor='k')\n",
+    "    \n",
+    "    # Finally, add a basic legend\n",
+    "    fig.text(0.9, 0.15, 'Juwels',\n",
+    "             backgroundcolor=box_colors[0], color='black', weight='roman',\n",
+    "             size='small')\n",
+    "    fig.text(0.9, 0.09, 'Booster',\n",
+    "             backgroundcolor=box_colors[1],\n",
+    "             color='white', weight='roman', size='small')\n",
+    "    #fig.text(0.90, 0.015, '*', color='white', backgroundcolor='silver',\n",
+    "    #         weight='roman', size='medium')\n",
+    "    fig.text(0.9, 0.03, '* Mean', color='white', backgroundcolor='silver',\n",
+    "             weight='roman', size='small')\n",
+    "\n",
+    "    \n",
+    "    plt.savefig('fancy_boxplot_'+title.replace(' ', '_'))\n",
+    "    plt.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "more_fancy_boxplot(data_juwels, data_booster, ngpus_sort, 'Time needed to iterate one step')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "flist_hpc1 = sorted(glob.glob(base + wildcard_juwels))\n",
+    "flist_hpc2 = sorted(glob.glob(base + wildcard_booster))\n",
+    "\n",
+    "\n",
+    "        \n",
+    "\n",
+    "print(get_ngpus(flist_hpc1[2], \"gpu\"))\n",
+    "print(get_ngpus(flist_hpc1[0], \"gpu\"))\n",
+    "\n",
+    "print(get_ngpus(flist_hpc2[2], \"gpu\"))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Jupyter_Notebooks/performance_check.ipynb b/Jupyter_Notebooks/performance_check.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..3caf9018e91049c7ef7ee826382871dc5168a27a
--- /dev/null
+++ b/Jupyter_Notebooks/performance_check.ipynb
@@ -0,0 +1,724 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## import all required modules\n",
+    "import os, glob\n",
+    "import numpy as np\n",
+    "import pickle\n",
+    "# for plotting\n",
+    "import matplotlib\n",
+    "matplotlib.use('Agg')\n",
+    "from matplotlib.transforms import Affine2D\n",
+    "from matplotlib.patches import Polygon\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## some auxiliary functions\n",
+    "#\n",
+    "#colors = ['darkkhaki', 'royalblue']\n",
+    "colors = [\"midnightblue\", \"darkorange\"]\n",
+    "\n",
+    "def val_order(number):\n",
+    "    return int(np.floor(np.log10(number)))\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def get_ngpus(fname, search_str, max_order=3):\n",
+    "    \"\"\"\n",
+    "    Tries to get the number in the vicinity of search_str which is supposed to be a substring in fname.\n",
+    "    First searches for digits right before the occurrence of search_str, then right after it.\n",
+    "    :param fname: file name from which number should be inferred\n",
+    "    :param search_str: search string for which number identification is considered to be possible\n",
+    "    :param max_order: maximum number of digits of the retrieved number (default: 3 -> maximum number is 999 then)\n",
+    "    :return num_int: integer of number in the vicinity of search string.\n",
+    "    :raises ValueError: if search_str is not found in fname or no digits are adjacent to it\n",
+    "    \"\"\"\n",
+    "    ind_gpu_info = fname.lower().find(search_str)\n",
+    "    if ind_gpu_info == -1:\n",
+    "        raise ValueError(\"Unable to find search string '{0}' in file name '{1}'\".format(search_str, fname))\n",
+    "    \n",
+    "    # init loops\n",
+    "    fname_len = len(fname)\n",
+    "    success, flag = False, True\n",
+    "    indm = 1\n",
+    "    ind_sm, ind_sp = 0, 0\n",
+    "    # check occurrence of digits in front of search string (up to max_order digits, index 0 included)\n",
+    "    while indm <= max_order and flag:\n",
+    "        if ind_gpu_info - indm >= 0:\n",
+    "            if fname[ind_gpu_info - indm].isnumeric():\n",
+    "                ind_sm += 1\n",
+    "                success = True\n",
+    "            else:\n",
+    "                flag = False\n",
+    "        else:\n",
+    "            flag = False\n",
+    "        indm += 1\n",
+    "    # end while-loop\n",
+    "    if not success: # check occurrence of numbers after search string\n",
+    "        ind_gpu_info = ind_gpu_info + len(search_str)\n",
+    "        flag = True\n",
+    "        indm = 0\n",
+    "        while indm < max_order and flag: \n",
+    "            if ind_gpu_info + indm < fname_len:\n",
+    "                if fname[ind_gpu_info + indm].isnumeric():\n",
+    "                    ind_sp += 1\n",
+    "                    success = True\n",
+    "                else:\n",
+    "                    flag = False\n",
+    "            else:\n",
+    "                flag = False\n",
+    "            indm += 1\n",
+    "        # end while-loop    \n",
+    "        if success:\n",
+    "            return(int(fname[ind_gpu_info:ind_gpu_info+ind_sp]))\n",
+    "        else:\n",
+    "            raise ValueError(\"Search string found in fname, but unable to infer number of GPUs.\")\n",
+    "\n",
+    "    else:\n",
+    "        return(int(fname[ind_gpu_info-ind_sm:ind_gpu_info]))\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "# functions for computing time\n",
+    "def compute_time_tot(infile):\n",
+    "    with open(infile,'rb') as tfile:\n",
+    "        #print(\"Opening pickle time: '{0}'\".format(infile))\n",
+    "        total_time_sec = pickle.load(tfile)\n",
+    "    return np.asarray(total_time_sec/60)\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def compute_time_tot_log(infile):\n",
+    "    total_time_min = compute_time_tot(infile)\n",
+    "    return np.log(total_time_min)\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def get_time_dict(base, wildcardspec, tfilename, gpu_id_str=\"gpu\", llog = False):\n",
+    "    time_dict = {}\n",
+    "    flist_hpc = sorted(glob.glob(base + wildcardspec))\n",
+    "    print(flist_hpc)\n",
+    "    wrapper = compute_time_tot\n",
+    "    if llog: wrapper = compute_time_tot_log\n",
+    "    for tfile in flist_hpc: \n",
+    "        ngpus = get_ngpus(tfile, gpu_id_str)\n",
+    "        time_dict[\"{0:d} GPU(s)\".format(ngpus)] = wrapper(tfile + tfilename)\n",
+    "    return time_dict\n",
+    "#\n",
+    "def calc_speedup(comp_time, ngpus, l_ideal= False):\n",
+    "    nn = np.shape(ngpus)[0]\n",
+    "    if l_ideal:\n",
+    "        spd_data = np.array(ngpus, dtype=float)\n",
+    "    else:\n",
+    "        spd_data = comp_time\n",
+    "\n",
+    "    spd_up = spd_data[0:nn-1]/spd_data[1::]\n",
+    "    \n",
+    "    if l_ideal: spd_up = 1./spd_up\n",
+    "\n",
+    "    return spd_up\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "# functions for iteration time data    \n",
+    "def iter_time_mean_std(infile):\n",
+    "    with open(infile, 'rb') as tfile:\n",
+    "        time_per_iteration_list = pickle.load(tfile) \n",
+    "        \n",
+    "    time_per_iteration = np.array(time_per_iteration_list)\n",
+    "    return np.mean(time_per_iteration), np.std(time_per_iteration)\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def iter_stat(base, wildcardspec, gpu_id_str=\"gpu\"):\n",
+    "    stat_iter_dict = {}\n",
+    "    flist_hpc = sorted(glob.glob(base + wildcardspec))\n",
+    "    for tdir in flist_hpc: \n",
+    "        ngpus = get_ngpus(tdir, gpu_id_str)\n",
+    "        ftname = os.path.join(tdir, fname_timing_iter)\n",
+    "        mean_loc, std_loc = iter_time_mean_std(ftname)\n",
+    "        stat_iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = {\"mean\": mean_loc , \"std\": std_loc}\n",
+    "    return stat_iter_dict\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def read_iter_time(infile):\n",
+    "    with open(infile,'rb') as tfile:\n",
+    "        time_per_iteration_list = pickle.load(tfile)\n",
+    "    return np.asarray(time_per_iteration_list)\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def get_iter_time_all(base, wildcardspec, gpu_id_str=\"gpu\"):\n",
+    "    iter_dict = {}\n",
+    "    flist_hpc = sorted(glob.glob(base + wildcardspec))\n",
+    "    for tdir in flist_hpc: \n",
+    "        ngpus = get_ngpus(tdir, gpu_id_str)\n",
+    "        ftname = os.path.join(tdir, fname_timing_iter)\n",
+    "        iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = read_iter_time(ftname)\n",
+    "    return iter_dict   \n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "# functions for plotting\n",
+    "def autolabel(ax, rects, rot=45):\n",
+    "    \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n",
+    "    scal = 1\n",
+    "    if rot <0.:\n",
+    "        scal = -1\n",
+    "    for rect in rects:\n",
+    "        height = rect.get_height()\n",
+    "        ax.annotate('{}'.format(height),\n",
+    "                    xy=(rect.get_x() + rect.get_width()*scal, height),\n",
+    "                    xytext=(0, 3),  # 3 points vertical offset\n",
+    "                    textcoords=\"offset points\",\n",
+    "                    ha='center', va='bottom', rotation=rot)\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def plot_computation_time(times1, times2, ngpus, names, plt_fname, log_yvals = False):\n",
+    "    \n",
+    "    nlabels = len(ngpus)\n",
+    "    x_pos = np.arange(nlabels)\n",
+    "    \n",
+    "    bar_width = 0.35\n",
+    "    ytitle = \"Time [min]\"\n",
+    "    max_time = np.maximum(np.max(times1), np.max(times2))\n",
+    "    time_order = val_order(max_time)\n",
+    "    ymax = np.ceil(max_time/(10**time_order) + 0.5)*(10**time_order) + 10**time_order\n",
+    "   # np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n",
+    "    if log_yvals: \n",
+    "        times1, times2 = np.log(times1), np.log(times2)\n",
+    "        ytitle = \"LOG(Time) [min]\"\n",
+    "        ymax = np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5))\n",
+    "    \n",
+    "    # create plot object\n",
+    "    fig, ax = plt.subplots()\n",
+    "    # create data bars\n",
+    "    rects1 = ax.bar(x_pos - bar_width/2, np.round(times1, 2), bar_width, label=names[0], color=colors[0])\n",
+    "    rects2 = ax.bar(x_pos + bar_width/2, np.round(times2, 2), bar_width, label=names[1], color=colors[1])\n",
+    "    # customize plot appearance\n",
+    "    # Add some text for labels, title and custom x-axis tick labels, etc.\n",
+    "    ax.set_ylabel(ytitle)\n",
+    "    ax.set_title('Comparison {0} and {1} with convLSTM model'.format(*names))\n",
+    "    ax.set_xticks(x_pos)\n",
+    "    ax.set_xticklabels(ngpus)\n",
+    "    ax.set_xlabel('# GPUs')\n",
+    "    ax.set_ylim(0., ymax)\n",
+    "    ax.legend()\n",
+    "                \n",
+    "    # add labels\n",
+    "    autolabel(ax, rects1)\n",
+    "    autolabel(ax, rects2)\n",
+    "    print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n",
+    "    plt.savefig(plt_fname+\".png\")\n",
+    "    plt.close()\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def plot_speedup(comp_time_hpc1, comp_time_hpc2, ngpus, names):\n",
+    "    \"\"\"Plot speed-up between successive GPU counts (normalized by the ideal speed-up) and save it as PNG.\"\"\"\n",
+    "    plt.figure(figsize=(6,4))\n",
+    "    ax = plt.axes([0.1, 0.15, 0.75, 0.75])\n",
+    "    \n",
+    "    spd_up1 = calc_speedup(comp_time_hpc1, ngpus)\n",
+    "    spd_up2 = calc_speedup(comp_time_hpc2, ngpus)\n",
+    "    spd_ideal= calc_speedup(comp_time_hpc2, ngpus, l_ideal=True)\n",
+    "    \n",
+    "    plt.plot(spd_up1/spd_ideal, label= names[0], c=colors[0], lw=1.5)\n",
+    "    plt.plot(spd_up2/spd_ideal, label= names[1], c=colors[1], lw=1.5)\n",
+    "    plt.plot(spd_ideal/spd_ideal, label= \"Ideal\", c=\"r\", lw=3.)\n",
+    "    \n",
+    "    xlabels = [\"{0} -> {1}\".format(ngpus[i], ngpus[i+1]) for i in range(len(ngpus)-1)]\n",
+    "    plt.xticks(np.arange(0, len(ngpus)-1), xlabels)\n",
+    "    ax.set_xlim(-0.5, len(ngpus)-1.5)\n",
+    "    ax.set_ylim(0.5, 1.5)\n",
+    "    ax.legend(loc='upper left')\n",
+    "    ax.set_xlabel('GPU usage')\n",
+    "    ax.set_ylabel('Ratio Speedup factor') \n",
+    "    \n",
+    "    # file name without extension; the '.png' suffix is appended exactly once below\n",
+    "    plt_fname = \"speed_up_{0}_vs_{1}\".format(*names)\n",
+    "    print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n",
+    "    plt.savefig(plt_fname+\".png\")\n",
+    "#\n",
+    "# ****************************************************************************************************\n",
+    "#\n",
+    "def boxplot_iter_time(time_per_iteration_data1, time_per_iteration_data2, ngpu_list, names):\n",
+    "    nexps = len(ngpu_list)\n",
+    "    # create data lists for boxplot-routine\n",
+    "    data = []\n",
+    "    for i in np.arange(nexps):\n",
+    "        data.append(time_per_iteration_data1[\"{0} GPU(s)\".format(ngpu_list[i])])\n",
+    "        data.append(time_per_iteration_data2[\"{0} GPU(s)\".format(ngpu_list[i])])\n",
+    "     \n",
+    "    # trick to get list with duplicated entries\n",
+    "    xlabels = [val for val in ngpu_list for _ in (0, 1)]\n",
+    "\n",
+    "    # Multiple box plots on one Axes\n",
+    "    #fig, ax = plt.subplots()\n",
+    "    fig = plt.figure(figsize=(6,4))\n",
+    "    ax = plt.axes([0.1, 0.15, 0.75, 0.75])   \n",
+    "    \n",
+    "    ax.set_title(\"Time per iteration step\")\n",
+    "    bp = ax.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showfliers=False) # Outliers for initialization are disturbing\n",
+    "    plt.xticks(np.arange(1, nexps*2 +1), xlabels)\n",
+    "    ax.set_xlabel('# GPUs')\n",
+    "    ax.set_ylabel('Time [s]')\n",
+    "    \n",
+    "    # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n",
+    "    box_colors = colors\n",
+    "    num_boxes = len(data)\n",
+    "    medians = np.empty(num_boxes)\n",
+    "    for i in range(num_boxes):\n",
+    "        box = bp['boxes'][i]\n",
+    "        boxX = []\n",
+    "        boxY = []\n",
+    "        for j in range(5):\n",
+    "            boxX.append(box.get_xdata()[j])\n",
+    "            boxY.append(box.get_ydata()[j])\n",
+    "        box_coords = np.column_stack([boxX, boxY])\n",
+    "        # Alternate between the two entries of box_colors\n",
+    "        ax.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))\n",
+    "        # Now draw the median lines back over what we just filled in\n",
+    "        med = bp['medians'][i]\n",
+    "        medianX = []\n",
+    "        medianY = []\n",
+    "        for j in range(2):\n",
+    "            medianX.append(med.get_xdata()[j])\n",
+    "            medianY.append(med.get_ydata()[j])\n",
+    "            ax.plot(medianX, medianY, 'k')\n",
+    "        medians[i] = medianY[0]\n",
+    "        # Finally, overplot the sample averages, with horizontal alignment\n",
+    "        # in the center of each box\n",
+    "        ax.plot(np.average(med.get_xdata()), np.average(data[i]),\n",
+    "                color='w', marker='*', markeredgecolor='k', markersize=10)\n",
+    "    \n",
+    "    # Finally, add a basic legend\n",
+    "    fig.text(0.86, 0.15, names[0],\n",
+    "             backgroundcolor=box_colors[0], color='white', weight='roman',\n",
+    "             size='small')\n",
+    "    fig.text(0.86, 0.09, names[1],\n",
+    "             backgroundcolor=box_colors[1],\n",
+    "             color='white', weight='roman', size='small')\n",
+    "    #fig.text(0.90, 0.015, '*', color='white', backgroundcolor='silver',\n",
+    "    #         weight='roman', size='medium')\n",
+    "    #fig_transform =  ax.figure.transFigure #+ ax.transAxes.inverted() #+ ax.figure.transFigure.inverted()\n",
+    "    #ax.plot(0.1, 0.03, marker='*', markersize=30, color=\"w\", markeredgecolor=\"k\", transform=fig_transform)\n",
+    "    fig.text(0.86, 0.03, '* Mean', color='black', backgroundcolor='white', \n",
+    "             weight='roman', size='small', bbox=dict(facecolor='none', edgecolor='k'))\n",
+    "\n",
+    "    plt_fname = \"boxplot_iter_time_{0}_vs_{1}\".format(*names)\n",
+    "    print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n",
+    "    plt.savefig(plt_fname+\".png\")\n",
+    "    plt.close()\n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## some basic settings\n",
+    "base_dir = \"/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/\"\n",
+    "\n",
+    "wildcard_hpc1 = '20210325T095504_langguth1_juwels_container_[1-9]*gpu*'  # search pattern for finding the experiments\n",
+    "wildcard_hpc2 = '20210325T095504_langguth1_jwb_container_[1-9]*gpu*'\n",
+    "\n",
+    "gpu_id_str = [\"gpu\", \"gpu\"]               # search substring to get the number of GPUs used in the experiments,\n",
+    "                                          # e.g. \"gpu\" if '64gpu' is a substring in the experiment directory\n",
+    "                                          # or \"ngpu\" if 'ngpu64' is a substring in the experiment directory\n",
+    "                                          # -> see wildcard variables above\n",
+    "names_hpc = [\"Juwels\", \"Booster\"]\n",
+    "\n",
+    "# name of pickle files tracking computing time\n",
+    "fname_timing_train = \"/timing_training_time.pkl\"\n",
+    "fname_timing_total = \"/timing_total_time.pkl\"\n",
+    "\n",
+    "fname_timing_iter = \"timing_per_iteration_time.pkl\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_8gpus']\n",
+      "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_8gpus']\n",
+      "{'16 GPU(s)': array(53.40843068), '1 GPU(s)': array(930.4968381), '32 GPU(s)': array(45.96871045), '4 GPU(s)': array(217.45655225), '64 GPU(s)': array(35.7369519), '8 GPU(s)': array(106.4218419)}\n",
+      "{'16 GPU(s)': array(34.26928383), '1 GPU(s)': array(492.70926997), '32 GPU(s)': array(35.05492661), '4 GPU(s)': array(100.99109779), '64 GPU(s)': array(30.98471271), '8 GPU(s)': array(49.63896298)}\n",
+      "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_8gpus']\n",
+      "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_8gpus']\n"
+     ]
+    }
+   ],
+   "source": [
+    "## evaluate computing time\n",
+    "# dictionaries with the total times\n",
+    "tot_time_hpc1_dict = get_time_dict(base_dir, wildcard_hpc1, fname_timing_total, gpu_id_str=gpu_id_str[0])\n",
+    "tot_time_hpc2_dict= get_time_dict(base_dir, wildcard_hpc2, fname_timing_total, gpu_id_str=gpu_id_str[1])\n",
+    "\n",
+    "print(tot_time_hpc1_dict)\n",
+    "print(tot_time_hpc2_dict)\n",
+    "\n",
+    "# dictionaries with the training times\n",
+    "train_time_hpc1_dict = get_time_dict(base_dir, wildcard_hpc1, fname_timing_train, gpu_id_str=gpu_id_str[0])\n",
+    "train_time_hpc2_dict = get_time_dict(base_dir, wildcard_hpc2, fname_timing_train, gpu_id_str=gpu_id_str[1])\n",
+    "\n",
+    "# get sorted arrays\n",
+    "# Note: The times for Juwels are divided by 2, since the experiments have been performed with an epoch number of 20\n",
+    "#       instead of 10 (as Bing and Scarlet did)\n",
+    "ngpus_sort = sorted([int(ngpu.split()[0]) for ngpu in tot_time_hpc1_dict.keys()])\n",
+    "nexps = len(ngpus_sort)\n",
+    "tot_time_hpc1 = np.array([tot_time_hpc1_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n",
+    "tot_time_hpc1[0] = tot_time_hpc1[0]#*2.\n",
+    "tot_time_hpc2 = np.array([tot_time_hpc2_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n",
+    "\n",
+    "train_time_hpc1 = np.array([train_time_hpc1_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n",
+    "train_time_hpc1[0] = train_time_hpc1[0]#*2.\n",
+    "train_time_hpc2 = np.array([train_time_hpc2_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n",
+    "\n",
+    "overhead_hpc1 = tot_time_hpc1 - train_time_hpc1\n",
+    "overhead_hpc2= tot_time_hpc2 - train_time_hpc2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[492.70926997 100.99109779  49.63896298  34.26928383  35.05492661\n",
+      "  30.98471271]\n",
+      "Saving plot in file: ./total_computation_time_Juwels_vs_Booster.png ...\n",
+      "Saving plot in file: ./overhead_time_Juwels_vs_Booster.png ...\n",
+      "Saving plot in file: speed_up_Juwels_vs_Booster.png.png ...\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# plot the computing time\n",
+    "print(tot_time_hpc2)\n",
+    "plot_computation_time(tot_time_hpc1, tot_time_hpc2, ngpus_sort, names_hpc, \\\n",
+    "                       \"./total_computation_time_{0}_vs_{1}\".format(*names_hpc), log_yvals=False)\n",
+    "\n",
+    "plot_computation_time(overhead_hpc1, overhead_hpc2, ngpus_sort, names_hpc, \\\n",
+    "                       \"./overhead_time_{0}_vs_{1}\".format(*names_hpc))\n",
+    "# plot speed-up factors\n",
+    "plot_speedup(tot_time_hpc1, tot_time_hpc2, ngpus_sort, names_hpc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## evaluate iteration time\n",
+    "# get iteration times\n",
+    "iter_data_hpc1 = get_iter_time_all(base_dir, wildcard_hpc1, gpu_id_str=gpu_id_str[0])\n",
+    "iter_data_hpc2 = get_iter_time_all(base_dir, wildcard_hpc2, gpu_id_str=gpu_id_str[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saving plot in file: boxplot_iter_time_Juwels_vs_Booster.png ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# plot the iteration time in box plots\n",
+    "boxplot_iter_time(iter_data_hpc1, iter_data_hpc2, ngpus_sort, names_hpc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_slowiter(iter_time, threshold):\n",
+    "    inds_slow = np.where(iter_time > threshold)[0]\n",
+    "    return iter_time[inds_slow], np.shape(inds_slow)[0]\n",
+    "\n",
+    "def ana_slowiter(itertime1, itertime2, thres, names):\n",
+    "    slowt1, nslow1 = get_slowiter(itertime1, thres)\n",
+    "    slowt2, nslow2 = get_slowiter(itertime2, thres)\n",
+    "    \n",
+    "    if nslow1 > 0:\n",
+    "        print(\"{0:d} slow iteration steps on {1} with averaged time of {2:5.2f}s (max: {3:5.2f}s)\"\\\n",
+    "              .format(nslow1, names[0], np.mean(slowt1), np.max(slowt1)))\n",
+    "    else: \n",
+    "        print(\"No slow iterations on {0}\".format(names[0]))\n",
+    "        \n",
+    "    if nslow2 > 0:\n",
+    "        print(\"{0:d} slow iteration steps on {1} with averaged time of {2:5.2f}s (max: {3:5.2f}s)\"\\\n",
+    "              .format(nslow2, names[1], np.mean(slowt2), np.max(slowt2)))\n",
+    "    else: \n",
+    "        print(\"No slow iterations on {0}\".format(names[1]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "***** Analyse single GPUs experiments *****\n",
+      "1 slow iteration steps on Juwels with averaged time of  5.18s (max:  5.18s)\n",
+      "No slow iterations on Booster\n",
+      "***** Analyse 4 GPUs experiments *****\n",
+      "No slow iterations on Juwels\n",
+      "No slow iterations on Booster\n",
+      "***** Analyse 8 GPUs experiments *****\n",
+      "No slow iterations on Juwels\n",
+      "No slow iterations on Booster\n",
+      "***** Analyse 32 GPUs experiments *****\n",
+      "No slow iterations on Juwels\n",
+      "No slow iterations on Booster\n",
+      "***** Analyse 32 GPUs experiments *****\n",
+      "No slow iterations on Juwels\n",
+      "No slow iterations on Booster\n",
+      "***** Analyse 64 GPUs experiments *****\n",
+      "No slow iterations on Juwels\n",
+      "No slow iterations on Booster\n"
+     ]
+    }
+   ],
+   "source": [
+    "    \n",
+    "## settings\n",
+    "names = [\"Juwels\", \"Booster\"]\n",
+    "slowiter_time = 5.       # arbitrary threshold for slow iteration steps\n",
+    "\n",
+    "# analyze single GPU experiments\n",
+    "print(\"***** Analyse single GPUs experiments *****\")\n",
+    "itertime_juwels = iter_data_hpc1[\"1 GPU(s)\"]\n",
+    "itertime_booster = iter_data_hpc2[\"1 GPU(s)\"]\n",
+    "\n",
+    "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n",
+    "\n",
+    "# analyze 4 GPUs experiments\n",
+    "print(\"***** Analyse 4 GPUs experiments *****\")\n",
+    "itertime_juwels = iter_data_hpc1[\"4 GPU(s)\"]\n",
+    "itertime_booster = iter_data_hpc2[\"4 GPU(s)\"]\n",
+    "\n",
+    "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n",
+    "\n",
+    "# analyze 8 GPUs experiments\n",
+    "print(\"***** Analyse 8 GPUs experiments *****\")\n",
+    "itertime_juwels = iter_data_hpc1[\"8 GPU(s)\"]\n",
+    "itertime_booster = iter_data_hpc2[\"8 GPU(s)\"]\n",
+    "\n",
+    "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n",
+    "\n",
+    "# analyze 16 GPUs experiments\n",
+    "print(\"***** Analyse 16 GPUs experiments *****\")\n",
+    "itertime_juwels = iter_data_hpc1[\"16 GPU(s)\"]\n",
+    "itertime_booster = iter_data_hpc2[\"16 GPU(s)\"]\n",
+    "\n",
+    "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n",
+    "\n",
+    "# analyze 32 GPUs experiments\n",
+    "print(\"***** Analyse 32 GPUs experiments *****\")\n",
+    "itertime_juwels = iter_data_hpc1[\"32 GPU(s)\"]\n",
+    "itertime_booster = iter_data_hpc2[\"32 GPU(s)\"]\n",
+    "\n",
+    "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n",
+    "\n",
+    "# analyze 64 GPUs experiments\n",
+    "print(\"***** Analyse 64 GPUs experiments *****\")\n",
+    "itertime_juwels = iter_data_hpc1[\"64 GPU(s)\"]\n",
+    "itertime_booster = iter_data_hpc2[\"64 GPU(s)\"]\n",
+    "\n",
+    "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Summary\n",
+    "- Occasionally, a few iteration steps are slow\n",
+    "- However, performance degradation seems to be much worse on Booster than on Juwels\n",
+    "- Higher chance for slow iteration steps on Booster in general"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 157,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def boxplot_iter_total_time(iteration_time, total_time, ngpu_list, name, log_yvals=False):\n",
+    "    nexps = len(ngpu_list)\n",
+    "    bar_width = 0.35\n",
+    "    # create data lists for boxplot-routine\n",
+    "    iter_time_all = []\n",
+    "    for i in np.arange(nexps):\n",
+    "        iter_time_all.append(iteration_time[\"{0} GPU(s)\".format(ngpu_list[i])])\n",
+    "     \n",
+    "    # trick to get list with duplicated entries\n",
+    "    xlabels = [val for val in ngpu_list for _ in (0, 1)]\n",
+    "    nlabels = len(xlabels)\n",
+    "\n",
+    "    # Multiple box plots on one Axes\n",
+    "    #fig, ax = plt.subplots()\n",
+    "    fig = plt.figure(figsize=(6,4))\n",
+    "    ax = plt.axes([0.1, 0.15, 0.75, 0.75])   \n",
+    "    \n",
+    "    bp = ax.boxplot(iter_time_all, positions=np.arange(0, nlabels, 2), notch=0, sym='+', vert=1, showfliers=False, widths=bar_width) # Outliers for initialization are disturbing\n",
+    "    ax.set_xlabel('# GPUs')\n",
+    "    ax.set_ylabel('Time [s]')\n",
+    "    \n",
+    "    # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n",
+    "    num_boxes = len(iter_time_all)\n",
+    "    medians = np.empty(num_boxes)\n",
+    "    for i in range(num_boxes):\n",
+    "        box = bp['boxes'][i]\n",
+    "        boxX = []\n",
+    "        boxY = []\n",
+    "        for j in range(5):\n",
+    "            boxX.append(box.get_xdata()[j])\n",
+    "            boxY.append(box.get_ydata()[j])\n",
+    "        box_coords = np.column_stack([boxX, boxY])\n",
+    "        ax.add_patch(Polygon(box_coords, facecolor=colors[1]))\n",
+    "        # Now draw the median lines back over what we just filled in\n",
+    "        med = bp['medians'][i]\n",
+    "        medianX = []\n",
+    "        medianY = []\n",
+    "        for j in range(2):\n",
+    "            medianX.append(med.get_xdata()[j])\n",
+    "            medianY.append(med.get_ydata()[j])\n",
+    "            ax.plot(medianX, medianY, 'k')\n",
+    "        medians[i] = medianY[0]\n",
+    "        # Finally, overplot the sample averages, with horizontal alignment\n",
+    "        # in the center of each box\n",
+    "        ax.plot(np.average(med.get_xdata()), np.average(iter_time_all[i]),\n",
+    "                color='w', marker='*', markeredgecolor='k', markersize=10)\n",
+    "    \n",
+    "    ax2 = ax.twinx()\n",
+    "    x_pos = np.arange(1, nlabels+1 ,2)\n",
+    "    \n",
+    "    ytitle = \"Time [min]\"\n",
+    "    max_time = np.max(total_time)\n",
+    "    time_order = val_order(max_time)\n",
+    "    ymax = np.ceil(max_time/(10**time_order) + 0.5)*(10**time_order) + 10**time_order\n",
+    "    # np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n",
+    "    if log_yvals: \n",
+    "        total_time = np.log(total_time)\n",
+    "        ytitle = \"LOG(Time) [min]\"\n",
+    "        ymax = np.ceil(np.max(total_time) + 0.5)\n",
+    "    \n",
+    "    # create data bars\n",
+    "    rects = ax2.bar(x_pos, np.round(total_time, 2), bar_width, label=names, color=colors[0])\n",
+    "    # customize plot appearance\n",
+    "    # Add some text for labels, title and custom x-axis tick labels, etc.\n",
+    "    ax2.set_ylabel(ytitle)\n",
+    "    ax2.set_xticks(np.arange(0, nlabels))\n",
+    "    ax2.set_xticklabels(xlabels)\n",
+    "    ax2.set_xlabel('# GPUs')\n",
+    "    ax2.set_ylim(0., ymax)\n",
+    "                \n",
+    "    # add labels\n",
+    "    autolabel(ax2, rects, rot=45)     \n",
+    "\n",
+    "    plt_fname = \"iter+tot_time_{0}_vs_{1}\".format(*names)\n",
+    "    print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n",
+    "    #plt.show()\n",
+    "    plt.savefig(plt_fname+\".png\")\n",
+    "    plt.close()\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 158,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saving plot in file: iter+tot_time_Juwels_vs_Booster.png ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "boxplot_iter_total_time(iter_data_hpc2, tot_time_hpc2, ngpus_sort, names_hpc[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}