diff --git a/Jupyter_Notebooks/first_cond_quantile.png b/Jupyter_Notebooks/first_cond_quantile.png deleted file mode 100644 index 6ff3a7a8a081c4a874d2e2a8c8d3d0d2e47d1fb5..0000000000000000000000000000000000000000 Binary files a/Jupyter_Notebooks/first_cond_quantile.png and /dev/null differ diff --git a/Jupyter_Notebooks/get_era5_forecasts.sh b/Jupyter_Notebooks/get_era5_forecasts.sh new file mode 100644 index 0000000000000000000000000000000000000000..f01bc770990e055d9c770602b4eef88ce4269ba0 --- /dev/null +++ b/Jupyter_Notebooks/get_era5_forecasts.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +yr=$1 # year to process (first positional argument) + +indir=/p/fastdata/slmet/slmet111/met_data/ecmwf/era5/grib/${yr} +indir_ref=/p/project/deepacf/deeprain/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/savp/20210901T090059_gong1_savp_cv12/ +outdir=/p/scratch/deepacf/deeprain/video_prediction_shared_folder/era5_forecast_ref/${yr} + +if [[ ! -d "${outdir}" ]]; then + mkdir -p "${outdir}" +fi + +hh_s=6 +hh_e=12 + +declare -a hh_list=(18) + +for hh in ${hh_list[@]}; do + hh0=$(printf "%02d" ${hh}) + + # slice and retrieve 2m temperature from forecast-files + for mm in {01..12}; + do sf_files=(`ls ${indir}/${mm}/fc_${hh0}/*_{6..12}_sf_fc.grb`); + for sf_file in ${sf_files[*]}; + do newfile=`basename "${sf_file}"` + newfile="${outdir}/${newfile/.grb/.nc}" + echo "Processing file '${newfile}'" + cdo --eccodes -f nc copy -selname,2t -sellonlatbox,0.,27.3,38.4,54.9 ${sf_file} ${newfile} + done + done + + date1=`date -d "${yr}0101" '+%Y%m%d'` + date2=`date -d "${yr}0101 ${hh0}" '+%Y-%m-%d %H:%M:00'` + date_end=`date -d "${yr}1231 ${hh0}" '+%Y%m%d'` + + while [[ "$date1" -le "$date_end" ]]; do + outfile=${outdir}/${date1}${hh0}_allfc.nc + echo "Merging forecats for run at ${date2}..." + cdo mergetime ${outdir}/${date1}_${hh}00_*.nc ${outfile} + echo "Manipulate attributes and dimensions..." 
+ ncrename -O -v 2t,2t_era5_fcst ${outfile} ${outfile} + ncap2 -O -s init_time=0 ${outfile} ${outfile} + ncatted -O -a calendar,init_time,a,c,"proleptic_gregorian" -a units,init_time,a,c,"hours since ${date2}" ${outfile} ${outfile} + ncrename -O -d time,fcst_hour -v time,fcst_hour ${outfile} ${outfile} + ncap2 -O -s 'fcst_hour=int(fcst_hour)' ${outfile} ${outfile} + + # add reference data if possible + patt=${indir_ref}/vfp_date_${date1}${hh0}_sample_ind*.nc + if ls $patt 1>/dev/null 2>&1; then + file_src=`ls $patt` + ncks -d fcst_hour,5,11 -v 2t_ref,2t_persistence_fcst -A ${file_src} ${outfile} + else + echo "Could not find reference data for ${date2}..." + mv ${outfile} "${outfile/.nc/_ref_miss.nc}" # mark files lacking reference data in the file name + fi + + date1="$(date -u --date="$date1 tomorrow" '+%Y%m%d')" + date2="$(date -u --date="$date2 tomorrow" '+%Y-%m-%d %H:%M:00')" + done +done + diff --git a/Jupyter_Notebooks/get_era5_metrics_netcdf.ipynb b/Jupyter_Notebooks/get_era5_metrics_netcdf.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9a07a3a2ad1e7fa0c383be80929562d440dccf9a --- /dev/null +++ b/Jupyter_Notebooks/get_era5_metrics_netcdf.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5272c6d8-2d18-4de1-a437-5fe6461ca743", + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "sys.path.append(\"../utils/\")\n", + "import xarray as xr\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from statistical_evaluation import perform_block_bootstrap_metric, avg_metrics, calculate_cond_quantiles, Scores" + ] + }, + { + "cell_type": "markdown", + "id": "61e6b683-b9c6-4691-9c1f-130324ddb7a8", + "metadata": {}, + "source": [ + "# Evaluation of the ERA5 short-range forecasts\n", + "\n", + "Define the path to the file and load the data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "453ad6f2-5655-47bf-8f39-7a1d73b059ab", + "metadata": {}, + "outputs": [], + "source": [ + "indir = \"/p/home/jusers/langguth1/juwels/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/era5_forecast\"\n", + "yr=2019\n", + "\n", + "era5_fcst_file = os.path.join(indir, \"{0}_era5_short_range_fcst.nc\".format(yr))\n", + "\n", + "if not os.path.isfile(era5_fcst_file):\n", + " raise FileNotFoundError(\"Could not find file with all ERA5 forecasts '{0}'\".format(era5_fcst_file))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "459fa9bf-dbd0-48ee-8184-ccfec9ddbd59", + "metadata": {}, + "outputs": [], + "source": [ + "era5_fcst = xr.open_dataset(era5_fcst_file)\n", + "print(era5_fcst)" + ] + }, + { + "cell_type": "markdown", + "id": "07b1324f-c6df-45c8-b77a-e334c2bb2966", + "metadata": {}, + "source": [ + "Next we initialize the function for calculating the MSE and call it to evaluate the ERA5 and persistence forecasts. <br>\n", + "If you require further evaluation metrics, just expand the cell accordingly, e.g. add the following lines <br>\n", + "```\n", + "ssim_func = Scores(\"ssim\", [\"lat\", \"lon\"]).score_func \n", + "\n", + "ssim_era5_all = ssim_func(data_fcst=era5_fcst[varname_fcst], data_ref=era5_fcst[varname_ref])\n", + "ssim_per_all = ssim_func(data_fcst=era5_fcst[varname_per], data_ref=era5_fcst[varname_ref])\n", + "```\n", + "in case you want to evaluate the SSIM as well." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e066baeb-3190-4012-a0c1-ace1100be3aa", + "metadata": {}, + "outputs": [], + "source": [ + "fdata_clim = os.path.join(\"/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/T2monthly/\", \"climatology_t2m_1991-2020.nc\")\n", + "\n", + "data = xr.open_dataset(fdata_clim)\n", + "\n", + "# copied from load_climdata in main_visualize_postprocess.py\n", + "var=\"var167\"\n", + "dt_clim = data[var]\n", + "\n", + "lon_dom = np.arange(0., 27.5, 0.3)\n", + "lat_dom = np.arange(54.9, 38.1, -0.3)\n", + "\n", + "# get the coordinates of the data after running CDO\n", + "coords = dt_clim.coords\n", + "nlat, nlon = len(coords[\"lat\"]), len(coords[\"lon\"])\n", + "# modify it to our needs\n", + "coords_new = dict(coords)\n", + "coords_new.pop(\"time\")\n", + "coords_new[\"month\"] = np.arange(1, 13)\n", + "coords_new[\"hour\"] = np.arange(0, 24)\n", + "# initialize a new data array with explicit dimensions for month and hour\n", + "data_clim_new = xr.DataArray(\n", + " np.full((12, 24, nlat, nlon), np.nan),\n", + " coords=coords_new,\n", + " dims=[\"month\", \"hour\", \"lat\", \"lon\"],\n", + ")\n", + "# do the reorganization\n", + "for month in np.arange(1, 13):\n", + " data_clim_new.loc[dict(month=month)] = dt_clim.sel(\n", + " time=dt_clim[\"time.month\"] == month\n", + " )\n", + "\n", + "data_clim = data_clim_new.sel(lon=lon_dom, lat=lat_dom, tolerance=0.01, method=\"nearest\")\n", + "print(data_clim[\"lat\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0dd70ea1-9341-4b7a-9086-405ee75c6a64", + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm import tqdm\n", + "from skimage.metrics import structural_similarity as ssim\n", + "\n", + "# overwrite some score-functions which inherently expect a separate fcst_hour-dimension next to batch_size \n", + "\n", + "def calc_acc_batch(data_fcst, data_ref, **kwargs):\n", + " \"\"\"\n", + " Calculate acc evaluation 
metric of forecast data w.r.t reference data\n", + " :param data_fcst: forecasted data (xarray with dimensions [batch, fore_hours, lat, lon])\n", + " :param data_ref: reference data (xarray with dimensions [batch, fore_hours, lat, lon])\n", + " :param data_clim: climatology data (xarray with dimensions [monthly, hourly, lat, lon])\n", + " :return: acc for each sample along the leading dimension (1D data array over fcst_hour)\n", + " \"\"\"\n", + " \n", + " print(\"Start calculating ACC\")\n", + " if \"data_clim\" in kwargs:\n", + " data_clim = kwargs[\"data_clim\"]\n", + " else:\n", + " raise KeyError(\"%{0}: climatological data must be parsed to calculate the ACC.\".format(calc_acc_batch.__name__)) \n", + "\n", + " batch_size = data_fcst.shape[0]\n", + " acc = np.ones([batch_size])*np.nan\n", + " for i in tqdm(range(batch_size)):\n", + " img_fcst = data_fcst[i, ...]\n", + " img_ref = data_ref[i, ...]\n", + " # get the forecast time\n", + " img_month = img_fcst[\"fcst_hour\"].dt.month.values\n", + " img_hour = img_fcst[\"fcst_hour\"].dt.hour.values\n", + " img_clim = data_clim.sel(month=img_month, hour=img_hour) \n", + "\n", + " img1_ = img_ref - img_clim\n", + " img2_ = img_fcst - img_clim\n", + " cor1 = np.sum(img1_*img2_)\n", + " cor2 = np.sqrt(np.sum(img1_**2)*np.sum(img2_**2))\n", + " acc[i] = cor1/cor2\n", + " \n", + " # convert to data array \n", + " acc = xr.DataArray(acc, coords={\"fcst_hour\": data_fcst[\"fcst_hour\"]}, dims=[\"fcst_hour\"])\n", + " return acc\n", + "\n", + "def calc_ssim_batch(data_fcst, data_ref, **kwargs):\n", + " \"\"\"\n", + " Calculate ssim evaluation metric of forecast data w.r.t reference data\n", + " :param data_fcst: forecasted data (xarray with dimensions [batch, fore_hours, lat, lon])\n", + " :param data_ref: reference data (xarray with dimensions [batch, fore_hours, lat, lon])\n", + " :return: averaged ssim for each batch example, shape is [batch,fore_hours]\n", + " \"\"\"\n", + " method = Scores.calc_ssim_batch.__name__\n", + " batch_size = 
np.array(data_ref).shape[0]\n", + " ssim_pred = []\n", + " for i in tqdm(range(batch_size)):\n", + " ssim_pred.append(ssim(data_ref[i, ...],data_fcst[i,...]))\n", + " \n", + " # convert to data array \n", + " ssim_pred = xr.DataArray(np.asarray(ssim_pred), coords={\"fcst_hour\": data_fcst[\"fcst_hour\"]}, dims=[\"fcst_hour\"]) \n", + " return ssim_pred" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e76c4db9-f4da-4664-8054-fdd99cf5f64b", + "metadata": {}, + "outputs": [], + "source": [ + "# to get and configure the score-functions\n", + "score_dims = [\"lat\", \"lon\"]\n", + "scores = [\"mse\", \"ssim\", \"acc\", \"texture\"]\n", + "# the reference data\n", + "varname_ref, varname_fcst, varname_per = \"2t_ref\", \"2t_era5_fcst\", \"2t_persistence_fcst\"\n", + "\n", + "# initialize empty dictionaries to store the score functions and to save the corresponding results\n", + "score_data = {}\n", + "for score in scores:\n", + " # overwrite functions that are incompatble here\n", + " if score == \"acc\":\n", + " score_func = calc_acc_batch\n", + " elif score == \"ssim\":\n", + " score_func = calc_ssim_batch\n", + " else:\n", + " score_func = Scores(score, score_dims).score_func\n", + " # parsing the persistence forecast as float32 increases throughput by a factor of 10!\n", + " score_data[score] = {\"era5_all\": score_func(data_fcst=era5_fcst[varname_fcst], data_ref=era5_fcst[varname_ref], data_clim=data_clim,),\n", + " \"per_all\": score_func(data_fcst=era5_fcst[varname_per].astype(\"float32\"), data_ref=era5_fcst[varname_ref], data_clim=data_clim)} " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c612a06f-e193-4d4a-85a4-1adc11fde81e", + "metadata": {}, + "outputs": [], + "source": [ + "print(score_data[\"mse\"][\"era5_all\"])\n", + "print(score_data[\"ssim\"][\"era5_all\"])\n", + "print(score_data[\"texture\"][\"era5_all\"])" + ] + }, + { + "cell_type": "markdown", + "id": "b91695dc-7d3f-47de-8e67-03e5675aeac1", + 
"metadata": {}, + "source": [ + "Next, we initialize the data arrays to store the metrics for each forecast hour. <br>\n", + "Note that the ERA5 short-range forecasts only start twice a day at 06 and 18 UTC, respectively. Besides, they only have data starting from lead time 6 hours, but for consistency with the video prediction models, the data arrays cover all lead times between forecast hour 1 and 12. The unavailable values will be set to None." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95611d9c-7551-466d-84b6-ca24be2d3977", + "metadata": {}, + "outputs": [], + "source": [ + "init_times = [6, 18]\n", + "fcst_hours = np.arange(1, 13)\n", + "nhours = len(fcst_hours)\n", + "nboots=1000\n", + "\n", + "# MSE\n", + "mse_era5_fcst = xr.DataArray(np.empty(nhours, dtype=object), coords={\"fcst_hour\": fcst_hours}, dims=[\"fcst_hour\"])\n", + "mse_era5_fcst_boot = xr.DataArray(np.empty((nhours, nboots), dtype=object),\n", + " coords={\"fcst_hour\": fcst_hours, \"iboot\": np.arange(nboots)},\n", + " dims=[\"fcst_hour\", \"iboot\"])\n", + "\n", + "mse_per_fcst = xr.DataArray(np.empty(nhours, dtype=object), coords={\"fcst_hour\": fcst_hours}, dims=[\"fcst_hour\"])\n", + "mse_per_fcst_boot = xr.DataArray(np.empty((nhours, nboots), dtype=object),\n", + " coords={\"fcst_hour\": fcst_hours, \"iboot\": np.arange(nboots)},\n", + " dims=[\"fcst_hour\", \"iboot\"])\n", + "\n", + "# SSIM\n", + "ssim_era5_fcst, ssim_per_fcst = mse_era5_fcst.copy(), mse_per_fcst.copy()\n", + "ssim_era5_fcst_boot, ssim_per_fcst_boot = mse_per_fcst_boot.copy(), mse_per_fcst_boot.copy()\n", + "\n", + "# ACC\n", + "acc_era5_fcst, acc_per_fcst = mse_era5_fcst.copy(), mse_per_fcst.copy()\n", + "acc_era5_fcst_boot, acc_per_fcst_boot = mse_per_fcst_boot.copy(), mse_per_fcst_boot.copy()\n", + "\n", + "# TEXTURE\n", + "txtr_era5_fcst, txtr_per_fcst = mse_era5_fcst.copy(), mse_per_fcst.copy()\n", + "txtr_era5_fcst_boot, txtr_per_fcst_boot = mse_per_fcst_boot.copy(), 
mse_per_fcst_boot.copy()\n" + ] + }, + { + "cell_type": "markdown", + "id": "e5700456-043b-4656-8ac4-759f42fc03c4", + "metadata": {}, + "source": [ + "Finally, we populate the initialized data arrays by looping over the forecast hours for which data is available. <br>\n", + "Additionally, we perform block bootstrapping to estimate the uncertainty of our evaluation metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3ea77c8-60c1-48bd-8285-95d6af66217a", + "metadata": {}, + "outputs": [], + "source": [ + "def handle_scores(scores1_all, scores_ref_all, fhh):\n", + " \n", + " scores1 = scores1_all.sel(fcst_hour=(scores1_all.fcst_hour.dt.hour.isin(fhh)))\n", + " scores_ref = scores_ref_all.sel(fcst_hour=(scores_ref_all.fcst_hour.dt.hour.isin(fhh)))\n", + " score1_mean, score_ref_mean = scores1.mean(), scores_ref.mean()\n", + " # two runs per day -> 2*7 correpsonds to a block length of one week\n", + " score1_boot = perform_block_bootstrap_metric(scores1, \"fcst_hour\", 2*7)\n", + " score_ref_boot = perform_block_bootstrap_metric(scores_ref, \"fcst_hour\", 2*7)\n", + " \n", + " return score1_mean, score_ref_mean, score1_boot, score_ref_boot\n", + " \n", + "\n", + "for fh in fcst_hours[5::]:\n", + " print(\"Handling scores for forecast hour '{0:0d}'\".format(fh))\n", + " fh_curr = (init_times + fh)%24\n", + " # MSE\n", + " mse_era5_fcst[fh-1], mse_per_fcst[fh-1], mse_era5_fcst_boot[fh-1, :], mse_per_fcst_boot[fh-1, :] = handle_scores(score_data[\"mse\"][\"era5_all\"],\n", + " score_data[\"mse\"][\"per_all\"], fh_curr)\n", + " # SSIM\n", + " ssim_era5_fcst[fh-1], ssim_per_fcst[fh-1], ssim_era5_fcst_boot[fh-1, :], ssim_per_fcst_boot[fh-1, :] = handle_scores(score_data[\"ssim\"][\"era5_all\"],\n", + " score_data[\"ssim\"][\"per_all\"], fh_curr)\n", + " # ACC\n", + " acc_era5_fcst[fh-1], acc_per_fcst[fh-1], acc_era5_fcst_boot[fh-1, :], acc_per_fcst_boot[fh-1, :] = handle_scores(score_data[\"acc\"][\"era5_all\"],\n", + " 
score_data[\"acc\"][\"per_all\"], fh_curr) \n", + " # TEXTURE\n", + " txtr_era5_fcst[fh-1], txtr_per_fcst[fh-1], txtr_era5_fcst_boot[fh-1, :], txtr_per_fcst_boot[fh-1, :] = handle_scores(score_data[\"texture\"][\"era5_all\"],\n", + " score_data[\"texture\"][\"per_all\"], fh_curr)\n", + " \n", + " #mse_era5_curr = mse_era5_all.sel(fcst_hour=(mse_era5_all.fcst_hour.dt.hour.isin(fh_curr)))\n", + " #mse_per_curr = mse_per_all.sel(fcst_hour=(mse_per_all.fcst_hour.dt.hour.isin(fh_curr)))\n", + " #mse_era5_fcst[fh-1], mse_per_fcst[fh-1] = mse_era5_curr.mean(), mse_per_curr.mean()\n", + " ## two runs per day -> 2*7 correpsonds to a block length of one week\n", + " #mse_era5_fcst_boot[fh-1, :] = perform_block_bootstrap_metric(mse_era5_curr, \"fcst_hour\", 2*7)\n", + " #mse_per_fcst_boot[fh-1, :] = perform_block_bootstrap_metric(mse_per_curr, \"fcst_hour\", 2*7)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ec002e6-cb1b-4c66-9ed0-9f5e2bd3fae9", + "metadata": {}, + "outputs": [], + "source": [ + "print(mse_era5_fcst)\n", + "print(ssim_era5_fcst)\n", + "print(acc_era5_fcst)\n", + "print(txtr_era5_fcst)" + ] + }, + { + "cell_type": "markdown", + "id": "276f4f4c-e926-4009-9198-c4e43271e87d", + "metadata": {}, + "source": [ + "Finally, we put the data arrays into a joint dataset and save the results into the netCDF-file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88ccbdce-5d6a-4c40-971c-ebe9a05a8c88", + "metadata": {}, + "outputs": [], + "source": [ + "# create Dataset and save to netCDF-file (every variable needs a unique key, duplicates silently overwrite each other)\n", + "ds_mse = xr.Dataset({\"2t_era5_mse_avg\": mse_era5_fcst, \"2t_era5_mse_bootstrapped\": mse_era5_fcst_boot, \n", + " \"2t_persistence_mse_avg\": mse_per_fcst, \"2t_persistence_mse_bootstrapped\": mse_per_fcst_boot,\n", + " # SSIM\n", + " \"2t_era5_ssim_avg\": ssim_era5_fcst, \"2t_era5_ssim_bootstrapped\": ssim_era5_fcst_boot, \n", + " \"2t_persistence_ssim_avg\": ssim_per_fcst, \"2t_persistence_ssim_bootstrapped\": ssim_per_fcst_boot,\n", + " # ACC\n", + " \"2t_era5_acc_avg\": acc_era5_fcst, \"2t_era5_acc_bootstrapped\": acc_era5_fcst_boot, \n", + " \"2t_persistence_acc_avg\": acc_per_fcst, \"2t_persistence_acc_bootstrapped\": acc_per_fcst_boot,\n", + " # TEXTURE \n", + " \"2t_era5_texture_avg\": txtr_era5_fcst, \"2t_era5_texture_bootstrapped\": txtr_era5_fcst_boot, \n", + " \"2t_persistence_texture_avg\": txtr_per_fcst, \"2t_persistence_texture_bootstrapped\": txtr_per_fcst_boot, \n", + " })\n", + "\n", + "outfile = os.path.join(indir, \"evaluation_metrics.nc\")\n", + "\n", + "print(\"Save evaluation metrics to '{0}'\".format(outfile))\n", + "ds_mse.to_netcdf(outfile)" + ] + }, + { + "cell_type": "markdown", + "id": "a7678cc4-7333-402a-bcf4-1bc15712255d", + "metadata": {}, + "source": [ + "## DONE!" 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PyDeepLearning-1.1", + "language": "python", + "name": "pydeeplearning" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Jupyter_Notebooks/get_metrics_joint_dom.ipynb b/Jupyter_Notebooks/get_metrics_joint_dom.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e54043e10d1caf51352f75c11ed4cbde656d5aee --- /dev/null +++ b/Jupyter_Notebooks/get_metrics_joint_dom.ipynb @@ -0,0 +1,338 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "d636ee12-e299-485f-b84d-6f35c05fa766", + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "import glob\n", + "sys.path.append(\"../utils/\")\n", + "import xarray as xr\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from statistical_evaluation import perform_block_bootstrap_metric, avg_metrics, calculate_cond_quantiles, Scores" + ] + }, + { + "cell_type": "markdown", + "id": "8510684d-9374-4e40-bc1c-4d69181c925c", + "metadata": {}, + "source": [ + "# Evaluation over a smaller (joint) domain\n", + "\n", + "The following cells will first merge all forecast files under `indir` into a single netCDF-file.<br>\n", + "Then the data is sliced to the domain defined by `lonlatbox` and all subsequent evaluation is performed on this smaller domain.<br>\n", + "The evaluation metrics are then saved to a file under `indir` named `evaluation_metrics_<nlon>x<nlat>.nc` where `nlat` and `nlon` denote the number of grid points/pixels in latitude and longitude direction of the smaller domain, respectively. 
<br>\n", + "\n", + "Thus, first let's define the basic parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "440b15fa-ecd4-4bb4-9100-ede5abb2b04f", + "metadata": {}, + "outputs": [], + "source": [ + "indir = \"/p/project/deepacf/deeprain/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/savp/20210901T090059_gong1_savp_cv12/\"\n", + "model = \"savp\"\n", + "# define domain. [3., 24.3, 40.2, 53.1] corresponds to the smallest domain tested in the GMD paper\n", + "lonlatbox = [3., 24.3, 40.2, 53.1]" + ] + }, + { + "cell_type": "markdown", + "id": "fd759f01-2561-4615-8056-036bdee6e2c7", + "metadata": {}, + "source": [ + "Next, we perform a first merging step. For computational efficiency, we merge max. 1000 files in the first step.<br>\n", + "Since the data is not sorted by the dimension `init_time` when querying along the sample index, we sort it before saving to intermediate files.<br>\n", + "\n", + "Given that the merging step has already been performed, no further processing is required.<br>\n", + "If this is not the case, we start with the sample indices between 0 and 999:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6726da3-d774-4eda-89d6-e315a865bb99", + "metadata": {}, + "outputs": [], + "source": [ + "def get_fname_ndigits(indir, prefix, suffix, n, patt=\"[0-9]\"):\n", + " flist = []\n", + " for i in range(1, n+1):\n", + " fn_search = os.path.join(indir, \"{0}{1}{2}\".format(prefix, i*patt, suffix))\n", + " flist = flist + glob.glob(fn_search)\n", + " \n", + " if len(flist) == 0:\n", + " raise FileNotFoundError(\"Could not find any file under '{0}' with prefix '{1}' and suffix '{2}' containing digits.\".format(indir, prefix, suffix))\n", + " return flist\n", + "\n", + "# get list of files with sample index between 0 and 999.\n", + "vfp_list = get_fname_ndigits(indir, \"vfp_date_*sample_ind_\", \".nc\", 3)\n", + "outfile = os.path.join(indir, 
\"vfp_{0}_forecasts_sample_ind_0_999.nc\".format(model))\n", + "\n", + "if not os.path.isfile(outfile):\n", + " print(\"File '{0}' does not exist. \\n Start reading data with sample index between 0 and 999 from '{1}'...\".format(outfile, indir))\n", + " data_all = xr.open_mfdataset(vfp_list, concat_dim=\"init_time\", combine=\"nested\", decode_cf=True).load()\n", + " data_all = data_all.sortby(\"init_time\")\n", + " print(\"Data loaded successfully. Save merged data to '{0}'.\".format(outfile))\n", + " data_all.to_netcdf(outfile, encoding={'init_time':{'units': \"seconds since 1900-01-01 00:00:00\"}})" + ] + }, + { + "cell_type": "markdown", + "id": "1c0222c0-386d-44f4-9532-4e824b14828c", + "metadata": {}, + "source": [ + "Then, we proceed with the rest. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54f4aa3e-3a39-496e-ae97-65f79d9cd598", + "metadata": {}, + "outputs": [], + "source": [ + "for i in np.arange(1, 9):\n", + " outfile = os.path.join(indir, \"vfp_{0}_forecasts_sample_ind_{1:d}000_{1:d}999.nc\".format(model, i))\n", + " if not os.path.isfile(outfile):\n", + " print(\"File '{0}' does not exist. Start reading data with sample index between {1:d}000 and {1:d}999 from '{2}'...\".format(outfile, i, indir))\n", + " data_all = xr.open_mfdataset(os.path.join(indir, \"vfp_date_*sample_ind_{0}???.nc\".format(i)), concat_dim=\"init_time\", combine=\"nested\", decode_cf=True).load()\n", + " data_all = data_all.sortby(\"init_time\")\n", + " print(\"Data loaded successfully. Save merged data to '{0}'.\".format(outfile))\n", + " data_all.to_netcdf(outfile, encoding={'init_time':{'units': \"seconds since 1900-01-01 00:00:00\"}})" + ] + }, + { + "cell_type": "markdown", + "id": "bdf16158-0ce5-40a3-848d-f574a1b9d622", + "metadata": {}, + "source": [ + "Still, xarray's `open_mfdataset`-method would not be able to concatenate all data since the `init_time`-dimension is not montonically increasing/decreasing when looping through the files. 
<br>\n", + "Thus, we have to merge the data manually.\n", + "The merged dataset is then saved to a separate data file for later computation.\n", + "\n", + "If the data has already been merged, we simply read the data from the corresponding netCDF-file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92f15edf-c23f-4803-b3c5-618305194de5", + "metadata": {}, + "outputs": [], + "source": [ + "outfile_all = os.path.join(indir, \"vfp_{0}_forecasts_all.nc\".format(model))\n", + "\n", + "if not os.path.isfile(outfile_all):\n", + " \n", + " print(\"netCDF-file with all forecasts '{0}' does not exist yet. Start merging and sorting all precursor files.\".format(outfile_all))\n", + " all_files = sorted(glob.glob(os.path.join(indir, \"vfp_{0}_forecasts_sample_ind_*.nc\".format(model))))\n", + " \n", + " if len(all_files) == 0:\n", + " raise FileNotFoundError(\"Could not find any precursor files.\")\n", + "\n", + " for i, f in enumerate(all_files):\n", + " print(\"Processing file '{0}'\".format(f))\n", + " tmp = xr.open_dataset(os.path.join(indir, f)).load()\n", + " if i == 0:\n", + " all_fcst = tmp.copy()\n", + " else:\n", + " print(\"Start merging\")\n", + " all_fcst = xr.merge([all_fcst, tmp]) \n", + "\n", + " # sort by init_time-dimension...\n", + " all_fcst = all_fcst.sortby(\"init_time\")\n", + " # ... and save to file\n", + " print(\"Finally, write all merged and sorted data to '{0}'.\".format(outfile_all))\n", + " all_fcst.to_netcdf(outfile_all)\n", + "else:\n", + " all_fcst = xr.open_dataset(outfile_all).load()" + ] + }, + { + "cell_type": "markdown", + "id": "0fcf1cb1-ba0d-4262-8e23-12ba44b6e2d0", + "metadata": {}, + "source": [ + "Now, we slice the dataset to the domain of interest (defined by `lonlatbox`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ede23e56-5be8-48be-b584-0eb8741acbf3", + "metadata": {}, + "outputs": [], + "source": [ + "all_fcst_sl = all_fcst.sel({\"lon\": slice(lonlatbox[0], lonlatbox[1]), \"lat\": slice(lonlatbox[3], lonlatbox[2])}) \n", + "print(all_fcst_sl)" + ] + }, + { + "cell_type": "markdown", + "id": "e21b89c8-57ab-4070-9b4c-ec0fe24c37b9", + "metadata": {}, + "source": [ + "Next we initialize the function for calculating the MSE and call it to evaluate the model and persistence forecasts. <br>\n", + "If you require further evaluation metrics, just expand the cell accordingly, e.g. add the following lines <br>\n", + "```\n", + "ssim_func = Scores(\"ssim\", [\"lat\", \"lon\"]).score_func \n", + "\n", + "ssim_model_all = ssim_func(data_fcst=all_fcst_sl[varname_fcst], data_ref=all_fcst_sl[varname_ref])\n", + "ssim_per_all = ssim_func(data_fcst=all_fcst_sl[varname_per], data_ref=all_fcst_sl[varname_ref])\n", + "```\n", + "in case you want to evaluate the SSIM as well." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2b70b80-6b86-4674-b051-6a23aaa821ea", + "metadata": {}, + "outputs": [], + "source": [ + "mse_func = Scores(\"mse\", [\"lat\", \"lon\"]).score_func\n", + "varname_ref, varname_fcst, varname_per = \"2t_ref\", \"2t_{0}_fcst\".format(model), \"2t_persistence_fcst\"\n", + "\n", + "mse_model_all = mse_func(data_fcst=all_fcst_sl[varname_fcst], data_ref=all_fcst_sl[varname_ref])\n", + "mse_per_all = mse_func(data_fcst=all_fcst_sl[varname_per], data_ref=all_fcst_sl[varname_ref])" + ] + }, + { + "cell_type": "markdown", + "id": "7745356d-ad44-47b6-9655-8d6db3433b1a", + "metadata": {}, + "source": [ + "Then, we initialize the data arrays to store the desired evaluation metrics..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b49db031-126c-44b1-b649-4f70587fac89", + "metadata": {}, + "outputs": [], + "source": [ + "fcst_hours = all_fcst_sl[\"fcst_hour\"]\n", + "nhours = len(fcst_hours)\n", + "nboots=1000\n", + "\n", + "mse_model_fcst = xr.DataArray(np.empty(nhours, dtype=object), coords={\"fcst_hour\": fcst_hours}, dims=[\"fcst_hour\"])\n", + "mse_model_fcst_boot = xr.DataArray(np.empty((nhours, nboots), dtype=object),\n", + " coords={\"fcst_hour\": fcst_hours, \"iboot\": np.arange(nboots)},\n", + " dims=[\"fcst_hour\", \"iboot\"])\n", + "mse_per_fcst = xr.DataArray(np.empty(nhours, dtype=object), coords={\"fcst_hour\": fcst_hours}, dims=[\"fcst_hour\"])\n", + "mse_per_fcst_boot = xr.DataArray(np.empty((nhours, nboots), dtype=object),\n", + " coords={\"fcst_hour\": fcst_hours, \"iboot\": np.arange(nboots)},\n", + " dims=[\"fcst_hour\", \"iboot\"])" + ] + }, + { + "cell_type": "markdown", + "id": "55967405-02d1-46e8-b3c3-8952d0e28bd2", + "metadata": {}, + "source": [ + "... and populate them by looping over all forecast hours." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5090e71c-f20f-43e6-94f6-71cbd0b6006d", + "metadata": {}, + "outputs": [], + "source": [ + "for i, fh in enumerate(fcst_hours):\n", + " mse_model_curr = mse_model_all.sel(fcst_hour=fh)\n", + " mse_per_curr = mse_per_all.sel(fcst_hour=fh)\n", + " mse_model_fcst[i], mse_per_fcst[i] = mse_model_curr.mean(), mse_per_curr.mean() # positional index, consistent with the bootstrap arrays below\n", + "\n", + " mse_model_fcst_boot[i, :] = perform_block_bootstrap_metric(mse_model_curr, \"init_time\", 24*7)\n", + " mse_per_fcst_boot[i, :] = perform_block_bootstrap_metric(mse_per_curr, \"init_time\", 24*7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d526324d-5d19-4193-8208-e609d9c65205", + "metadata": {}, + "outputs": [], + "source": [ + "print(mse_model_fcst)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b42cd738-b966-4b24-ad13-351d9b88f9e8", + "metadata": {}, + "outputs": [], + "source": [ + "print(mse_model_fcst)" + ] + }, + { + "cell_type": "markdown", + "id": "b13c7287-7a8c-4133-bccc-f250bf25dad7", + "metadata": {}, + "source": [ + "Finally, we put the data arrays into a joint dataset and save the results into the netCDF-file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cd455ee-4749-46dd-8095-9e43744a1563", + "metadata": {}, + "outputs": [], + "source": [ + "# create Dataset and save to netCDF-file\n", + "ds_mse = xr.Dataset({\"2t_{0}_mse_avg\".format(model): mse_model_fcst, \"2t_{0}_mse_bootstrapped\".format(model): mse_model_fcst_boot, \n", + " \"2t_persistence_mse_avg\": mse_per_fcst, \"2t_persistence_mse_bootstrapped\": mse_per_fcst_boot})\n", + "\n", + "outfile = os.path.join(indir, \"evaluation_metrics_{0:d}x{1:d}.nc\".format(len(all_fcst_sl[\"lon\"]), len(all_fcst_sl[\"lat\"])))\n", + "\n", + "print(\"Save evaluation metrics to '{0}'\".format(outfile))\n", + "print(ds_mse)\n", + "ds_mse.to_netcdf(outfile)" + ] + }, + { + "cell_type": "markdown", + "id": "69bb9464-6fdb-489b-ba59-0170040144ee", + "metadata": {}, + "source": [ + "## Done!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PyDeepLearning-1.1", + "language": "python", + "name": "pydeeplearning" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb b/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb deleted file mode 100644 index d788742d00cb9054dd90557edc674e481cf1c77b..0000000000000000000000000000000000000000 --- a/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb +++ /dev/null @@ -1,684 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "import os, glob\n", - "import math\n", - "import pickle\n", - "import numpy as np\n", - "import xarray as xr\n", - "import matplotlib\n", - "matplotlib.use('Agg')\n", - "from matplotlib.transforms import Affine2D\n", - "from 
matplotlib.patches import Polygon\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "base = \"/p/project/deepacf/deeprain/video_prediction_shared_folder/models/\"+ \\\n", - " \"era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/\"\n", - "fname_timing_train = \"/timing_training_time.pkl\"\n", - "fname_timing_total = \"/timing_total_time.pkl\"\n", - "\n", - "fname_timing_iter = \"timing_per_iteration_time.pkl\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# some auxiliary functions\n", - "def orderOfMagnitude(number):\n", - " return np.floor(np.log(number, 10))\n", - "\n", - "def total_times(infile):\n", - " with open(infile,'rb') as tfile:\n", - " #print(\"Opening pickle time: '{0}'\".format(infile))\n", - " total_time_sec = pickle.load(tfile)\n", - " return np.asarray(total_time_sec/60)\n", - "\n", - "def log_total_times(infile):\n", - " total_time_min = total_times(infile)\n", - " return np.log(total_time_min)\n", - "\n", - "\n", - "def get_time_dict(base, wildcardspec, tfilename, gpu_id_str=\"gpu\", llog = False):\n", - " time_dict = {}\n", - " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", - " wrapper = total_times\n", - " if llog: wrapper = log_total_times\n", - " for tfile in flist_hpc: \n", - " ngpus = get_ngpus(tfile, gpu_id_str)\n", - " time_dict[\"{0:d} GPU(s)\".format(ngpus)] = wrapper(tfile + tfilename)\n", - " return time_dict\n", - "\n", - "def get_ngpus(fname, search_str, max_order=3):\n", - " \"\"\"\n", - " Tries to get numbers in the vicinty of search_str which is supposed to be a substring in fname.\n", - " First seaches for numbers right before the occurence of search_str, then afterwards.\n", - " :param fname: file name from which number should be inferred\n", - " :param search_str: seach string for which number identification is 
considered to be possible\n", - " :param max_order: maximum order of retrieved number (default: 3 -> maximum number is 999 then)\n", - " :return num_int: integer of number in the vicintity of search string. \n", - " \"\"\"\n", - " \n", - " ind_gpu_info = fname.lower().find(search_str)\n", - " if ind_gpu_info == -1:\n", - " raise ValueError(\"Unable to find search string '{0}' in file name '{1}'\".format(search_str, fname))\n", - " \n", - " # init loops\n", - " fname_len = len(fname)\n", - " success, flag = False, True\n", - " indm = 1\n", - " ind_sm, ind_sp = 0, 0\n", - "\n", - " # check occurence of numbers in front of search string\n", - " while indm < max_order and flag:\n", - " if ind_gpu_info - indm > 0:\n", - " if fname[ind_gpu_info - indm].isnumeric():\n", - " ind_sm += 1\n", - " success = True\n", - " else:\n", - " flag = False\n", - " else:\n", - " flag = False\n", - " indm += 1\n", - " \n", - "\n", - " if not success: # check occurence of numbers after search string\n", - " ind_gpu_info = ind_gpu_info + len(search_str)\n", - " flag = True\n", - " indm = 0\n", - " while indm < max_order and flag: \n", - " if ind_gpu_info + indm < fname_len:\n", - " if fname[ind_gpu_info + indm].isnumeric():\n", - " ind_sp += 1\n", - " success = True\n", - " else:\n", - " flag = False\n", - " else:\n", - " flag = False\n", - " indm += 1\n", - " \n", - " if success:\n", - " return(int(fname[ind_gpu_info:ind_gpu_info+ind_sp]))\n", - " else:\n", - " raise ValueError(\"Search string found in fname, but unable to infer number of GPUs.\")\n", - "\n", - " else:\n", - " return(int(fname[ind_gpu_info-ind_sm:ind_gpu_info]))\n", - " \n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total computation with 16 GPU(s): 152.50984706878663\n", - "Total computation with 32 GPU(s): 81.80640578667322\n", - "Total computation with 4 GPU(s): 554.5182513117791\n", - 
"Total computation with 64 GPU(s): 45.01537701288859\n", - "Total computation with 8 GPU(s): 287.91878341039023\n" - ] - } - ], - "source": [ - "# Juwels\n", - "wildcard_juwels = '20210115T135325_langguth1_test_venv_juwels_container*old'\n", - "total_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_total, \"gpus\")\n", - "training_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_train, \"gpus\")\n", - "for key in training_time_min_juwels.keys():\n", - " print(\"Total computation with {0}: {1}\".format(key, training_time_min_juwels[key]))\n", - "\n", - "overhead_time_juwels = {}\n", - "for key in training_time_min_juwels.keys() & total_time_min_juwels.keys():\n", - " overhead_time_juwels[key] = total_time_min_juwels[key] - training_time_min_juwels[key]\n", - " \n", - "#print('Juwels total time in minutes', get_time_d)\n", - "#print('Juwels total training time in minutes', training_time_min_juwels)\n", - "#overhead_time_juwels = np.array(total_time_min_juwels) - np.array(training_time_min_juwels)\n", - "#print('Juwels overhead time in minutes', overhead_time_juwels)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total computation with 1 GPU(s): 566.7376739541689\n", - "Total computation with 4 GPU(s): 159.4931242307027\n", - "Total computation with 8 GPU(s): 92.15467914342881\n", - "Total computation with 16 GPU(s): 46.11619712909063\n", - "Total computation with 32 GPU(s): 33.09077355464299\n", - "Total computation with 64 GPU(s): 23.24405464331309\n" - ] - } - ], - "source": [ - "# Juwels booster\n", - "wildcard_booster = '2020*gong1_booster_gpu*'\n", - "total_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_total)\n", - "training_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_train)\n", - "for key in training_time_min_booster.keys():\n", - " print(\"Total computation with 
{0}: {1}\".format(key, training_time_min_booster[key]))\n", - "\n", - "#print('Juwels Booster total time in minutes', list_times(base, wildcard_booster, filename_timing_total))\n", - "#print('Juwels Booster total training time in minutes', list_times(base, wildcard_booster, filename_timing_train))\n", - "overhead_time_booster = {}\n", - "for key in training_time_min_booster.keys() & total_time_min_booster.keys():\n", - " overhead_time_booster[key] = total_time_min_booster[key] - training_time_min_booster[key]\n", - "#print('Juwels overhead time in minutes', overhead_time_booster)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def time_per_iteration_mean_std(infile):\n", - " with open(infile, 'rb') as tfile:\n", - " time_per_iteration_list = pickle.load(tfile) \n", - " \n", - " time_per_iteration = np.array(time_per_iteration_list)\n", - " return np.mean(time_per_iteration), np.std(time_per_iteration)\n", - "\n", - "def iter_stat(base, wildcardspec, gpu_id_str=\"gpu\"):\n", - " stat_iter_dict = {}\n", - " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", - " for tdir in flist_hpc: \n", - " ngpus = get_ngpus(tdir, gpu_id_str)\n", - " ftname = os.path.join(tdir, fname_timing_iter)\n", - " mean_loc, std_loc = time_per_iteration_mean_std(ftname)\n", - " stat_iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = {\"mean\": mean_loc , \"std\": std_loc}\n", - " return stat_iter_dict\n", - "\n", - "def time_per_iteration_all(infile):\n", - " with open(infile,'rb') as tfile:\n", - " time_per_iteration_list = pickle.load(tfile)\n", - " return np.asarray(time_per_iteration_list)\n", - "\n", - "def all_iter(base, wildcardspec, gpu_id_str=\"gpu\"):\n", - " iter_dict = {}\n", - " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", - " for tdir in flist_hpc: \n", - " ngpus = get_ngpus(tdir, gpu_id_str)\n", - " ftname = os.path.join(tdir, fname_timing_iter)\n", - " iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = 
time_per_iteration_all(ftname)\n", - " return iter_dict \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "JUWELS (0.6151515198034729, 0.20104178037750603)\n", - "Booster (0.3521572324468615, 0.3656996619706779)\n" - ] - } - ], - "source": [ - "# Juwels\n", - "print('JUWELS', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T140958_stadtler1_comparison_1node_1gpu/timing_per_iteration_time.pkl'))\n", - "# Booster\n", - "print('Booster', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T141910_gong1_booster_gpu1/timing_per_iteration_time.pkl'))" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Juwels mean and standart deviation {'16 GPU(s)': {'mean': 0.8209993402058342, 'std': 0.2627643291319852}, '32 GPU(s)': {'mean': 0.8590118098249986, 'std': 0.4078450977768068}, '4 GPU(s)': {'mean': 0.7445914211655112, 'std': 0.13789611351045}, '64 GPU(s)': {'mean': 0.9353915504630987, 'std': 0.6640973670265782}, '8 GPU(s)': {'mean': 0.7804724221628322, 'std': 0.21824334555299446}}\n" - ] - } - ], - "source": [ - "# Juwels\n", - "print('Juwels mean and standart deviation',iter_stat(base, wildcard_juwels))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Booster mean and standart deviation {'1 GPU(s)': {'mean': 0.3521572324468615, 'std': 0.3656996619706779}, '4 GPU(s)': {'mean': 0.41844419631014446, 'std': 0.5273198599590724}, '8 GPU(s)': {'mean': 0.48867375665101026, 'std': 0.4378652997442439}, '16 GPU(s)': 
{'mean': 0.4786909431320202, 'std': 0.49638173862734053}, '32 GPU(s)': {'mean': 0.6439339113469129, 'std': 1.4395666886291258}, '64 GPU(s)': {'mean': 0.8176603168024377, 'std': 2.1044189535471185}}\n" - ] - } - ], - "source": [ - "# Booster\n", - "print('Booster mean and standart deviation',iter_stat(base, wildcard_booster))" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "# Plotting \n", - "# Bar plot of total time and training time --> overhead time\n", - "\n", - "# dictionaries with the total times\n", - "tot_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_total)\n", - "tot_time_booster_dict= get_time_dict(base, wildcard_booster, fname_timing_total)\n", - "\n", - "# dictionaries with the training times\n", - "train_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_train)\n", - "train_time_booster_dict = get_time_dict(base, wildcard_booster, fname_timing_train)\n", - "\n", - "# get sorted arrays\n", - "# Note: The times for Juwels are divided by 2, since the experiments have been performed with an epoch number of 20\n", - "# instead of 10 (as Bing and Scarlet did)\n", - "ngpus_sort = sorted([int(ngpu.split()[0]) for ngpu in tot_time_juwels_dict.keys()])\n", - "nexps = len(ngpus_sort)\n", - "tot_time_juwels = np.array([tot_time_juwels_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])/2.\n", - "tot_time_booster = np.array([tot_time_booster_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", - "\n", - "train_time_juwels = np.array([train_time_juwels_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])/2.\n", - "train_time_booster = np.array([train_time_booster_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", - "\n", - "overhead_juwels = tot_time_juwels - train_time_juwels \n", - "overhead_booster= tot_time_booster - train_time_booster\n", - "\n", - "names = [\"Juwels\", \"Juwels Booster\"]" - ] - }, - { - "cell_type": 
"code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "400.0\n", - "278.0\n", - "100.0\n", - "2.0\n" - ] - } - ], - "source": [ - "plot_computation_times(tot_time_juwels, tot_time_booster, labels, [\"Juwels\", \"Juwels Booster\"], \\\n", - " \"./total_computation_time\", log_yvals=False)\n", - "\n", - "plot_computation_times(overhead_juwels, overhead_booster, labels, [\"Juwels\", \"Juwels Booster\"], \\\n", - " \"./overhead_time\")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "#print(labels)\n", - "#raise ValueError(\"Stop!\")\n", - "#x = np.arange(len(labels)) # the label locations\n", - "#width = 0.35 # the width of the bars\n", - "\n", - "#fig, ax = plt.subplots()\n", - "#rects1 = ax.bar(x - width/2, np.round(tot_time_juwels, 2), width, label='Juwels')\n", - "#rects2 = ax.bar(x + width/2, np.round(tot_time_booster, 2), width, label='Booster')\n", - "\n", - "def plot_computation_times(times1, times2, ngpus, names, plt_fname, log_yvals = False):\n", - " \n", - " nlabels = len(ngpus)\n", - " x_pos = np.arange(nlabels)\n", - " \n", - " bar_width = 0.35\n", - " ytitle = \"Time\"\n", - " ymax = np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. 
+ 0.5))*100.\n", - " print(ymax) \n", - " if log_yvals: \n", - " times1, times2 = np.log(times1), np.log(times2)\n", - " ytitle = \"LOG(Time) [min]\"\n", - " ymax = np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5))\n", - " \n", - " # create plot object\n", - " fig, ax = plt.subplots()\n", - " # create data bars\n", - " rects1 = ax.bar(x_pos - bar_width/2, np.round(times1, 2), bar_width, label=names[0])\n", - " rects2 = ax.bar(x_pos + bar_width/2, np.round(times2, 2), bar_width, label=names[1])\n", - " # customize plot appearance\n", - " # Add some text for labels, title and custom x-axis tick labels, etc.\n", - " ax.set_ylabel(ytitle)\n", - " ax.set_title('Comparison {0} and {1} with convLSTM model'.format(*names))\n", - " ax.set_xticks(x_pos)\n", - " ax.set_xticklabels(labels)\n", - " ax.set_xlabel('# GPUs')\n", - " print(np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5)))\n", - " ax.set_ylim(0., ymax)\n", - " ax.legend()\n", - " \n", - " # add labels\n", - " autolabel(ax, rects1)\n", - " autolabel(ax, rects2)\n", - " plt.savefig(plt_fname+\".png\")\n", - " plt.close()\n", - " \n", - "\n", - "def autolabel(ax, rects):\n", - " \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n", - " for rect in rects:\n", - " height = rect.get_height()\n", - " ax.annotate('{}'.format(height),\n", - " xy=(rect.get_x() + rect.get_width() / 2, height),\n", - " xytext=(0, 3), # 3 points vertical offset\n", - " textcoords=\"offset points\",\n", - " ha='center', va='bottom')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# Plot mean + std \n", - "# Juwels\n", - "dict_stat_juwels = iter_stat(base, wildcard_juwels, gpu_id_str=\"gpu\")\n", - "#print(dict_stat_juwels)\n", - "iter_mean_juwels = np.array([dict_stat_juwels[\"{0:d} GPU(s)\".format(key)][\"mean\"] for key in labels])\n", - "iter_std_juwels = np.array([dict_stat_juwels[\"{0:d} GPU(s)\".format(key)][\"std\"] 
for key in labels])\n", - "\n", - "dict_stat_booster = iter_stat(base, wildcard_booster, gpu_id_str=\"gpu\")\n", - "iter_mean_booster = np.array([dict_stat_booster[\"{0:d} GPU(s)\".format(key)][\"mean\"] for key in labels])\n", - "iter_std_booster = np.array([dict_stat_booster[\"{0:d} GPU(s)\".format(key)][\"std\"] for key in labels])" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(21225,)\n" - ] - } - ], - "source": [ - "iter_time_juwels = all_iter(base, wildcard_juwels)\n", - "iter_time_booster= all_iter(base, wildcard_booster)\n", - "\n", - "max_iter_juwels = np.shape(iter_time_booster[\"{0:d} GPU(s)\".format(labels[0])])[0]\n", - "max_iter_booster = np.shape(iter_time_booster[\"{0:d} GPU(s)\".format(labels[0])])[0]\n", - "\n", - "arr_iter_juwels = np.full((nexps, max_iter_juwels), np.nan)\n", - "arr_iter_booster= np.full((nexps, max_iter_booster), np.nan)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "# box plot instead of errorbar plot\n", - "# Juwels\n", - "#data_juwels = list_time_per_iteration_all_runs(base, wildcard_juwels)\n", - "data_juwels = all_iter(base, wildcard_juwels, gpu_id_str=\"gpu\")\n", - "# Booster\n", - "#data_booster = list_time_per_iteration_all_runs(base, wildcard_booster)\n", - "data_booster = all_iter(base, wildcard_booster, gpu_id_str=\"gpu\")\n", - "def simple_boxplot(time_per_iteration_data, title):\n", - " # Multiple box plots on one Axes\n", - " fig, ax = plt.subplots()\n", - " ax.set_title(title)\n", - " ax.boxplot(time_per_iteration_data, showfliers=False) # Outliers for initialization are disturbing \n", - " plt.xticks([1, 2, 3, 4, 5 ,6], ['1', '4', '8', '16', '32', '64'])\n", - " #plt.savefig('boxplot_'+title)\n", - " #plt.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "886\n", - "64.08639097213745\n", - "31.232596397399902\n", - "(1326,)\n", - "***********\n", - "2100\n", - "4.405388832092285\n", - "29.095214366912842\n", - "(2653,)\n", - "***********\n", - "36981\n", - "7.751298189163208\n", - "26.409477949142456\n", - "(42450,)\n", - "***********\n", - "3843\n", - "66.00082683563232\n", - "29.385547637939453\n", - "(21225,)\n" - ] - } - ], - "source": [ - "print(np.argmax(data_booster[\"64 GPU(s)\"]))\n", - "print(np.max(data_booster[\"64 GPU(s)\"]))\n", - "print(data_booster[\"64 GPU(s)\"][0])\n", - "print(np.shape(data_booster[\"64 GPU(s)\"]))\n", - "print(\"***********\")\n", - "\n", - "print(np.argmax(data_juwels[\"64 GPU(s)\"][1::]))\n", - "print(np.max(data_juwels[\"64 GPU(s)\"][1::]))\n", - "print(data_juwels[\"64 GPU(s)\"][0])\n", - "print(np.shape(data_juwels[\"64 GPU(s)\"]))\n", - "print(\"***********\")\n", - "\n", - "print(np.argmax(data_juwels[\"4 GPU(s)\"][1::]))\n", - "print(np.max(data_juwels[\"4 GPU(s)\"][1::]))\n", - "print(data_juwels[\"4 GPU(s)\"][0])\n", - "print(np.shape(data_juwels[\"4 GPU(s)\"]))\n", - " \n", - "print(\"***********\")\n", - "print(np.argmax(data_booster[\"4 GPU(s)\"][1::]))\n", - "print(np.max(data_booster[\"4 GPU(s)\"][1::]))\n", - "print(data_booster[\"4 GPU(s)\"][0])\n", - "print(np.shape(data_booster[\"4 GPU(s)\"]))\n", - "\n", - "#simple_boxplot(data_juwels, 'Juwels')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "simple_boxplot(data_booster, 'Booster')" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [], - "source": [ - "# Try more fancy box plot \n", - "def more_fancy_boxplot(time_per_iteration_data1, time_per_iteration_data2, ngpu_list, title):\n", - " nexps = len(ngpu_list)\n", - " # Shuffle data: EXPECT JUWELS FIRST FOR THE LEGEND! 
NOT GENERIC!\n", - " data = []\n", - " for i in np.arange(nexps):\n", - " data.append(time_per_iteration_data1[\"{0} GPU(s)\".format(ngpu_list[i])])\n", - " data.append(time_per_iteration_data2[\"{0} GPU(s)\".format(ngpu_list[i])])\n", - " \n", - " # trick to get list with duplicated entries\n", - " xlabels = [val for val in ngpu_list for _ in (0, 1)]\n", - "\n", - " # Multiple box plots on one Axes\n", - " #fig, ax = plt.subplots()\n", - " fig = plt.figure(figsize=(6,4))\n", - " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", - " \n", - " ax.set_title(title)\n", - " bp = ax.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showfliers=False) # Outliers for initialization are disturbing\n", - " plt.xticks(np.arange(1, nexps*2 +1), xlabels)\n", - " ax.set_xlabel('# GPUs')\n", - " ax.set_ylabel('Seconds')\n", - " \n", - " # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n", - " box_colors = ['darkkhaki', 'royalblue']\n", - " num_boxes = len(data)\n", - " medians = np.empty(num_boxes)\n", - " for i in range(num_boxes):\n", - " box = bp['boxes'][i]\n", - " boxX = []\n", - " boxY = []\n", - " for j in range(5):\n", - " boxX.append(box.get_xdata()[j])\n", - " boxY.append(box.get_ydata()[j])\n", - " box_coords = np.column_stack([boxX, boxY])\n", - " # Alternate between Dark Khaki and Royal Blue\n", - " ax.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))\n", - " # Now draw the median lines back over what we just filled in\n", - " med = bp['medians'][i]\n", - " medianX = []\n", - " medianY = []\n", - " for j in range(2):\n", - " medianX.append(med.get_xdata()[j])\n", - " medianY.append(med.get_ydata()[j])\n", - " ax.plot(medianX, medianY, 'k')\n", - " medians[i] = medianY[0]\n", - " # Finally, overplot the sample averages, with horizontal alignment\n", - " # in the center of each box\n", - " ax.plot(np.average(med.get_xdata()), np.average(data[i]),\n", - " color='w', marker='*', markeredgecolor='k')\n", - " \n", - " # Finally, add a 
basic legend\n", - " fig.text(0.9, 0.15, 'Juwels',\n", - " backgroundcolor=box_colors[0], color='black', weight='roman',\n", - " size='small')\n", - " fig.text(0.9, 0.09, 'Booster',\n", - " backgroundcolor=box_colors[1],\n", - " color='white', weight='roman', size='small')\n", - " #fig.text(0.90, 0.015, '*', color='white', backgroundcolor='silver',\n", - " # weight='roman', size='medium')\n", - " fig.text(0.9, 0.03, '* Mean', color='white', backgroundcolor='silver',\n", - " weight='roman', size='small')\n", - "\n", - " \n", - " plt.savefig('fancy_boxplot_'+title.replace(' ', '_'))\n", - " plt.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [], - "source": [ - "more_fancy_boxplot(data_juwels, data_booster, ngpus_sort, 'Time needed to iterate one step')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "flist_hpc1 = sorted(glob.glob(base + wildcard_juwels))\n", - "flist_hpc2 = sorted(glob.glob(base + wildcard_booster))\n", - "\n", - "\n", - " \n", - "\n", - "print(get_ngpus(flist_hpc1[2], \"gpu\"))\n", - "print(get_ngpus(flist_hpc1[0], \"gpu\"))\n", - "\n", - "print(get_ngpus(flist_hpc2[2], \"gpu\"))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/Jupyter_Notebooks/performance_check.ipynb b/Jupyter_Notebooks/performance_check.ipynb deleted file mode 
100644 index 3caf9018e91049c7ef7ee826382871dc5168a27a..0000000000000000000000000000000000000000 --- a/Jupyter_Notebooks/performance_check.ipynb +++ /dev/null @@ -1,724 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [], - "source": [ - "## import all required modules\n", - "import os, glob\n", - "import numpy as np\n", - "import pickle\n", - "# for plotting\n", - "import matplotlib\n", - "matplotlib.use('Agg')\n", - "from matplotlib.transforms import Affine2D\n", - "from matplotlib.patches import Polygon\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "metadata": {}, - "outputs": [], - "source": [ - "## some auxiliary functions\n", - "#\n", - "#colors = ['darkkhaki', 'royalblue']\n", - "colors = [\"midnightblue\", \"darkorange\"]\n", - "\n", - "def val_order(number):\n", - " return int(np.floor(np.log10(number)))\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def get_ngpus(fname, search_str, max_order=3):\n", - " \"\"\"\n", - " Tries to get numbers in the vicinty of search_str which is supposed to be a substring in fname.\n", - " First seaches for numbers right before the occurence of search_str, then afterwards.\n", - " :param fname: file name from which number should be inferred\n", - " :param search_str: seach string for which number identification is considered to be possible\n", - " :param max_order: maximum order of retrieved number (default: 3 -> maximum number is 999 then)\n", - " :return num_int: integer of number in the vicintity of search string. 
\n", - " \"\"\"\n", - " \n", - " ind_gpu_info = fname.lower().find(search_str)\n", - " if ind_gpu_info == -1:\n", - " raise ValueError(\"Unable to find search string '{0}' in file name '{1}'\".format(search_str, fname))\n", - " \n", - " # init loops\n", - " fname_len = len(fname)\n", - " success, flag = False, True\n", - " indm = 1\n", - " ind_sm, ind_sp = 0, 0\n", - " # check occurence of numbers in front of search string\n", - " while indm < max_order and flag:\n", - " if ind_gpu_info - indm > 0:\n", - " if fname[ind_gpu_info - indm].isnumeric():\n", - " ind_sm += 1\n", - " success = True\n", - " else:\n", - " flag = False\n", - " else:\n", - " flag = False\n", - " indm += 1\n", - " # end while-loop\n", - " if not success: # check occurence of numbers after search string\n", - " ind_gpu_info = ind_gpu_info + len(search_str)\n", - " flag = True\n", - " indm = 0\n", - " while indm < max_order and flag: \n", - " if ind_gpu_info + indm < fname_len:\n", - " if fname[ind_gpu_info + indm].isnumeric():\n", - " ind_sp += 1\n", - " success = True\n", - " else:\n", - " flag = False\n", - " else:\n", - " flag = False\n", - " indm += 1\n", - " # end while-loop \n", - " if success:\n", - " return(int(fname[ind_gpu_info:ind_gpu_info+ind_sp]))\n", - " else:\n", - " raise ValueError(\"Search string found in fname, but unable to infer number of GPUs.\")\n", - "\n", - " else:\n", - " return(int(fname[ind_gpu_info-ind_sm:ind_gpu_info]))\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "# functions for computing time\n", - "def compute_time_tot(infile):\n", - " with open(infile,'rb') as tfile:\n", - " #print(\"Opening pickle time: '{0}'\".format(infile))\n", - " total_time_sec = pickle.load(tfile)\n", - " return np.asarray(total_time_sec/60)\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def 
compute_time_tot_log(infile):\n", - " total_time_min = compute_time_tot(infile)\n", - " return np.log(total_time_min)\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def get_time_dict(base, wildcardspec, tfilename, gpu_id_str=\"gpu\", llog = False):\n", - " time_dict = {}\n", - " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", - " print(flist_hpc)\n", - " wrapper = compute_time_tot\n", - " if llog: wrapper = compute_time_tot_log\n", - " for tfile in flist_hpc: \n", - " ngpus = get_ngpus(tfile, gpu_id_str)\n", - " time_dict[\"{0:d} GPU(s)\".format(ngpus)] = wrapper(tfile + tfilename)\n", - " return time_dict\n", - "#\n", - "def calc_speedup(comp_time, ngpus, l_ideal= False):\n", - " nn = np.shape(ngpus)[0]\n", - " if l_ideal:\n", - " spd_data = np.array(ngpus, dtype=float)\n", - " else:\n", - " spd_data = comp_time\n", - "\n", - " spd_up = spd_data[0:nn-1]/spd_data[1::]\n", - " \n", - " if l_ideal: spd_up = 1./spd_up\n", - "\n", - " return spd_up\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "# functions for iteration time data \n", - "def iter_time_mean_std(infile):\n", - " with open(infile, 'rb') as tfile:\n", - " time_per_iteration_list = pickle.load(tfile) \n", - " \n", - " time_per_iteration = np.array(time_per_iteration_list)\n", - " return np.mean(time_per_iteration), np.std(time_per_iteration)\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def iter_stat(base, wildcardspec, gpu_id_str=\"gpu\"):\n", - " stat_iter_dict = {}\n", - " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", - " for tdir in flist_hpc: \n", - " ngpus = get_ngpus(tdir, gpu_id_str)\n", - " ftname = os.path.join(tdir, fname_timing_iter)\n", - " mean_loc, std_loc = iter_time_mean_std(ftname)\n", - " stat_iter_dict[\"{0:d} 
GPU(s)\".format(ngpus)] = {\"mean\": mean_loc , \"std\": std_loc}\n", - " return stat_iter_dict\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def read_iter_time(infile):\n", - " with open(infile,'rb') as tfile:\n", - " time_per_iteration_list = pickle.load(tfile)\n", - " return np.asarray(time_per_iteration_list)\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def get_iter_time_all(base, wildcardspec, gpu_id_str=\"gpu\"):\n", - " iter_dict = {}\n", - " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", - " for tdir in flist_hpc: \n", - " ngpus = get_ngpus(tdir, gpu_id_str)\n", - " ftname = os.path.join(tdir, fname_timing_iter)\n", - " iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = read_iter_time(ftname)\n", - " return iter_dict \n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "# functions for plotting\n", - "def autolabel(ax, rects, rot=45):\n", - " \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n", - " scal = 1\n", - " if rot <0.:\n", - " scal = -1\n", - " for rect in rects:\n", - " height = rect.get_height()\n", - " ax.annotate('{}'.format(height),\n", - " xy=(rect.get_x() + rect.get_width()*scal, height),\n", - " xytext=(0, 3), # 3 points vertical offset\n", - " textcoords=\"offset points\",\n", - " ha='center', va='bottom', rotation=rot)\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def plot_computation_time(times1, times2, ngpus, names, plt_fname, log_yvals = False):\n", - " \n", - " nlabels = len(ngpus)\n", - " x_pos = np.arange(nlabels)\n", - " \n", - " bar_width = 0.35\n", - " ytitle = \"Time [min]\"\n", - " max_time = np.maximum(np.max(times1), np.max(times2))\n", - " 
time_order = val_order(max_time)\n", - " ymax = np.ceil(max_time/(10**time_order) + 0.5)*(10**time_order) + 10**time_order\n", - " # np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n", - " if log_yvals: \n", - " times1, times2 = np.log(times1), np.log(times2)\n", - " ytitle = \"LOG(Time) [min]\"\n", - " ymax = np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5))\n", - " \n", - " # create plot object\n", - " fig, ax = plt.subplots()\n", - " # create data bars\n", - " rects1 = ax.bar(x_pos - bar_width/2, np.round(times1, 2), bar_width, label=names[0], color=colors[0])\n", - " rects2 = ax.bar(x_pos + bar_width/2, np.round(times2, 2), bar_width, label=names[1], color=colors[1])\n", - " # customize plot appearance\n", - " # Add some text for labels, title and custom x-axis tick labels, etc.\n", - " ax.set_ylabel(ytitle)\n", - " ax.set_title('Comparison {0} and {1} with convLSTM model'.format(*names))\n", - " ax.set_xticks(x_pos)\n", - " ax.set_xticklabels(ngpus)\n", - " ax.set_xlabel('# GPUs')\n", - " ax.set_ylim(0., ymax)\n", - " ax.legend()\n", - " \n", - " # add labels\n", - " autolabel(ax, rects1)\n", - " autolabel(ax, rects2)\n", - " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", - " plt.savefig(plt_fname+\".png\")\n", - " plt.close()\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def plot_speedup(comp_time_hpc1, comp_time_hpc2, ngpus, names):\n", - " fig = plt.figure(figsize=(6,4))\n", - " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", - " \n", - " spd_up1 = calc_speedup(comp_time_hpc1, ngpus)\n", - " spd_up2 = calc_speedup(comp_time_hpc2, ngpus)\n", - " spd_ideal= calc_speedup(comp_time_hpc2, ngpus, l_ideal=True)\n", - " \n", - " plt.plot(spd_up1/spd_ideal, label= names[0], c=colors[0], lw=1.5)\n", - " plt.plot(spd_up2/spd_ideal, label= names[1], c=colors[1], lw=1.5)\n", - " plt.plot(spd_ideal/spd_ideal, label= \"Ideal\", 
c=\"r\", lw=3.)\n", - " \n", - " xlabels = []\n", - " for i in np.arange(len(ngpus)-1):\n", - " xlabels.append(\"{0} -> {1}\".format(ngpus[i], ngpus[i+1]))\n", - " plt.xticks(np.arange(0, len(ngpus)-1), xlabels)\n", - " ax.set_xlim(-0.5, len(ngpus)-1.5)\n", - " ax.set_ylim(0.5, 1.5)\n", - " legend = ax.legend(loc='upper left')\n", - " ax.set_xlabel('GPU usage')\n", - " ax.set_ylabel('Ratio Speedup factor') \n", - " \n", - " plt_fname = \"speed_up_{0}_vs_{1}.png\".format(*names)\n", - " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", - " plt.savefig(\"speed_up_{0}_vs_{1}.png\".format(*names))\n", - "#\n", - "# ****************************************************************************************************\n", - "#\n", - "def boxplot_iter_time(time_per_iteration_data1, time_per_iteration_data2, ngpu_list, names):\n", - " nexps = len(ngpu_list)\n", - " # create data lists for boxplot-routine\n", - " data = []\n", - " for i in np.arange(nexps):\n", - " data.append(time_per_iteration_data1[\"{0} GPU(s)\".format(ngpu_list[i])])\n", - " data.append(time_per_iteration_data2[\"{0} GPU(s)\".format(ngpu_list[i])])\n", - " \n", - " # trick to get list with duplicated entries\n", - " xlabels = [val for val in ngpu_list for _ in (0, 1)]\n", - "\n", - " # Multiple box plots on one Axes\n", - " #fig, ax = plt.subplots()\n", - " fig = plt.figure(figsize=(6,4))\n", - " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", - " \n", - " ax.set_title(\"Time per iteration step\")\n", - " bp = ax.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showfliers=False) # Outliers for initialization are disturbing\n", - " plt.xticks(np.arange(1, nexps*2 +1), xlabels)\n", - " ax.set_xlabel('# GPUs')\n", - " ax.set_ylabel('Time [s]')\n", - " \n", - " # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n", - " box_colors = colors\n", - " num_boxes = len(data)\n", - " medians = np.empty(num_boxes)\n", - " for i in range(num_boxes):\n", - " box = 
bp['boxes'][i]\n", - " boxX = []\n", - " boxY = []\n", - " for j in range(5):\n", - " boxX.append(box.get_xdata()[j])\n", - " boxY.append(box.get_ydata()[j])\n", - " box_coords = np.column_stack([boxX, boxY])\n", - " # Alternate between Dark Khaki and Royal Blue\n", - " ax.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))\n", - " # Now draw the median lines back over what we just filled in\n", - " med = bp['medians'][i]\n", - " medianX = []\n", - " medianY = []\n", - " for j in range(2):\n", - " medianX.append(med.get_xdata()[j])\n", - " medianY.append(med.get_ydata()[j])\n", - " ax.plot(medianX, medianY, 'k')\n", - " medians[i] = medianY[0]\n", - " # Finally, overplot the sample averages, with horizontal alignment\n", - " # in the center of each box\n", - " ax.plot(np.average(med.get_xdata()), np.average(data[i]),\n", - " color='w', marker='*', markeredgecolor='k', markersize=10)\n", - " \n", - " # Finally, add a basic legend\n", - " fig.text(0.86, 0.15, names[0],\n", - " backgroundcolor=box_colors[0], color='white', weight='roman',\n", - " size='small')\n", - " fig.text(0.86, 0.09, names[1],\n", - " backgroundcolor=box_colors[1],\n", - " color='white', weight='roman', size='small')\n", - " #fig.text(0.90, 0.015, '*', color='white', backgroundcolor='silver',\n", - " # weight='roman', size='medium')\n", - " #fig_transform = ax.figure.transFigure #+ ax.transAxes.inverted() #+ ax.figure.transFigure.inverted()\n", - " #ax.plot(0.1, 0.03, marker='*', markersize=30, color=\"w\", markeredgecolor=\"k\", transform=fig_transform)\n", - " fig.text(0.86, 0.03, '* Mean', color='black', backgroundcolor='white', \n", - " weight='roman', size='small', bbox=dict(facecolor='none', edgecolor='k'))\n", - "\n", - " plt_fname = \"boxplot_iter_time_{0}_vs_{1}\".format(*names)\n", - " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", - " plt.savefig(plt_fname+\".png\")\n", - " plt.close()\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - 
"execution_count": 110, - "metadata": {}, - "outputs": [], - "source": [ - "## some basic settings\n", - "base_dir = \"/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/\"\n", - "\n", - "wildcard_hpc1 = '20210325T095504_langguth1_juwels_container_[1-9]*gpu*' # search pattern for finding the experiments\n", - "wildcard_hpc2 = '20210325T095504_langguth1_jwb_container_[1-9]*gpu*'\n", - "\n", - "gpu_id_str = [\"gpu\", \"gpu\"] # search substring to get the number of GPUs used in the experiments,\n", - " # e.g. \"gpu\" if '64gpu' is a substring in the experiment directory\n", - " # or \"ngpu\" if 'ngpu64' is a substring in the experiment directory\n", - " # -> see wilcard-variables above\n", - "names_hpc = [\"Juwels\", \"Booster\"]\n", - "\n", - "# name of pickle files tracking computing time\n", - "fname_timing_train = \"/timing_training_time.pkl\"\n", - "fname_timing_total = \"/timing_total_time.pkl\"\n", - "\n", - "fname_timing_iter = \"timing_per_iteration_time.pkl\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_4gpus', 
'/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_8gpus']\n", - "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_8gpus']\n", - "{'16 GPU(s)': array(53.40843068), '1 GPU(s)': array(930.4968381), '32 GPU(s)': array(45.96871045), '4 GPU(s)': array(217.45655225), '64 GPU(s)': array(35.7369519), '8 GPU(s)': array(106.4218419)}\n", - "{'16 GPU(s)': array(34.26928383), '1 GPU(s)': array(492.70926997), '32 GPU(s)': array(35.05492661), '4 GPU(s)': array(100.99109779), '64 GPU(s)': array(30.98471271), '8 GPU(s)': array(49.63896298)}\n", - 
"['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_8gpus']\n", - "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_4gpus', 
'/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_8gpus']\n" - ] - } - ], - "source": [ - "## evaluate computing time\n", - "# dictionaries with the total times\n", - "tot_time_hpc1_dict = get_time_dict(base_dir, wildcard_hpc1, fname_timing_total, gpu_id_str=gpu_id_str[0])\n", - "tot_time_hpc2_dict= get_time_dict(base_dir, wildcard_hpc2, fname_timing_total, gpu_id_str=gpu_id_str[1])\n", - "\n", - "print(tot_time_hpc1_dict)\n", - "print(tot_time_hpc2_dict)\n", - "\n", - "# dictionaries with the training times\n", - "train_time_hpc1_dict = get_time_dict(base_dir, wildcard_hpc1, fname_timing_train, gpu_id_str=gpu_id_str[0])\n", - "train_time_hpc2_dict = get_time_dict(base_dir, wildcard_hpc2, fname_timing_train, gpu_id_str=gpu_id_str[1])\n", - "\n", - "# get sorted arrays\n", - "# Note: The times for Juwels are divided by 2, since the experiments have been performed with an epoch number of 20\n", - "# instead of 10 (as Bing and Scarlet did)\n", - "ngpus_sort = sorted([int(ngpu.split()[0]) for ngpu in tot_time_hpc1_dict.keys()])\n", - "nexps = len(ngpus_sort)\n", - "tot_time_hpc1 = np.array([tot_time_hpc1_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", - "tot_time_hpc1[0] = tot_time_hpc1[0]#*2.\n", - "tot_time_hpc2 = np.array([tot_time_hpc2_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", - "\n", - "train_time_hpc1 = np.array([train_time_hpc1_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", - "train_time_hpc1[0] = train_time_hpc1[0]#*2.\n", - "train_time_hpc2 = np.array([train_time_hpc2_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", - "\n", - "overhead_hpc1 = tot_time_hpc1 - train_time_hpc1\n", - 
"overhead_hpc2= tot_time_hpc2 - train_time_hpc2" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[492.70926997 100.99109779 49.63896298 34.26928383 35.05492661\n", - " 30.98471271]\n", - "Saving plot in file: ./total_computation_time_Juwels_vs_Booster.png ...\n", - "Saving plot in file: ./overhead_time_Juwels_vs_Booster.png ...\n", - "Saving plot in file: speed_up_Juwels_vs_Booster.png.png ...\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA010lEQVR4nO3dd3hUZfbA8e9JCAQIzSQgEJqKSJFEWmyAgAIiihQRFYG1YENl7e4urmuvu66u5afrigoiiqKABZUiiIgQJNIFUSGClAhIqCnn98d7EyOkTMhMbjI5n+e5DzN37tx77iScefPe955XVBVjjDHhJcLvAIwxxgSfJXdjjAlDltyNMSYMWXI3xpgwZMndGGPCkCV3Y4wJQ1X8DqCk4uLitHnz5n6HYYwxvktJSdmhqvEFvVbhknvz5s1ZsmSJ32EYY4zvROSnwl6zbhljjAlDltyNMSYMWXI3xpgwVOH63AuSmZlJWloaBw4c8DuUcis6OpqEhASioqL8DsUYUwbCIrmnpaVRq1Ytmjdvjoj4HU65o6qkp6eTlpZGixYt/A7HGFMGwqJb5sCBA8TGxlpiL4SIEBsba3/ZGFOJhEVyByyxF8M+H2Mql7BJ7uVBTExMSPZ777338sQTT4Rk38aY8GTJ3RhjwpAl9yCbO3cu/fv3z3s+ZswYxo8fz+LFixk0aBAA77//PtWrV+fQoUMcOHCA4447DoDvv/+evn370rFjR7p27cqaNWuO2P/TTz9NmzZtaN++PcOGDSubkzLGVDhhMVomv3vumcrKlT8HdZ9t2zbmvvsGlmofp5xyCsuWLQNg/vz5tGvXjsWLF5OVlUVycjIAo0eP5oUXXqBly5YsWrSI66+/ntmzZ/9hP4888gg//PAD1apVY9euXaWKyRgTvsIuuZdXVapU4fjjj2f16tV8/fXX3HLLLcybN4/s7Gy6du1KRkYGX375JRdddFHeew4ePHjEftq3b89ll13GhRdeyIUXXliGZ2CMqUjCLrmXtoVdWlWqVCEnJyfvef7hh926deOjjz4iKiqKs88+m1GjRpGdnc3jjz9OTk4OdevWzWvdF+aDDz5g3rx5TJ8+nQcffJDly5dTpUrY/RiNMaVkfe5B1qxZM1atWsXBgwfZtWsXs2bNynuta9euPPXUU5x22mnEx8eTnp7O2rVradeuHbVr16ZFixa8/fbbgLvxKDU19Q/7zsnJYdOmTfTo0YNHH32U3bt3k5GRUabnZ4ypGKzJFyRZWVlUq1aNJk2aMHToUNq1a0eLFi045ZRT8rZJTk5m69atdOvWDXBdLL/88kveGPSJEydy3XXX8cADD5CZmcmwYcNITEzMe392djbDhw9n9+7dqCo33XQTdevWLdPzNMZUDKKqfsdQIp06ddLD67mvXr2a1q1b+xSRk5qaytVXX83XX3/taxxFKQ+fkzEmeE
QkRVU7FfRayLplROR/IrJNRFYUs11nEckSkSGhiiXUXnjhBS655BIeeOABv0MxxhggtH3u44G+RW0gIpHAo8AnIYwj5K699lpWrVpF7969/Q7FGGOAECZ3VZ0H/FrMZjcC7wDbQhWHMcZURr6NlhGRxsBA4PkAth0tIktEZMn27dtDH5wxxlRwfg6FfAq4U1VzittQVV9U1U6q2ik+vsCJvo0xxuTj51DITsCb3jDAOKCfiGSp6ns+xmSMMWHBt5a7qrZQ1eaq2hyYAlxfkRN7ZGQkSUlJJCYm0qFDB7788sug7v+hhx4K6v6MMeEtlEMhJwELgVYikiYiV4rItSJybaiO6afq1auzbNkyUlNTefjhh7n77ruDuv+jSe7Z2dlBjcEYU3GEcrTMJaraUFWjVDVBVV9W1RdU9YUCth2lqlNCFUtZ++2336hXrx7gygjcfvvttGvXjpNPPpnJkycXuX7Lli1069aNpKQk2rVrx/z587nrrrvYv38/SUlJXHbZZQBMmDCBLl26kJSUxDXXXJOXyGNiYrj11ltJTExk4cKFPpy9MaY8CL/yA3PGwrZlwd1n/STo8VSRm+Qm3wMHDrBly5a8Ur3vvvtuXot+x44ddO7cmW7duvHll18WuP6NN96gT58+/PWvfyU7O5t9+/bRtWtX/vOf/+QVFVu9ejWTJ09mwYIFREVFcf311zNx4kRGjBjB3r17SU5O5sknnwzuZ2CMqVDCL7n7JLdbBmDhwoWMGDGCFStW8MUXX3DJJZcQGRlJgwYN6N69O4sXLy50fefOnbniiivIzMzkwgsvJCkp6YhjzZo1i5SUFDp37gy4L5b69esDru9/8ODBZXXaxphyKvySezEt7LJw2mmnsWPHDo5mTH63bt2YN28eH3zwAaNGjeKWW25hxIgRf9hGVRk5ciQPP/zwEe+Pjo4mMjLyqGM3xoQHK/kbAmvWrCE7O5vY2Fi6du3K5MmTyc7OZvv27cybN48uXboUuv6nn36iQYMGXH311Vx11VUsXboUgKioKDIzMwHo1asXU6ZMYds2d2Pvr7/+yk8//eTb+Rpjyp/wa7n7JLfPHVzL+tVXXyUyMpKBAweycOFCEhMTEREee+wxjj322ELXv/rqqzz++ONERUURExPDa6+9Brgp+Nq3b0+HDh2YOHEiDzzwAL179yYnJ4eoqCieffZZmjVr5uMnYIwpT6zkbyVin5Mx4cWXkr/GGGP8Y8ndGGPCkCV3Y4wJQ5bcjTEmDFlyN8aYMGTJ3RhjwpAl9yCJiYkpcP2oUaOYMuXoaqLde++9PPHEE6UJyxhTSVlyN8aYMGTJPchUlTFjxtCqVSvOPvvsvBIBACkpKXTv3p2OHTvSp08ftmzZAsBLL71E586dSUxMZPDgwezbt8+v8I0xYSL8krtI6JYATJ06lbVr17Jq1Spee+21vBmZMjMzufHGG5kyZQopKSlcccUV/PWvfwVg0KBBLF68mNTUVFq3bs3LL78cso/HGFM5WG2ZIJs3b15eKd9GjRrRs2dPANauXcuKFSs455xzADdLUsOGDQFYsWIFf/vb39i1axcZGRn06dPHt/iNMeHBknsZUVXatm1b4OxIo0aN4r333iMxMZHx48czd+7csg/QGBNWwq9bRjV0SwC6deuWV8p3y5YtzJkzB4BWrVqxffv2vOSemZnJypUrAdizZw8NGzYkMzOTiRMnhuZzMcZUKtZyD7KBAwcye/Zs2rRpQ9OmTTnttNMAqFq1KlOmTOGmm25i9+7dZGVlMXbsWNq2bcv9999PcnIy8fHxJCcns2fPHp/PwhhT0VnJ30rEPidjwouV/DXGmErGkrsxxoQhS+7GGBOGwia5V7RrB2XNPh9jKpewSO7R0dGkp6dbAiuEqpKenk50dLTfoRhjykhYDIVMSEggLS2N7du3+x1KuRUdHU1CQoLfYRhjykhYJPeoqChatGjhdxjGGFNuhEW3jDHGmD+y5G6MMWGoyOQuIp
EiYsVOjDGmgikyuatqNtBMRKqWUTzGGGOCIJALqhuABSIyDdibu1JV/xmyqMorVVjyJJx8FUTX9TsaY4wpVCB97t8DM7xta+VbKp+tS2D+XfBqO/hxpt/RGGNMoYptuavqPwBEJMZ7nhHIjkXkf0B/YJuqtivg9cuAOwEB9gDXqWpq4KH74NjOcOlC+GgkvNMXTr4aznoSqlbO7zpjTPlVbMtdRNqJyDfASmCliKSISNsA9j0e6FvE6z8A3VX1ZOB+4MUA9um/YzvD5Uuh8x2w4mV49WTYONvvqIwx5g8C6ZZ5EbhFVZupajPgVuCl4t6kqvOAX4t4/UtV3ek9/QqoOLdPVomGbo/CsC8gsiq83QtmjYHMvcW/1xhjykAgyb2mqs7JfaKqc4GaQY7jSuCjIO8z9BqdBpcvgw5jYdlz8FoipH3hd1TGGBNQct8gIuNEpLm3/A03giYoRKQHLrnfWcQ2o0VkiYgsKXf1Y6JqQI9/wcVzQXNgcjeYeytk7vc7MmNMJRZIcr8CiAfeBd4B4oA/BePgItIe+C8wQFXTC9tOVV9U1U6q2ik+Pj4Yhw6+hG4w4ltIvBZS/gmvnwJbFvkdlTGmkgokuZ+tqjepagdV7aiqY4FzSntgEWmK+8K4XFW/K+3+yoWqMXD2czDkU8jaD5NOh/l3Q9ZBvyMzxlQygST3uwNc9wciMglYCLQSkTQRuVJErhWRa71N7gFigedEZJmILCl0ZxVNs7Nh5HJo+yf4+hGY0BG2pvgdlTGmEpHCJrgQkXOBfsBQYHK+l2oDbVS1S+jDO1KnTp10yZIK9D3ww0fwyVWwdysk/xVO/asbYWOMMaUkIimq2qmg14pquW8GlgAHgJR8yzSgT7CDDFstzoWRK6D1pfDVfTAxGbZ/63dUxpgwV2jLPW8DkdrAXq+IGCISCVRT1X1lEN8RKlzLPb/178Ono+HATjjt79DlTogIi/lSjDE+ONqWe65PgOr5nlcHPgtGYJXOCQNg5EpoOQgW/M1dcE1f7XdUxpgwFEhyj85fT8Z7XCN0IYW5GnHQ/03o/xbs2uCGTC5+HHKy/Y7MGBNGAknue0WkQ+4TEekI2B06pdXqIhi10vXJz7vD3fy0c53fURljwkQgyX0s8LaIzBeRL3AjZ8aENKrKomYDuOBd6DcB0le58gVL/+3udDXGmFIIpOTvYhE5CWjlrVqrqpmhDasSEYHWl0GTHvDJ1TBnLKybCn3+B3WP8zs6Y0wFFegE2a2ANkAH4BIRGRG6kCqpmEYwcIZL6tu+gdfaQ+oLbvYnY4wpoUDquf8deMZbegCPAReEOK7KSQTa/cnd3drodPjsOpjSG37b6HdkxpgKJpCW+xCgF/CLqv4JSATqhDSqyq52Uxg8E85+AbYsdBOCLP+fteKNMQELJLnvV9UcIMu7oWkb0CS0YRlEIPEa14qvfwp8ciVM7Q8Zm/2OzBhTAQSS3JeISF3c7EspwFJcQTBTFuq0gKGzoce/YdMcGN8WVk2wVrwxpkiFJncROcN7+GdV3aWqL+BK/Y70umdMWZEI6HATjEiF2Dbw0eUwbZArRmaMMQUoquX+tPdvXitdVX9UVat65Zd6LeHiedDtcVdtcnxbWPuW31EZY8qhosa5Z4rIi0CCiDx9+IuqelPowjKFioiEzrfBcf3g41Ew42L47h3o9awrbWCMMRTdcu8PzMaVGkgpYDF+im0Dl3wJZz4I66fCq21h3Xt+R2WMKScKbbmr6g7gTRFZraqpZRiTCVREFUj+CxzXHz4aCdMGQuvh0PNpiK7nd3TGGB8VO1rGEnsFEN8eLvva1Yhf+ya82g42fOh3VMYYHwVafsCUd5FRcPq9cOkiqFYPpp4HM6+Eg7v9jswY4wNL7uGmQQcYngJd7oaV493drT/Z3CrGVDaB1JaJFZFnRGSpiKSIyL9FJLYsgjNHqUo16PqQu+BapQZMOcfVqTmUUfx7jTFhIZCW+5u4kgODcXVmtuNqul
c6OTk5ZGQc8DuMwDVMhsu/gY63QOr/uUqTmz73OypjTBkIZILsFara7rB1y1X15JBGVgg/J8jetOlXkpPvp1GjurRs2eCIJTY2xpe4ApL2BcwcBbu+hw43w5kPQZTNlmhMRVbUBNnFTtYBfCIiw4DcWyGHADODFVxFEh0dxV139WPduq2sW7eVN974in37DuW9Xq9eTVq2rH9E0m/UqC4RET5f3kg405UvmHeXm+3phw+hz3hofLq/cRljQiKQlvseoCaQO/dbBLDXe6yqWjt04R3Jz5b74XJycti8eTfr12/NS/i5y6+/7s3brnr1qpxwwpFJv3nzOKKiIss+8I2zYeYVsGcTdLwVzrgPqkSXfRzGmFIpquVebHIvb8pTci9KenrGEQl/3bqtbN68K2+bKlUiaN487oikf8IJ9alRo1poAzy0Bz6/Db59EY5pDee+Csd2Du0xjTFBVarkLiLdClqvqvOCEFuJVZTkXpi9ew/ma+lvy0v6P/64g+zs3yfGbty4npfs6+dL+iHo1/9xphsPv/cX6HIXnDrOjbYxxpR7pU3u0/M9jQa6ACmq2jN4IQauoif3whw6lMWPP+44oqW/fv02Dhz4fT7yY46pScuWDTjxxN8Tfm6/vogc3cEP7IK5f3bj4uNOhnNfg/pJwTgtY0wIBbVbRkSaAE+p6uBgBFdS4ZrcC5OTk8PPP+8qIOlvZefOfXnb1axZ7Q/9+rlJv3nzWKpUCbBf//vp8Olo2L/DteC73O3ufDXGlEvBTu4CrFTVNsEIrqQqW3IvjKoW0q+/jS1bduVtFxUVSYsWcUck/eOPr0+NGlWP3PH+dJh9E6x5Axp0hL7jIa7dkdsZY3xX2m6ZZ4DcjSKAJOBHVR0ezCADZcm9eBkZB1i/ftsRif+nn9Lz+vVFhISEeoclfdfyr1evpqsR/9l1cGg3nPYPV0M+IpCRs8aYslLa5D4y39MsXGJfEMT4SsSS+9E7eDCLH3/cntfC/+67X1i3bisbNmz/Q79+XFwMLVs2IOnEaIYfO4EWWZ9zKLYTUee/jsSe5OMZGGPys6GQpkjZ2Tmkpf16xAie9eu3snv3Pga0+o4He82hRlQWr2/ox/Kqg4mLr0tcXAyxsTHExcXkPY6NjaF69QK6e4wxQXdUyV1ElvN7d8wRVLV9cMIrGUvuZUdV2b59D+vWbSVt7Urab32Q1lWXsHRbM0ZP78/mXQVfbK1ZsxqxsTWJi6uVl/AP/wJwr9UkNjaGqlWtu8eYo3G0yb2Z9/AG79/XvX+H4+5MvauYg/4PN1XftsNr03ivC/BvoB+wDxilqkuLORdL7n5ShVWvwWfXonVOYN95H7BjXw3S0/eyY8ceduzIID3dLbmP86/LzMwucLe1a0cX+EVwzDG5j2vlfVnUq1cj8NE/xoS50va5f6Oqpxy2bqmqdijmfd2ADOC1QpJ7P+BGXHJPBv6tqslFBoMl93Jh42yYej7UbgoXzYKYRsW+RVX57bcD7Nixp8gvgPyPc3KO/N0UEerWrfGHLqGC/jrI/bKoW7e6/3V9jAmR0hYOExE5I/ciqoicTmDT880TkeZFbDIAl/gV+EpE6opIQ1XdEkBMxk9Ne8Lgj+HdfjC5O1w0G2o3KfItIkKdOtWpU6c6xx9fv9hD5OTksHPnPn799Y9/FRz+RbBmzRbS0/eyc+feAvcTGRnBMcfUPOJL4MgvBvdlULt29NHfDGZMORJIcr8S+J+I1PGe7wKuCMKxGwOb8j1P89aV7+Ru//EPsx6uahr0vUYAsd7SsrQ7Syt1OMYUrRwOTCk2uatqCpCYm9xVtcwn5RSR0cBogKZNg59IjDEm3AQyzV4DEXkZeFNVd4tIGxG5MgjH/hnI/7d8grfuCKr6oqp2UtVO8fHxQTi0McaEt0CuNI3HTc6Re9XsO2BsEI49DRghzqnA7grR365qS0HLtlR4Ng6ebwg7VvkfT4iXd99ZQqOGY3ni8Y98j8WWcrCUQ4Ek9z
hVfQtvsg5VzQIKHtOWj4hMAhYCrUQkTUSuFJFrReRab5MPgQ3AeuAl4PqjOQFTTsS3h6FzQXPgrbNgxwq/IwqpQYM6MmRIJ5566hMWLdrgdzjGHCGQoZBzcZNjf6qqHbxW9qOq2r0M4juCDYUs535dC2/3hKyDcNFnYV06OCPjAL17P0lmZjaffnobdevanLSmbBU1FDKQlvstuC6U40VkAfAabny6MUc6phUM/dxNvv12T9ia4ndEIRMTE82zzw5n69bd3HnnW1S0Uh4mvAUyXn0p0B04HbgGaKuq34Y6MFOB1TsBLv4cqtaBt3vBlkV+RxQyp5zSjNtvP5fp01OZNCl8z9NUPIGMlqkB3AWMVdUVQHMR6R/yyEzFVqeFS/DV42DKOZD2hd8RhcwNN/TkzDNbMm7cVNat2+p3OMYAgXXLvAIcAk7znv8MPBCyiEz4qN3UddHUbAjv9oVNc/2OKCQiIiJ4+unLiI6O4vrrX+fgwSy/QzImoOR+vKo+BmQCqOo+wG7TNIGp1di14Gs3c+UKfvrM74hC4thj6/Cvfw1j5cqfefjhGX6HY0xAyf2QiFQHV/5XRI4HDoY0KhNeah4LQ+dA3RNgan/44WO/IwqJ3r3bMWrUmbz44ufMmbPa73BMJRdIcv878DHQREQmArOAO0IalQk/Neq7BB/bBt4f4CbjDkPjxp3PSSc15Oab32D79j1+h2MqsUBGy3wKDAJGAZOATqo6N7RhmbBUPdaVCI5PhGmDYN27fkcUdNWrV+X55y8nI+MgY8e+QU5Ojt8hmUoq0ELX3YFeQA+ga+jCMWEvuh4M+RQadIbpQ2HNZL8jCrpWrRry978PYM6cNfz3v/P8DsdUUoEMhXwOuBZYDqwArhGRZ0MdmAlj1erAkJnQ6DT48FJYNcHviIJuxIjT6dOnHQ8+OIPly63msCl7gbTcewJ9VPUVVX0FN3NSz9CGZcJe1Vpuwo+E7vDRCFgx3u+IgkpEeOKJi4mNjeH6619n3z4bg2DKViDJfT2Qv4h6E2+dMaUTVRMGzoBm58DMP8G3L/odUVDFxsbwzDOXsWHDdsaNm+p3OKaSCSS51wJWi8hcr4jYKqC2iEwTkWkhjc6Ev6gacOH70KIffHoNfBNePX5nnNGSMWN6MWnSIqZPX+Z3OKYSCWSavXtCHoWp3KpEwwXvwoyLYfYYyDkEHf/sd1RBc9ttfVmwYB233z6ZU05pSkLCMX6HZCqBQIZCfq6qn+Muph4DZOSu89YbU3pVqsH5b8OJQ2DuLfD1o35HFDRRUZE8++xwcnKUG26YQFZWsdMhGFNqhSZ3EZkhIu28xw1xyf0K4HURGVs24ZlKJTIKzpsEJ10C8++Chff7HVHQNGsWxyOPXMTixT/w1FOf+h2OqQSKarm38KpAAvwJN1nH+UAyLskbE3wRVeDc16HN5fDlPbBgXLmdxqykbPYmU5aKSu6Z+R73wk2Lh6ruwZtyz5iQiIiEPq9Auyvhqwdg/t1hk+AfemgwTZocw5gxE9i1a5/f4ZgwVlRy3yQiN4rIQKADrr4MXhGxqLIIzlRiEZHQ+0VIvBYWPwqf3xoWCT4mJprnnrvcZm8yIVdUcr8SaIurKXOxqu7y1p+Kq/FuTGhJBPR6Dk65CVL+BbNvdBNwV3A2e5MpC4UOhVTVbbiyA4evnwPMCWVQxuQRgR5PQWRVWPIE5GTC2c+7xF+B3XBDT+bP/45x46bSuXMLWrZs4HdIJsxU7P8hpnIQgW6PQfJf3F2sM6+EnIo9nNBmbzKhZsndVAwicMYDcNq9sHI8fDwScip2QrTZm0woWXI3FYcInP53OPNBWD0RPrgMsjOLf185ZrM3mVAJpORvgohMFZHtIrJNRN4RkYSyCM6YAiX/Bbo9Dt+95UoWZB/yO6JSsdmbTCgE0nJ/BZgGNAQaAdOx0TLGb51vgx7/hvVTYdpgyKq4JXXzz9508802e5MJjkCSe7xXyz3LW8YD8SGOy5jidb
jJDZXcMAPevxAy9/sd0VHLnb1p7lybvckERyDJPV1EhotIpLcMB9JDHZgxAUm6Dnr/F36cCe+dD5kV965Pm73JBFMgyf0KYCjwC7AFGIKrNWNM+XDyldB3PGyaA+/2g0MZfkd0VGz2JhNMgZT8/UlVL1DVeFWtr6oXqurGsgjOmIC1HQHnToCfv4B3+sLB3/yO6KjY7E0mWAq9Q1VE7lDVx0TkGeCIAhiqelNIIzOmpFpf4soGf3AJvNMbBn0M0XX9jqrEcmdveuaZzzjrrJM4//wkv0MyFVBRMzHlDrpdUhaBGBMUJw6BiCiYfhFMORsGfwLVK97MRzZ7kymtQrtlVHW693Cfqr6afwEq7lUrE/5OGAAD3oMdK+DtnrBvu98RlZjN3mRKK5ALqncHuM6Y8uO4fnDhNNi5Ft7qAXu3+h1RidnsTaY0ippm71yvv72xiDydbxkPBFTUQ0T6ishaEVkvIncV8HpTEZkjIt+IyLci0u+oz8SYwzXvDQM/gN0/wFtnQcZmvyMqsT/O3vS93+GYCqSolvtmXH/7ASAl3zIN6FPcjkUkEngWOBdoA1wiIm0O2+xvwFuqegowDHiupCdgTJGa9oTBH8OeNJjcHX7b5HdEJfbgg272phtusNmbTOCK6nNP9frXTzisz/1dVd0ZwL67AOtVdYOqHgLeBAYcfhigtve4Du4LxZjgSugKg2fCvm3wVnfY/aPfEZVIrVpu9qZt236z2ZtMwALpc28uIlNEZJWIbMhdAnhfYyB/MynNW5ffvcBwEUnDzdF6YyBBG1NijU+HIZ/CgZ2uBb+rYk1QbbM3mZIKtHDY87h+9h7Aa8CEIB3/EmC8qiYA/YDXRY6cYkdERovIEhFZsn17xRv5YMqJhl3golmQmQGTu8HOdX5HVCI33NCTM89sybhxU1m3ruJdIDZlK5DkXl1VZwHi3a16L3BeAO/7GWiS73mCty6/K4G3AFR1IRANxB2+I1V9UVU7qWqn+HirWWZKoUEHGDrHlQme3B3SK04NdZu9yZREIMn9oNeaXiciY0RkIBATwPsWAy1FpIWIVMVdMJ122DYbgV4AItIal9ytaW5CK749XDzXTbb91lluPHwFYbM3mUAFktxvBmoANwEdgcuBEcW9SVWzgDHATNzdrm+p6koRuU9ELvA2uxW4WkRSgUnAKLWrRaYsxLaBiz+HiCow+SzYtszviAJmszeZQEhJc6k3xHGYqk4MTUhF69Spky5ZYhURTJDsXO/uYs3McKUKju3kd0QB2b//EOed9xQ7duxh1qw7iI+v5XdIxgcikqKqBf7SFnUTU20RuVtE/iMivcUZA6zHlQA2puKrd4JrwVetA2/3gs1f+R1RQKpXr8pzz9nsTaZwRXXLvA60ApYDVwFzgIuAgap6+Hh1YyquOi1cgq8R76pJpn3hd0QBOemkhtxzzwU2e5MpUFHJ/ThVHaWq/4cbstgG6KOqy8okMmPKUu2mMPRzqNkQ3u0Lm+b6HVFARo48w2ZvMgUqKrln5j5Q1WwgTVUPhD4kY3xSq7Frwddu5mZ0+ukzvyMqls3eZApTVHJPFJHfvGUP0D73sYhUzGlujClOzWPdOPi6J8DU/vDDx35HVCybvckUpKjaMpGqWttbaqlqlXyPaxf2PmMqvBr1XYKPbQPvD4Dvpxf/Hp/lzt40adIipk9f5nc4phwIZJy7MZVP9VhXqiA+EaYNgnXv+h1RsW67rS8dOjTj9tsnk5b2q9/hGJ9ZcjemMNH1XLGxBp1h+lBYM9nviIpkszeZ/Cy5G1OUanVgyExodDp8eCmsClbNvNCw2ZtMLkvuxhSnai0Y/BEkdIePRsCKV/yOqEg2e5MBS+7GBCaqJgycAc3OgZlXQOoLfkdUJJu9yVhyNyZQUTXgwvfhuPPgs+tg1o2udHA5ZLM3GUvuxpRElWi4YCp0vAWW/Qfe6lFuJ9622ZsqN0vuxpRUZBSc9SSc9yZsT4UJHSFtvt9RFchmb6
q8LLkbc7ROuhguXeQuuL7dE5b+G8pZ94fN3lR5WXI3pjTi2sJli6HFeTBnLHw4HDL3+h3VH9jsTZWTJXdjSqtaHRjwLpz5IKyZBG+cWu4m37bZmyofS+7GBINEQPJfYPDH7gLrxM7lribNuHHnc9JJDbn55jfYvn2P3+GYELPkbkwwNe8Nw1OgzvHw3gWwYBzklI8yADZ7U+Viyd2YYKvTHIZ9AW3/BF894EoH7y8fhbzyz9700ks2e1M4s+RuTChEVYc+L8PZL8DGWW645NZv/I4K+H32pocestmbwpkld2NCRQQSr4Fh8yEnC948HVa+6ndUNntTJWHJ3ZhQa5gMl6dAw9Pg41Hw2fW+ly2w2ZvCnyV3Y8pCjfow5BPodDukPg+Tu8Oen30NyWZvCm+W3I0pKxFVoPtjcP7bsGMFTOgAmz73NSSbvSl8WXI3pqydOAQu+xqq1YO3e8GSf/pWtsBmbwpfltyN8UNsa5fgTxgAn98KM4bBoQxfQrHZm8KTJXdj/FKtNpw/Bbo+AuumwBvJ8OtaX0IZNKgjgwd3tNmbSqg818mX8hxcQTp16qRLlizxOwxjguunWfDBMMg+CH1fg5YXlnkIe/YcoHfvJ8jMzOazz26nbt0aZR5DeXbwYBarV29m2bKNLFu2idTUjQwc2IGbbjrHt5hEJEVVOxX0WpWyDsYYU4BmvVzZgmmDYdpA6HI3nHE/RESWWQi5szcNGPA0d9zxFv/3fyMRkTI7fnmSnZ3D+vVbWbZsk5fMN7J69WYOHXLXJGJjY0hKakLTprE+R1o4S+7GlBe1m7obnmbfCF8/DFuXQL83oEZcmYWQO3vTww9/wKRJi7j00lPL7Nh+UVU2bkzPS+SpqZtYvjyNvXvdzV0xMdVITGzCVVd1JympCUlJTWncuF65/+KzbhljyqNv/wuzb4CaDeGCd6BBxzI7dE5ODsOGvUBKyk98/PEttGzZoMyOXRa2bfstr2slN5nv3Olq8FerVoW2bRuTmOiSeFJSU44/Pp6IiPJ5ebKobhlL7saUV78shmlDYN9W6PUcnHxF2R36l9306vU4jRrVZcaMsVSrVjH/yN+9ez+pqS6B5yb0LVt2ARARIZx0UsO8RJ6Y2ISTTmpI1aoV51wtuRtTUe3bAR9cAhs/g/ajocfTUKVamRz6k09WMGrUy4we3Z17772wTI5ZGvv2HWLlyp9JTd3IN9+4hL5hw/a811u0iCMxsSlJSU1ITGzKySc3pkaNsvksQ8UuqBpTUdWIcxOALPgbfP0IbFvmhk/WbhLyQ+efval791b06NE65McMVGZmNmvWbCE19ffulbVrfyE729Wob9iwDomJTRk6tDOJia5VXtlG/4S05S4ifYF/A5HAf1X1kQK2GQrcCyiQqqqXFrVPa7mbSmvdVPh4JERGQ/83oWnPkB9y//5DnHfeU+zYsYdZs+4gPr5WyI95uJycHDZs2JGvRb6RlSs3c+BAJgB169b4Q9dKUlJTjj22TpnH6QdfumVEJBL4DjgHSAMWA5eo6qp827QE3gJ6qupOEamvqtuK2q8ld1Oppa+BaYNg51p381On21xp4RBas2YL/fr9i1NPPZ4JE64O6cVFVeXnn3d5LXLXtZKauok9ew4Abjap9u0T/pDImzWLLfcjV0LFr26ZLsB6Vd3gBfEmMABYlW+bq4FnVXUnQHGJ3ZhKL/YkuGwRzLwS5t0BWxZB31egauha1LmzN/3lL+/w0kvzuOaas4K27/T0jHwXO92yY4crwxAVFUnr1g0ZOLBDXl95y5YNqFKl7Mb+V2ShTO6NgU35nqcByYdtcyKAiCzAdd3cq6ofH74jERkNjAZo2rRpSII1psKoWgv6T4aUZJh3J0xcCRdMdYk/REaOPIPPP1/LQw/N4PTTT+DkkxNKvI+MjAN8+22a1yJ3feWbNrlKlCJCy5b16dmzdV4ib926EdHRUcE+lUojlN0yQ4C+qnqV9/xyIFlVx+TbZgaQCQ
wFEoB5wMmququw/Vq3jDH5bJwDMy6GrP3QdzycODhkh0pPz+Ccc56gZs1qzJx5S5EjTQ4ezGLVqp/zbtP/5puNrF+/La8WS5Mmx+SNWklKakr79gnExESHLPZw5Ve3zM9A/kv6Cd66/NKARaqaCfwgIt8BLXH988aY4jTtAZcvdWULpg+BznfAmQ+62vFBljt709ChzzNu3FSefHIY4G7V/+67X/ISubtVfwuZme5W/fj4WiQmNmHAgFPy+spjY2OCHp/5o1Am98VASxFpgUvqw4DDR8K8B1wCvCIicbhumg0hjMmY8FMrAS6eB3NuhsWPubIF570JNeKDfqjc2ZueeeYzMjOz2bTpV779No39+920gbVqRZOY2ITRo7vn3eHZqFHdSnvB00+hHgrZD3gK15/+P1V9UETuA5ao6jRxP/Engb5ANvCgqr5Z1D6tW8aYIqx4BT67zk3rd8E7cGznoB8iMzObIUOeZfnyNNq2bZxXbyUxsSnHHRdXbm/VD0d2h6oxlcnWpW645N4t0PM/0P7qoB8iOzsHVbWRKz4rKrnbV6wx4aZBB1c+OOEs+HQ0zLwKsg4E9RCRkRGW2PduhYzNfkdRKEvuxoSj6rEw6ENI/iuseBne7Aq//eR3VBXb/nRY9y7MGgPj28ILx0LKv/yOqlBWW8aYcBURCWc+4PrdPxoBr3d0ZQuane13ZBXDwd/g5/mwcbZbtqcCClVqQEJXaDMSjuvnd5SFsuRuTLg7YQBcttj1w7/TB854ELrcGfKyBRVO5j74eQFsmgObZsMvS0CzIbIaNDodzrgPmvRwX5aRVf2OtliW3I2pDI45ES79Cj65Cr64G3752t30VK2235H5J+sg/LLo95b5lq8gJ9PdI3BsF0i+G5r0hEanQZWKd4OVJXdjKouqMXDeJGh4Knx+G0zsDAOmQmwbvyMrGzlZrjW+aba7s3fzAndnr0RA/Q7Q8c+uZd74TPdZVXCW3I2pTESg41g3omb6UJjYBfq8Aq0u8juy4MvJdv3km+a4lvnP8+HQHvdafHs3+UmTnpDQDaLr+hpqKFhyN6YySujmhktOvwhmDIUtt0K3R0JStqDMqEL6KpfIN82BtLlwYKd7rV4raD3ctcybnBWSu3fLmwr8kzTGlEqtxnDxXJh7C6Q8CdtSXLXJGvX9jiwwqrBr/e8t801zYJ9XNbxOCzhhoJvQpEkPiGnkb6w+sORuTGUWWRV6/cddQPzsGni9g5vGr9GpfkdWsN82/p7IN86GjDS3PqYRNOvtEnnTHi65V3KW3I0x0HaE64eeNggmd4OeT0P7a/wfLrn3F3fxc5M3omW3V1ewepyXyHu6fvN6Lf2PtZyx5G6MceonwWVL4KPhrvjYlq+g1/MQVb3sYtifDpvm/t4y/3W1W1+tjiun0OFml9Tj2rpRLqZQltyNMb+rfgwMnAFf/gO+ug+2f+uqS4aqm+Pgbkib//vwxNy7QKNqQuOu0O5PrnUen+TuuDUBs+RujPkjiYAz/uGVLRgOEzrBeW9A8z6l33fm3t/vAt0429We1xx3F2jjM+CM+/PdBWpT7JWGJXdjTMGO7++6aaYNgnfOdYk3+e6SdYdkHXTdOxtnu9b5lkW/3wXa8FRX2KxpT/e4At4FWp5ZcjfGFK7eCXDpQvhkNCz4m1e24NXCb/rJznSt8dwRLZsXuHLDEgENOkLHW9xolkZnhMVdoOVZhZusQ0S2A37XLo0Ddvgcg58q+/mDfQZgnwH4/xk0U9UC78iqcMm9PBCRJYXNflIZVPbzB/sMwD4DKN+fgY0lMsaYMGTJ3RhjwpAl96Pzot8B+Kyynz/YZwD2GUA5/gysz90YY8KQtdyNMSYMVdrkLiL/E5FtIrIixMcZLCIqIuXyirqIRIrINyIyIwT7ThKRr0RkmYgsEZEuwT7G0RCRP4vIShFZISKTRCSod8+IyEXe/nMO/7mLSHsRWei9vjzYxy4ipkJ/30XkRhFZ48X0WJCP20
xElnq/AytF5FpvfQ0R+SDfcR8J5nELiCNaRL4WkVTveP/I99pEEVnr/T78T0SCfmtscT93EZkW9FykqpVyAboBHYAVJXhPvRIeoxYwD/gK6OT3ORcS4y3AG8CMYH8GwCfAud7jfsDccnC+jYEfgOre87eAUUE+79ZAK2Bu/p877qbBb4FE73ksEFlG513g7zvQA/gMqOY9rx/kz6Jqvn3HAD8CjYAaQI9828zP/V0J0fkLEOM9jgIWAafm+90Ub5kEXBfkz6DInzswyPs/GHAuCmSptC13VZ0H/FrCt73nfcNeICKB3N17P/AocKDEAZYBEUkAzgP+W4K3LfFaOj1Fiq2xqkDuDMx1gM1HEWYoVAGqez/DGgQWV8DnraqrVXVtAS/1Br5V1VRvu3RVzS5p8EejiN/364BHVPWgt922AHZ3sdfKvVVEipzSSFUP5e4bqIbXW6Cq+1R1Tu42wFIgIbCzKTl1MrynUd6i3msfeq8r8HWAcdzu/SVwjYgUN8t4oT93EYnBNbAeKPlZFa3SJvejdBbwT2AIsFpEHhKREwraUEQ6AE1U9YMyjK+kngLuAHJK8J4Tca2bMcAqEfmLiBQ2zc1Y4HER2QQ8Adx99KEGh6r+7MWyEdgC7FbVTwJ4a0nOu6h9qIjM9Loq7ijh+0PhRKCriCwSkc9FpHNxb1DVF4BzcV+M80Rkioj0FSm46IyINBGRb4FNwKOquvmw1+sC5wOzSnkuRfK6IJcB24BPVXXRYa9HAZcDHxe3L1X9i7ftccBSEXlFRM4sZPOifu73A08C+0p8QgEEWWkXoDlH+acQrkX6IJAFDD7stQjcn+TNvedzKWfdMkB/4Dnv8VkE2C1z2D7ica3+LKBLAa8/nfvZAEOBz8rBedcDZnuxRwHvAcODed75tvvDzx24DdclFIdLjAuBXmV47kf8vgMrgGdwXRJdvPikBPsUXLfGZmBaMds2wrWMG+RbVwX4CBhbhp9DXWAO0O6w9S8BTx3F/iKB4cBu4OkCXi/w5w4k5X5mpclFhS3Wci9E7re8t9yXb311EbkUeBfoA9wMfHrY22sB7YC5IvIjcCowrZxdVD0DuMCL702gp4hMyL+B1+LK/Qyuzbe+johcA0wDWgJX4PoUDzcS9zkBvI1LHn47G/hBVberaiYuvtPzbxCE8y5MGjBPVXeo6j7gQ1w/uJ/SgHfV+Rr3V1xc/g1E5MHcz+Ow9V2A53Bf4m9RzF9m6lrsK4Cu+Va/CKxT1adKeR4BU9VduOTeN3ediPwd96V9S0Hv8Vrmy0Tkw3zrRER6Aq8C9+A+hycLeHthP/fTgE7e/8EvgBNFZG6pTzBXWX1blseFEn5bAo/hvoH/A5xSgvfNpZy13A+L7ywCv6A6AfgeeARoWcy2q4GzvMe9gJRycK7JwEpcC0pw/zFvDOZ5F/Zzx/3VsNQ7dhXchczzyvDcj/h9B64F7vMen4jrOimy5Y7Xh4y7YD4UqFrEtgn8fvG6HvAdcLL3/AHgHSCiDM49HqjrPa6Ou4Db33t+FfBlbpwB7u8yYK0Xfz+KuDAeyM+9pLkooBjL6hervC24/tMtQCbum/XKAN7TD4g+imOFU3K/AKgS4LZnAilAKm50Qke/z9WL6x/AGlwr8nW80RxBPO+B3u/UQWArMDPfa8O9L5cVwGNleM4F/r7jRqpM8OJZCvQMYF8dcdUIAznuOd4XQar372hvfQLuguZqYJm3XBXC828PfOPFsAK4J99rWbgv7tw47glgf2cC8SU4fpE/91Akd7tD1RhjwpD1uRtjTBiy5G6MMWHIkrsxxoQhS+7GGBOGLLkbY0wYsuRuwoqINBCRN0Rkg4ikeJX4BnqvnSUiu72bUVZ7N64gIqNE5D+H7WduObvpzJgSseRuwoZX0Os93N2Ax6lqR2AYfywENV9Vk4BOwHCvBpAxYceSuwknPYFD6gpbAaCqP6nqM4dvqKp7cTdYFVj4rTAi8qOIxHmPO+XeLi4i3f
OVLPhGRGqJSIyIzPKKRS0XkQH59jNOXA3xL8TVlL/NW3+8iHzs/dUxX0ROOpoPwphAytYaU1G0xd1lWSwRicXV/LkfKLYSYgBuA25Q1QVeGdfcMs8DVfU37wvhKxGZhvurYTCQiCtethT3RQOu1sq1qrpORJJxtVt6BiE+U8lYcjdhS0Sexd0mfkhVcxN4VxH5Blcg6xFVXVlE33pJbt9eAPxTRCbiCnGleSVkHxKRbt7xGgMNcEXb3lfVA8ABEZnuxRuDK2L2tvxeMr5aCWIwJo8ldxNOVuJaxACo6g1ei3lJvm3mq2r/w96XjivulN8xwI4CjpHF792ZeVOlqeojIvIBrv7QAhHpg/vLIB5XUyfTq/5X1LR6EcAu75qAMaVife4mnMwGokXkunzragTwvsXAGSJyLLi+dFyLeVMB2/6IK5wF+b5IROR4VV2uqo96+zsJN/vUNi+x9wCaeZsvAM4XN69nDK62Pqr6G/CDiFzk7VNEJDGA+I05grXcTdhQVRWRC4F/ebPdbAf2AncW876tInIz8KE3m1AGcImqFjRD1T+Al0Xkfly1z1xjvQSeg/sL4iNcXf/pIrIc99fDGu94i72+929xVSOX4yZ6AFdK9nkR+RuuP/5NXEVFY0rEqkIa4wMRiVHVDBGpgZtEfbSqBnQx2JhAWMvdGH+8KCJtcH3wr1piN8FmLXdjjAlDdkHVGGPCkCV3Y4wJQ5bcjTEmDFlyN8aYMGTJ3RhjwpAld2OMCUP/D4LzvOpD9rk/AAAAAElFTkSuQmCC\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# plot the computing time\n", - "print(tot_time_hpc2)\n", - "plot_computation_time(tot_time_hpc1, tot_time_hpc2, ngpus_sort, names_hpc, \\\n", - " \"./total_computation_time_{0}_vs_{1}\".format(*names_hpc), log_yvals=False)\n", - "\n", - "plot_computation_time(overhead_hpc1, overhead_hpc2, ngpus_sort, names_hpc, \\\n", - " \"./overhead_time_{0}_vs_{1}\".format(*names_hpc))\n", - "# plot speed-up factors\n", - "plot_speedup(tot_time_hpc1, tot_time_hpc2, ngpus_sort, names_hpc)" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [], - "source": [ - "## evaluate iteration time\n", - "# get iteration times\n", - "iter_data_hpc1 = get_iter_time_all(base_dir, wildcard_hpc1, gpu_id_str=gpu_id_str[0])\n", - "iter_data_hpc2 = get_iter_time_all(base_dir, wildcard_hpc2, gpu_id_str=gpu_id_str[1])" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Saving plot in file: 
boxplot_iter_time_Juwels_vs_Booster.png ...\n" - ] - } - ], - "source": [ - "# plot the iteration time in box plots\n", - "boxplot_iter_time(iter_data_hpc1, iter_data_hpc2, ngpus_sort, names_hpc)" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "metadata": {}, - "outputs": [], - "source": [ - "def get_slowiter(iter_time, threshold):\n", - " inds_slow = np.where(iter_time > threshold)[0]\n", - " return iter_time[inds_slow], np.shape(inds_slow)[0]\n", - "\n", - "def ana_slowiter(itertime1, itertime2, thres, names):\n", - " slowt1, nslow1 = get_slowiter(itertime1, thres)\n", - " slowt2, nslow2 = get_slowiter(itertime2, thres)\n", - " \n", - " if nslow1 > 0:\n", - " print(\"{0:d} slow iteration steps on {1} with averaged time of {2:5.2f}s (max: {3:5.2f}s)\"\\\n", - " .format(nslow1, names[0], np.mean(slowt1), np.max(slowt1)))\n", - " else: \n", - " print(\"No slow iterations on {0}\".format(names[0]))\n", - " \n", - " if nslow2 > 0:\n", - " print(\"{0:d} slow iteration steps on {1} with averaged time of {2:5.2f}s (max: {3:5.2f}s)\"\\\n", - " .format(nslow2, names[1], np.mean(slowt2), np.max(slowt2)))\n", - " else: \n", - " print(\"No slow iterations on {0}\".format(names[1]))" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "***** Analyse single GPUs experiments *****\n", - "1 slow iteration steps on Juwels with averaged time of 5.18s (max: 5.18s)\n", - "No slow iterations on Booster\n", - "***** Analyse 4 GPUs experiments *****\n", - "No slow iterations on Juwels\n", - "No slow iterations on Booster\n", - "***** Analyse 8 GPUs experiments *****\n", - "No slow iterations on Juwels\n", - "No slow iterations on Booster\n", - "***** Analyse 32 GPUs experiments *****\n", - "No slow iterations on Juwels\n", - "No slow iterations on Booster\n", - "***** Analyse 32 GPUs experiments *****\n", - "No slow iterations on Juwels\n", - "No slow iterations 
on Booster\n", - "***** Analyse 64 GPUs experiments *****\n", - "No slow iterations on Juwels\n", - "No slow iterations on Booster\n" - ] - } - ], - "source": [ - " \n", - "## settings\n", - "names = [\"Juwels\", \"Booster\"]\n", - "slowiter_time = 5. # arbitrary threshold for slow iteration steps\n", - "\n", - "# analyze single GPU experiments\n", - "print(\"***** Analyse single GPUs experiments *****\")\n", - "itertime_juwels = iter_data_hpc1[\"1 GPU(s)\"]\n", - "itertime_booster = iter_data_hpc2[\"1 GPU(s)\"]\n", - "\n", - "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", - "\n", - "# analyze 4 GPUs experiments\n", - "print(\"***** Analyse 4 GPUs experiments *****\")\n", - "itertime_juwels = iter_data_hpc1[\"4 GPU(s)\"]\n", - "itertime_booster = iter_data_hpc2[\"4 GPU(s)\"]\n", - "\n", - "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", - "\n", - "# analyze 8 GPUs experiments\n", - "print(\"***** Analyse 8 GPUs experiments *****\")\n", - "itertime_juwels = iter_data_hpc1[\"8 GPU(s)\"]\n", - "itertime_booster = iter_data_hpc2[\"8 GPU(s)\"]\n", - "\n", - "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", - "\n", - "# analyze 16 GPUs experiments\n", - "print(\"***** Analyse 32 GPUs experiments *****\")\n", - "itertime_juwels = iter_data_hpc1[\"16 GPU(s)\"]\n", - "itertime_booster = iter_data_hpc2[\"16 GPU(s)\"]\n", - "\n", - "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", - "\n", - "# analyze 32 GPUs experiments\n", - "print(\"***** Analyse 32 GPUs experiments *****\")\n", - "itertime_juwels = iter_data_hpc1[\"32 GPU(s)\"]\n", - "itertime_booster = iter_data_hpc2[\"32 GPU(s)\"]\n", - "\n", - "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", - "\n", - "# analyze 64 GPUs experiments\n", - "print(\"***** Analyse 64 GPUs experiments *****\")\n", - "itertime_juwels = iter_data_hpc1[\"64 GPU(s)\"]\n", - 
"itertime_booster = iter_data_hpc2[\"64 GPU(s)\"]\n", - "\n", - "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Summary\n", - "- Occasionally, a few iteration steps are slow\n", - "- However, performance degradation seems to be much worser on Booster than on Juwels\n", - "- Higher chance for slow iteration steps on Booster in general" - ] - }, - { - "cell_type": "code", - "execution_count": 157, - "metadata": {}, - "outputs": [], - "source": [ - "def boxplot_iter_total_time(iteration_time, total_time, ngpu_list, name, log_yvals=False):\n", - " nexps = len(ngpu_list)\n", - " bar_width = 0.35\n", - " # create data lists for boxplot-routine\n", - " iter_time_all = []\n", - " for i in np.arange(nexps):\n", - " iter_time_all.append(iteration_time[\"{0} GPU(s)\".format(ngpu_list[i])])\n", - " \n", - " # trick to get list with duplicated entries\n", - " xlabels = [val for val in ngpu_list for _ in (0, 1)]\n", - " nlabels = len(xlabels)\n", - "\n", - " # Multiple box plots on one Axes\n", - " #fig, ax = plt.subplots()\n", - " fig = plt.figure(figsize=(6,4))\n", - " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", - " \n", - " bp = ax.boxplot(iter_time_all, positions=np.arange(0, nlabels, 2), notch=0, sym='+', vert=1, showfliers=False, widths=bar_width) # Outliers for initialization are disturbing\n", - " ax.set_xlabel('# GPUs')\n", - " ax.set_ylabel('Time [s]')\n", - " \n", - " # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n", - " num_boxes = len(iter_time_all)\n", - " medians = np.empty(num_boxes)\n", - " for i in range(num_boxes):\n", - " box = bp['boxes'][i]\n", - " boxX = []\n", - " boxY = []\n", - " for j in range(5):\n", - " boxX.append(box.get_xdata()[j])\n", - " boxY.append(box.get_ydata()[j])\n", - " box_coords = np.column_stack([boxX, boxY])\n", - " ax.add_patch(Polygon(box_coords, facecolor=colors[1]))\n", - " # Now draw the 
median lines back over what we just filled in\n", - " med = bp['medians'][i]\n", - " medianX = []\n", - " medianY = []\n", - " for j in range(2):\n", - " medianX.append(med.get_xdata()[j])\n", - " medianY.append(med.get_ydata()[j])\n", - " ax.plot(medianX, medianY, 'k')\n", - " medians[i] = medianY[0]\n", - " # Finally, overplot the sample averages, with horizontal alignment\n", - " # in the center of each box\n", - " ax.plot(np.average(med.get_xdata()), np.average(iter_time_all[i]),\n", - " color='w', marker='*', markeredgecolor='k', markersize=10)\n", - " \n", - " ax2 = ax.twinx()\n", - " x_pos = np.arange(1, nlabels+1 ,2)\n", - " \n", - " ytitle = \"Time [min]\"\n", - " max_time = np.max(total_time)\n", - " time_order = val_order(max_time)\n", - " ymax = np.ceil(max_time/(10**time_order) + 0.5)*(10**time_order) + 10**time_order\n", - " # np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n", - " if log_yvals: \n", - " total_time = np.log(total_time)\n", - " ytitle = \"LOG(Time) [min]\"\n", - " ymax = np.ceil(np.max(total_time) + 0.5)\n", - " \n", - " # create data bars\n", - " rects = ax2.bar(x_pos, np.round(total_time, 2), bar_width, label=names, color=colors[0])\n", - " # customize plot appearance\n", - " # Add some text for labels, title and custom x-axis tick labels, etc.\n", - " ax2.set_ylabel(ytitle)\n", - " ax2.set_xticks(np.arange(0, nlabels))\n", - " ax2.set_xticklabels(xlabels)\n", - " ax2.set_xlabel('# GPUs')\n", - " ax2.set_ylim(0., ymax)\n", - " \n", - " # add labels\n", - " autolabel(ax2, rects, rot=45) \n", - "\n", - " plt_fname = \"iter+tot_time_{0}_vs_{1}\".format(*names)\n", - " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", - " #plt.show()\n", - " plt.savefig(plt_fname+\".png\")\n", - " plt.close()\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Saving plot in file: 
iter+tot_time_Juwels_vs_Booster.png ...\n" - ] - } - ], - "source": [ - "boxplot_iter_total_time(iter_data_hpc2, tot_time_hpc2, ngpus_sort, names_hpc[1])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}