diff --git a/Jupyter_Notebooks/calc_climatolgical_mean.ipynb b/Jupyter_Notebooks/calc_climatolgical_mean.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0094d2fe5617b0bb2af9241b21b5feefc3294afb --- /dev/null +++ b/Jupyter_Notebooks/calc_climatolgical_mean.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "annoying-jamaica", + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys, time\n", + "import xarray as xr\n", + "import pandas as pd\n", + "import datetime as dt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "legislative-portugal", + "metadata": {}, + "outputs": [], + "source": [ + "datadir = \"/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly\"\n", + "\n", + "datafile = \"1970-1999_t2m.nc\"\n", + "\n", + "datafile= os.path.join(datadir, datafile)\n", + "\n", + "datafile=\"/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly/t2m_1970_1999.nc\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "through-cornell", + "metadata": {}, + "outputs": [], + "source": [ + "with xr.open_dataset(datafile) as dfile:\n", + " t2m_all = dfile[\"var167\"]\n", + " coords = t2m_all.coords" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "steady-implement", + "metadata": {}, + "outputs": [], + "source": [ + "ntimes = len(coords[\"time\"])\n", + "\n", + "t2m_all = t2m_all.chunk({\"time\": ntimes, \"lat\":100, \"lon\":100})" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "universal-neutral", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Registering averaging took 1.08\n", + "Performing averaging took 1.08\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n", + "/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", + "chunk and silence this warning, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", + " ... array[indexer]\n", + "\n", + "To avoid creating the large chunks, set the option\n", + " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", + " ... array[indexer]\n", + " return self.array[key]\n" + ] + } + ], + "source": [ + "# define a function with the hourly calculation:\n", + "def hour_mean(x):\n", + " return x.groupby('time.hour').mean('time')\n", + "\n", + "time0 = time.time()\n", + "t2m_hourly = t2m_all.groupby(\"time.month\").apply(hour_mean)\n", + "\n", + "print(\"Registering averaging took {0:.2f}\".format(time.time()-time0))\n", + "\n", + "#print(t2m_hourly.values)\n", + "\n", + "print(\"Performing averaging took {0:.2f}\".format(time.time()-time0))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "signed-edmonton", + "metadata": {}, + "outputs": [], + "source": [ + "t2m_hourly = t2m_hourly.compute()" + ] + }, + { + "cell_type": "markdown", + "id": "sustainable-significance", + "metadata": {}, + "source": [ + "This works, but it takes about 3 minutes to process 30 years of data. <br>\n", + "However, the same operation is possible with CDO and only takes 36s to finish on Juwels. <br> \n", + "The two following shell commands (after loading CDO 1.9.8 and ecCodes 2.18.0) are:\n", + "```\n", + "clim_files=($(for year in {1991..2020}; do echo \"${year}_t2m.grb\"; done))\n", + "cdo -t ecmwf -f nc ensavg ${clim_files[@]} mutilyears_1991-2020.nc\n", + "```\n", + "In the following, we check the correctness of the data by computing the difference btween the data from a CDO-generated file against the data produced above. We choose the mean temperature in January at 12 UTC as an example." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "rental-suite", + "metadata": {}, + "outputs": [], + "source": [ + "datafile_cdo = os.path.join(datadir, \"climatology_t2m_1970-1999.nc\")\n", + "\n", + "with xr.open_dataset(datafile_cdo) as dfile:\n", + " t2m_hourly_cdo = dfile[\"T2M\"]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "better-adventure", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<xarray.DataArray ()>\n", + "array(0.00097656, dtype=float32)\n", + "Coordinates:\n", + " hour int64 12\n", + " month int64 1\n", + " time datetime64[ns] 1979-01-01T12:00:00\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "test1 = t2m_hourly.sel(month=1, hour=12)\n", + "test2 = t2m_hourly_cdo.sel(time=\"1979-01-01 12:00\")\n", + "\n", + "diff = np.abs(test1-test2)\n", + "\n", + "print(np.max(diff))" + ] + }, + { + "cell_type": "markdown", + "id": "weird-sociology", + "metadata": {}, + "source": [ + "Thus, the maximum difference is in the $\\mathcal{O} (10^{-3})$ which can be neglected for our application." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "running-monday", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Jupyter_Notebooks/conditional_quantile_plot.ipynb b/Jupyter_Notebooks/conditional_quantile_plot.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d652d714e61a8df1ea5e004e6195b8e826d9fb59 --- /dev/null +++ b/Jupyter_Notebooks/conditional_quantile_plot.ipynb @@ -0,0 +1,398 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "enabling-vampire", + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "import glob\n", + "import datetime as dt\n", + "import numpy as np\n", + "import xarray as xr\n", + "\n", + "import time " + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "qualified-statement", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8440\n", + "Data variables:\n", + " 2t_in (in_hour, lat, lon) float32 ...\n", + " tcc_in (in_hour, lat, lon) float32 ...\n", + " t_850_in (in_hour, lat, lon) float32 ...\n", + " 2t_ref (fcst_hour, lat, lon) float32 ...\n", + " tcc_ref (fcst_hour, lat, lon) float32 ...\n", + " t_850_ref (fcst_hour, lat, lon) float32 ...\n", + " 2t_savp_fcst (fcst_hour, lat, lon) float32 ...\n", + " tcc_savp_fcst (fcst_hour, lat, lon) float32 ...\n", + " t_850_savp_fcst (fcst_hour, lat, lon) float32 ...\n", + " 2t_persistence_fcst (fcst_hour, lat, lon) float64 ...\n", + " tcc_persistence_fcst (fcst_hour, lat, lon) float64 ...\n", + " t_850_persistence_fcst (fcst_hour, lat, lon) float64 ...\n" + ] + } + ], + "source": [ + "# exemplary model to evaluate\n", + "forecast_path = \"/p/home/jusers/langguth1/juwels/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-80x48-3960N0180E-2t_tcc_t_850_langguth1/savp/20210505T131220_mache1_karim_savp_smreg_cv3_3\"\n", + "fnames= os.path.join(forecast_path, \"vfp_date_*sample_ind_*.nc\" )\n", + "# get a list of all forecast files\n", + "fnames = glob.glob(fnames)\n", + "\n", + "# randomly open one file to take a look at its content\n", + "dfile = xr.open_dataset(fnames[99])\n", + "\n", + "print(dfile.data_vars)\n", + "#print(dfile[\"init_time\"])\n", + "#print(dfile[\"2t_savp_fcst\"][2])" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "certain-webmaster", + "metadata": {}, + "outputs": [], + "source": [ + "# some auxiliary functions to enhance data query with open_mfdataset\n", + "def non_interst_vars(ds):\n", + " \"\"\"\n", + " Creates list of variables that are not of interest. For this, vars2proc must be defined at global scope\n", + " :param ds: the dataset\n", + " :return: list of variables in dataset that are not of interest\n", + " \"\"\"\n", + " return [v for v in ds.data_vars\n", + " if v not in vars2proc]\n", + "#\n", + "# ====================================================================================================\n", + "\n", + "\n", + "def get_relevant_vars(ds):\n", + " \"\"\"\n", + " Drops variables that are not of interest from dataset and also shrinks data to cells of interest.\n", + " For this, ncells must be a dimension of the dataset and dmask_ref_inds must be defined at gloabl scope\n", + " :param ds: the dataset\n", + " :return: dataset with non-interesting variables dropped and data shrinked to region of interest\n", + " \"\"\"\n", + " return ds.drop(non_interst_vars(ds)).isel(fcst_hour=11)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "shaped-checklist", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Registering and loading data took 1057.31 seconds\n" + ] + } + ], + "source": [ + "# choose variable of interest and load data into memory (i.e. the dataset is not a dask-array anymore!!!)\n", + "# This takes about 15 minutes due to high IO-traffic (openening more than 8500 files)\n", + "vars2proc = [\"2t_savp_fcst\", \"2t_ref\"]\n", + "\n", + "time0 = time.time()\n", + "with xr.open_mfdataset(fnames, decode_cf=True, combine=\"nested\", concat_dim=[\"init_time\"], compat=\"broadcast_equals\", preprocess=get_relevant_vars) as dfiles:\n", + " data = dfiles.load()\n", + " print(\"Registering and loading data took {0:.2f} seconds\".format(time.time()- time0))" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "three-energy", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<xarray.Dataset>\n", + "Dimensions: (init_time: 8440, lat: 48, lon: 80)\n", + "Coordinates:\n", + " * init_time (init_time) datetime64[ns] 2010-08-20T05:00:00 ... 2010-03-...\n", + " * lat (lat) float64 53.7 53.4 53.1 52.8 52.5 ... 40.5 40.2 39.9 39.6\n", + " * lon (lon) float64 1.8 2.1 2.4 2.7 3.0 ... 24.3 24.6 24.9 25.2 25.5\n", + " fcst_hour int64 12\n", + "Data variables:\n", + " 2t_savp_fcst (init_time, lat, lon) float32 291.3 291.8 ... 288.5 288.2\n", + " 2t_ref (init_time, lat, lon) float32 292.2 292.1 ... 288.5 288.6\n" + ] + } + ], + "source": [ + "# Take a look at the data\n", + "print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "stunning-emission", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313]\n" + ] + } + ], + "source": [ + "# get the vaiables of interest as data arrays\n", + "data_fcst, data_ref = data[\"2t_savp_fcst\"], data[\"2t_ref\"]\n", + "\n", + "# create the bins for which quantiles are plotted based on forecasts (=conditioning variable)\n", + "fcst_min, fcst_max = np.floor(np.min(data_fcst)), np.ceil(np.max(data_fcst))\n", + "x_bins = list(np.arange(int(fcst_min), int(fcst_max) + 1))\n", + "# center point of bins\n", + "x_bins_c = 0.5*(np.asarray(x_bins[0:-1]) + np.asarray(x_bins[1:]))\n", + "nbins = len(x_bins) - 1" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "generous-correction", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# set the quantiles and initialize data array\n", + "quantiles = [0.05, 0.5, 0.95]\n", + "nquantiles = len(quantiles)\n", + "quantile_panel = xr.DataArray(np.full((nbins, nquantiles), np.nan), coords={\"bin_center\": x_bins_c, \"quantile\": quantiles},\n", + " dims=[\"bin_center\", \"quantile\"])\n", + "# populate the quantile data array\n", + "for i in np.arange(nbins):\n", + " # conditioning of ground truth based on forecast\n", + " data_cropped = data_correct[\"2t_ref\"].where(np.logical_and(data_correct[\"2t_savp_fcst\"] >= x_bins[i],\n", + " data_correct[\"2t_savp_fcst\"] < x_bins[i+1]))\n", + " # quantile-calculation\n", + " quantile_panel.loc[dict(bin_center=x_bins_c[i])] = data_cropped.quantile([0.05, 0.5, 0.95])\n", + " \n", + "# transform \n", + "x_bins_c = x_bins_c - 273.15\n", + "quantile_panel = quantile_panel - 273.15" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "polyphonic-shelter", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'plt' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-1-94561a8add4c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# create plot of conditional forecast\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0max\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubplots\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mls_all\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"--\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"-\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"--\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mlw_all\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m2.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2.\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'plt' is not defined" + ] + } + ], + "source": [ + "# create plot of conditional forecast\n", + "fig, ax = plt.subplots(figsize=(12,6))\n", + "\n", + "ls_all = [\"--\", \"-\", \"--\"]\n", + "lw_all = [2., 1.5, 2.]\n", + "ax.plot(x_bins_c, x_bins_c, color='k', label='reference 1:1', linewidth=1.)\n", + "for i in np.arange(3):\n", + " ax.plot(x_bins_c, quantile_panel.isel(quantile=i), ls=ls_all[i], color=\"k\", lw=lw_all[i])\n", + " \n", + "ax.set_ylabel(\"2m temperature from ERA5 [°C]\", fontsize=16)\n", + "ax.set_xlabel(\"Predicted 2m temperature from SAVP [°C]\", fontsize=16)\n", + "\n", + "ax.tick_params(axis=\"both\", labelsize=14)\n", + "\n", + "fig.savefig(\"./first_cond_quantile.png\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "creative-athens", + "metadata": {}, + "outputs": [], + "source": [ + "#data_grouped = data_correct.groupby_bins(\"2t_savp_fcst\", x_bins)#.groups" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "brief-antarctica", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[[22 22 22 ... 18 18 18]\n", + " [22 22 22 ... 18 18 18]\n", + " [22 22 22 ... 19 19 19]\n", + " ...\n", + " [29 29 29 ... 30 30 30]\n", + " [29 30 29 ... 31 31 31]\n", + " [29 30 29 ... 31 31 31]]\n", + "\n", + " [[21 21 21 ... 20 20 20]\n", + " [20 21 21 ... 20 20 20]\n", + " [20 21 21 ... 20 20 20]\n", + " ...\n", + " [30 30 30 ... 31 31 31]\n", + " [30 30 30 ... 31 31 31]\n", + " [30 30 30 ... 31 31 31]]\n", + "\n", + " [[21 21 21 ... 21 21 21]\n", + " [21 21 21 ... 21 21 21]\n", + " [21 21 21 ... 21 21 21]\n", + " ...\n", + " [28 28 28 ... 31 31 31]\n", + " [28 28 28 ... 32 32 31]\n", + " [28 29 29 ... 32 32 32]]\n", + "\n", + " ...\n", + "\n", + " [[22 22 22 ... 20 20 20]\n", + " [22 22 22 ... 20 20 20]\n", + " [22 21 21 ... 20 20 20]\n", + " ...\n", + " [29 29 29 ... 31 31 31]\n", + " [29 29 29 ... 32 32 32]\n", + " [30 30 29 ... 32 32 32]]\n", + "\n", + " [[21 21 21 ... 20 20 20]\n", + " [20 21 21 ... 20 20 20]\n", + " [20 20 21 ... 20 20 20]\n", + " ...\n", + " [30 30 30 ... 31 31 31]\n", + " [30 30 29 ... 31 31 31]\n", + " [30 30 30 ... 31 31 31]]\n", + "\n", + " [[22 22 22 ... 24 24 24]\n", + " [22 22 22 ... 24 23 24]\n", + " [22 22 22 ... 24 24 24]\n", + " ...\n", + " [27 27 27 ... 31 31 31]\n", + " [28 28 28 ... 32 32 32]\n", + " [28 28 28 ... 32 32 32]]]\n" + ] + } + ], + "source": [ + "inds_of_bins = np.digitize(data_fcst, x_bins, right=True)\n", + "\n", + "print(inds_of_bins)" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "id": "medieval-european", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<xarray.DataArray '2t_ref' (2t_savp_fcst_bins: 37)>\n", + "array([259.60351562, 264.13945557, 264.74759033, 265.45030518,\n", + " 266.47970703, 267.3628302 , 268.44342804, 269.80157959,\n", + " 270.4291217 , 271.22656982, 272.41841827, 274.18320801,\n", + " 274.74815369, 275.68839111, 276.3840918 , 277.0491394 ,\n", + " 277.99171387, 279.1111615 , 280.24440918, 281.56947693,\n", + " 282.817146 , 284.15313873, 285.25139038, 286.46736084,\n", + " 287.11281006, 287.56309875, 288.39205322, 289.28383789,\n", + " 290.12092529, 291.00213623, 291.93958588, 292.7901001 ,\n", + " 294.50114746, 295.28106201, 295.7451416 , 296.17975464,\n", + " 295.94475342])\n", + "Coordinates:\n", + " * 2t_savp_fcst_bins (2t_savp_fcst_bins) object (260, 261] ... (296, 297]\n", + " quantile float64 0.99\n", + "<xarray.DataArray '2t_savp_fcst' (2t_savp_fcst_bins: 37)>\n", + "array([260.51538086, 261.99571045, 262.96671509, 263.99466095,\n", + " 264.98212372, 265.99100769, 266.99321747, 267.99145386,\n", + " 268.99003754, 269.9897348 , 270.99363922, 271.99260651,\n", + " 272.98925781, 273.99296265, 274.99265747, 275.9934906 ,\n", + " 276.99263123, 277.99108887, 278.98980103, 279.99055573,\n", + " 280.98829712, 281.99064941, 282.98851074, 283.98887085,\n", + " 284.99008545, 285.99044281, 286.99019897, 287.9892334 ,\n", + " 288.98918335, 289.98908264, 290.98603363, 291.96720215,\n", + " 292.98072205, 293.98917358, 294.99038391, 295.9605835 ,\n", + " 296.59046722])\n", + "Coordinates:\n", + " * 2t_savp_fcst_bins (2t_savp_fcst_bins) object (260, 261] ... (296, 297]\n", + " quantile float64 0.99\n" + ] + } + ], + "source": [ + "def calc_quantile(x, dim =\"init_time\"):\n", + " return x.quantile(0.99)\n", + "\n", + "cond_quantile1 = data_grouped.map(calc_quantile)\n", + "#cond_quantile2 = data_grouped.map(calc_quantile)\n", + "\n", + "\n", + "print(cond_quantile1[\"quantile\"])\n", + "print(cond_quantile1[\"2t_savp_fcst\"])\n", + "\n", + "#print(cond_quantile2[\"2t_ref\"])\n", + "#print(cond_quantile2[\"2t_savp_fcst\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "measured-outreach", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Jupyter_Notebooks/first_cond_quantile.png b/Jupyter_Notebooks/first_cond_quantile.png new file mode 100644 index 0000000000000000000000000000000000000000..6ff3a7a8a081c4a874d2e2a8c8d3d0d2e47d1fb5 Binary files /dev/null and b/Jupyter_Notebooks/first_cond_quantile.png differ diff --git a/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb b/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d788742d00cb9054dd90557edc674e481cf1c77b --- /dev/null +++ b/Jupyter_Notebooks/juwels_juwelsbooster_compare_old.ipynb @@ -0,0 +1,684 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import os, glob\n", + "import math\n", + "import pickle\n", + "import numpy as np\n", + "import xarray as xr\n", + "import matplotlib\n", + "matplotlib.use('Agg')\n", + "from matplotlib.transforms import Affine2D\n", + "from matplotlib.patches import Polygon\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "base = \"/p/project/deepacf/deeprain/video_prediction_shared_folder/models/\"+ \\\n", + " \"era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/\"\n", + "fname_timing_train = \"/timing_training_time.pkl\"\n", + "fname_timing_total = \"/timing_total_time.pkl\"\n", + "\n", + "fname_timing_iter = \"timing_per_iteration_time.pkl\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# some auxiliary functions\n", + "def orderOfMagnitude(number):\n", + " return np.floor(np.log(number, 10))\n", + "\n", + "def total_times(infile):\n", + " with open(infile,'rb') as tfile:\n", + " #print(\"Opening pickle time: '{0}'\".format(infile))\n", + " total_time_sec = pickle.load(tfile)\n", + " return np.asarray(total_time_sec/60)\n", + "\n", + "def log_total_times(infile):\n", + " total_time_min = total_times(infile)\n", + " return np.log(total_time_min)\n", + "\n", + "\n", + "def get_time_dict(base, wildcardspec, tfilename, gpu_id_str=\"gpu\", llog = False):\n", + " time_dict = {}\n", + " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", + " wrapper = total_times\n", + " if llog: wrapper = log_total_times\n", + " for tfile in flist_hpc: \n", + " ngpus = get_ngpus(tfile, gpu_id_str)\n", + " time_dict[\"{0:d} GPU(s)\".format(ngpus)] = wrapper(tfile + tfilename)\n", + " return time_dict\n", + "\n", + "def get_ngpus(fname, search_str, max_order=3):\n", + " \"\"\"\n", + " Tries to get numbers in the vicinty of search_str which is supposed to be a substring in fname.\n", + " First seaches for numbers right before the occurence of search_str, then afterwards.\n", + " :param fname: file name from which number should be inferred\n", + " :param search_str: seach string for which number identification is considered to be possible\n", + " :param max_order: maximum order of retrieved number (default: 3 -> maximum number is 999 then)\n", + " :return num_int: integer of number in the vicintity of search string. \n", + " \"\"\"\n", + " \n", + " ind_gpu_info = fname.lower().find(search_str)\n", + " if ind_gpu_info == -1:\n", + " raise ValueError(\"Unable to find search string '{0}' in file name '{1}'\".format(search_str, fname))\n", + " \n", + " # init loops\n", + " fname_len = len(fname)\n", + " success, flag = False, True\n", + " indm = 1\n", + " ind_sm, ind_sp = 0, 0\n", + "\n", + " # check occurence of numbers in front of search string\n", + " while indm < max_order and flag:\n", + " if ind_gpu_info - indm > 0:\n", + " if fname[ind_gpu_info - indm].isnumeric():\n", + " ind_sm += 1\n", + " success = True\n", + " else:\n", + " flag = False\n", + " else:\n", + " flag = False\n", + " indm += 1\n", + " \n", + "\n", + " if not success: # check occurence of numbers after search string\n", + " ind_gpu_info = ind_gpu_info + len(search_str)\n", + " flag = True\n", + " indm = 0\n", + " while indm < max_order and flag: \n", + " if ind_gpu_info + indm < fname_len:\n", + " if fname[ind_gpu_info + indm].isnumeric():\n", + " ind_sp += 1\n", + " success = True\n", + " else:\n", + " flag = False\n", + " else:\n", + " flag = False\n", + " indm += 1\n", + " \n", + " if success:\n", + " return(int(fname[ind_gpu_info:ind_gpu_info+ind_sp]))\n", + " else:\n", + " raise ValueError(\"Search string found in fname, but unable to infer number of GPUs.\")\n", + "\n", + " else:\n", + " return(int(fname[ind_gpu_info-ind_sm:ind_gpu_info]))\n", + " \n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total computation with 16 GPU(s): 152.50984706878663\n", + "Total computation with 32 GPU(s): 81.80640578667322\n", + "Total computation with 4 GPU(s): 554.5182513117791\n", + "Total computation with 64 GPU(s): 45.01537701288859\n", + "Total computation with 8 GPU(s): 287.91878341039023\n" + ] + } + ], + "source": [ + "# Juwels\n", + "wildcard_juwels = '20210115T135325_langguth1_test_venv_juwels_container*old'\n", + "total_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_total, \"gpus\")\n", + "training_time_min_juwels = get_time_dict(base, wildcard_juwels, fname_timing_train, \"gpus\")\n", + "for key in training_time_min_juwels.keys():\n", + " print(\"Total computation with {0}: {1}\".format(key, training_time_min_juwels[key]))\n", + "\n", + "overhead_time_juwels = {}\n", + "for key in training_time_min_juwels.keys() & total_time_min_juwels.keys():\n", + " overhead_time_juwels[key] = total_time_min_juwels[key] - training_time_min_juwels[key]\n", + " \n", + "#print('Juwels total time in minutes', get_time_d)\n", + "#print('Juwels total training time in minutes', training_time_min_juwels)\n", + "#overhead_time_juwels = np.array(total_time_min_juwels) - np.array(training_time_min_juwels)\n", + "#print('Juwels overhead time in minutes', overhead_time_juwels)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total computation with 1 GPU(s): 566.7376739541689\n", + "Total computation with 4 GPU(s): 159.4931242307027\n", + "Total computation with 8 GPU(s): 92.15467914342881\n", + "Total computation with 16 GPU(s): 46.11619712909063\n", + "Total computation with 32 GPU(s): 33.09077355464299\n", + "Total computation with 64 GPU(s): 23.24405464331309\n" + ] + } + ], + "source": [ + "# Juwels booster\n", + "wildcard_booster = '2020*gong1_booster_gpu*'\n", + "total_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_total)\n", + "training_time_min_booster = get_time_dict(base, wildcard_booster, fname_timing_train)\n", + "for key in training_time_min_booster.keys():\n", + " print(\"Total computation with {0}: {1}\".format(key, training_time_min_booster[key]))\n", + "\n", + "#print('Juwels Booster total time in minutes', list_times(base, wildcard_booster, filename_timing_total))\n", + "#print('Juwels Booster total training time in minutes', list_times(base, wildcard_booster, filename_timing_train))\n", + "overhead_time_booster = {}\n", + "for key in training_time_min_booster.keys() & total_time_min_booster.keys():\n", + " overhead_time_booster[key] = total_time_min_booster[key] - training_time_min_booster[key]\n", + "#print('Juwels overhead time in minutes', overhead_time_booster)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def time_per_iteration_mean_std(infile):\n", + " with open(infile, 'rb') as tfile:\n", + " time_per_iteration_list = pickle.load(tfile) \n", + " \n", + " time_per_iteration = np.array(time_per_iteration_list)\n", + " return np.mean(time_per_iteration), np.std(time_per_iteration)\n", + "\n", + "def iter_stat(base, wildcardspec, gpu_id_str=\"gpu\"):\n", + " stat_iter_dict = {}\n", + " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", + " for tdir in flist_hpc: \n", + " ngpus = get_ngpus(tdir, gpu_id_str)\n", + " ftname = os.path.join(tdir, fname_timing_iter)\n", + " mean_loc, std_loc = time_per_iteration_mean_std(ftname)\n", + " stat_iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = {\"mean\": mean_loc , \"std\": std_loc}\n", + " return stat_iter_dict\n", + "\n", + "def time_per_iteration_all(infile):\n", + " with open(infile,'rb') as tfile:\n", + " time_per_iteration_list = pickle.load(tfile)\n", + " return np.asarray(time_per_iteration_list)\n", + "\n", + "def all_iter(base, wildcardspec, gpu_id_str=\"gpu\"):\n", + " iter_dict = {}\n", + " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", + " for tdir in flist_hpc: \n", + " ngpus = get_ngpus(tdir, gpu_id_str)\n", + " ftname = os.path.join(tdir, fname_timing_iter)\n", + " iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = time_per_iteration_all(ftname)\n", + " return iter_dict \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "JUWELS (0.6151515198034729, 0.20104178037750603)\n", + "Booster (0.3521572324468615, 0.3656996619706779)\n" + ] + } + ], + "source": [ + "# Juwels\n", + "print('JUWELS', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T140958_stadtler1_comparison_1node_1gpu/timing_per_iteration_time.pkl'))\n", + "# Booster\n", + "print('Booster', time_per_iteration_mean_std('/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2010toY2222M01to12-160x128-2970N1500W-T2_MSL_gph500/convLSTM/20201210T141910_gong1_booster_gpu1/timing_per_iteration_time.pkl'))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Juwels mean and standart deviation {'16 GPU(s)': {'mean': 0.8209993402058342, 'std': 0.2627643291319852}, '32 GPU(s)': {'mean': 0.8590118098249986, 'std': 0.4078450977768068}, '4 GPU(s)': {'mean': 0.7445914211655112, 'std': 0.13789611351045}, '64 GPU(s)': {'mean': 0.9353915504630987, 'std': 0.6640973670265782}, '8 GPU(s)': {'mean': 0.7804724221628322, 'std': 0.21824334555299446}}\n" + ] + } + ], + "source": [ + "# Juwels\n", + "print('Juwels mean and standart deviation',iter_stat(base, wildcard_juwels))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Booster mean and standart deviation {'1 GPU(s)': {'mean': 0.3521572324468615, 'std': 0.3656996619706779}, '4 GPU(s)': {'mean': 0.41844419631014446, 'std': 0.5273198599590724}, '8 GPU(s)': {'mean': 0.48867375665101026, 'std': 0.4378652997442439}, '16 GPU(s)': {'mean': 0.4786909431320202, 'std': 0.49638173862734053}, '32 GPU(s)': {'mean': 0.6439339113469129, 'std': 1.4395666886291258}, '64 GPU(s)': {'mean': 0.8176603168024377, 'std': 2.1044189535471185}}\n" + ] + } + ], + "source": [ + "# Booster\n", + "print('Booster mean and standart deviation',iter_stat(base, wildcard_booster))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# Plotting \n", + "# Bar plot of total time and training time --> overhead time\n", + "\n", + "# dictionaries with the total times\n", + "tot_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_total)\n", + "tot_time_booster_dict= get_time_dict(base, wildcard_booster, fname_timing_total)\n", + "\n", + "# dictionaries with the training times\n", + "train_time_juwels_dict = get_time_dict(base, wildcard_juwels, fname_timing_train)\n", + "train_time_booster_dict = get_time_dict(base, wildcard_booster, fname_timing_train)\n", + "\n", + "# get sorted arrays\n", + "# Note: The times for Juwels are divided by 2, since the experiments have been performed with an epoch number of 20\n", + "# instead of 10 (as Bing and Scarlet did)\n", + "ngpus_sort = sorted([int(ngpu.split()[0]) for ngpu in tot_time_juwels_dict.keys()])\n", + "nexps = len(ngpus_sort)\n", + "tot_time_juwels = np.array([tot_time_juwels_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])/2.\n", + "tot_time_booster = np.array([tot_time_booster_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", + "\n", + "train_time_juwels = np.array([train_time_juwels_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])/2.\n", + "train_time_booster = np.array([train_time_booster_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", + "\n", + "overhead_juwels = tot_time_juwels - train_time_juwels \n", + "overhead_booster= tot_time_booster - train_time_booster\n", + "\n", + "names = [\"Juwels\", \"Juwels Booster\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "400.0\n", + "278.0\n", + "100.0\n", + "2.0\n" + ] + } + ], + "source": [ + "plot_computation_times(tot_time_juwels, tot_time_booster, labels, [\"Juwels\", \"Juwels Booster\"], \\\n", + " \"./total_computation_time\", log_yvals=False)\n", + "\n", + "plot_computation_times(overhead_juwels, overhead_booster, labels, [\"Juwels\", \"Juwels Booster\"], \\\n", + " \"./overhead_time\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#print(labels)\n", + "#raise ValueError(\"Stop!\")\n", + "#x = np.arange(len(labels)) # the label locations\n", + "#width = 0.35 # the width of the bars\n", + "\n", + "#fig, ax = plt.subplots()\n", + "#rects1 = ax.bar(x - width/2, np.round(tot_time_juwels, 2), width, label='Juwels')\n", + "#rects2 = ax.bar(x + width/2, np.round(tot_time_booster, 2), width, label='Booster')\n", + "\n", + "def plot_computation_times(times1, times2, ngpus, names, plt_fname, log_yvals = False):\n", + " \n", + " nlabels = len(ngpus)\n", + " x_pos = np.arange(nlabels)\n", + " \n", + " bar_width = 0.35\n", + " ytitle = \"Time\"\n", + " ymax = np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n", + " print(ymax) \n", + " if log_yvals: \n", + " times1, times2 = np.log(times1), np.log(times2)\n", + " ytitle = \"LOG(Time) [min]\"\n", + " ymax = np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5))\n", + " \n", + " # create plot object\n", + " fig, ax = plt.subplots()\n", + " # create data bars\n", + " rects1 = ax.bar(x_pos - bar_width/2, np.round(times1, 2), bar_width, label=names[0])\n", + " rects2 = ax.bar(x_pos + bar_width/2, np.round(times2, 2), bar_width, label=names[1])\n", + " # customize plot appearance\n", + " # Add some text for labels, title and custom x-axis tick labels, etc.\n", + " ax.set_ylabel(ytitle)\n", + " ax.set_title('Comparison {0} and {1} with convLSTM model'.format(*names))\n", + " ax.set_xticks(x_pos)\n", + " ax.set_xticklabels(labels)\n", + " ax.set_xlabel('# GPUs')\n", + " print(np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5)))\n", + " ax.set_ylim(0., ymax)\n", + " ax.legend()\n", + " \n", + " # add labels\n", + " autolabel(ax, rects1)\n", + " autolabel(ax, rects2)\n", + " plt.savefig(plt_fname+\".png\")\n", + " plt.close()\n", + " \n", + "\n", + "def autolabel(ax, rects):\n", + " \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n", + " for rect in rects:\n", + " height = rect.get_height()\n", + " ax.annotate('{}'.format(height),\n", + " xy=(rect.get_x() + rect.get_width() / 2, height),\n", + " xytext=(0, 3), # 3 points vertical offset\n", + " textcoords=\"offset points\",\n", + " ha='center', va='bottom')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot mean + std \n", + "# Juwels\n", + "dict_stat_juwels = iter_stat(base, wildcard_juwels, gpu_id_str=\"gpu\")\n", + "#print(dict_stat_juwels)\n", + "iter_mean_juwels = np.array([dict_stat_juwels[\"{0:d} GPU(s)\".format(key)][\"mean\"] for key in labels])\n", + "iter_std_juwels = np.array([dict_stat_juwels[\"{0:d} GPU(s)\".format(key)][\"std\"] for key in labels])\n", + "\n", + "dict_stat_booster = iter_stat(base, wildcard_booster, gpu_id_str=\"gpu\")\n", + "iter_mean_booster = np.array([dict_stat_booster[\"{0:d} GPU(s)\".format(key)][\"mean\"] for key in labels])\n", + "iter_std_booster = np.array([dict_stat_booster[\"{0:d} GPU(s)\".format(key)][\"std\"] for key in labels])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(21225,)\n" + ] + } + ], + "source": [ + "iter_time_juwels = all_iter(base, wildcard_juwels)\n", + "iter_time_booster= all_iter(base, wildcard_booster)\n", + "\n", + "max_iter_juwels = np.shape(iter_time_booster[\"{0:d} GPU(s)\".format(labels[0])])[0]\n", + "max_iter_booster = np.shape(iter_time_booster[\"{0:d} GPU(s)\".format(labels[0])])[0]\n", + "\n", + "arr_iter_juwels = np.full((nexps, max_iter_juwels), np.nan)\n", + "arr_iter_booster= np.full((nexps, max_iter_booster), np.nan)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# box plot instead of errorbar plot\n", + "# Juwels\n", + "#data_juwels = list_time_per_iteration_all_runs(base, wildcard_juwels)\n", + "data_juwels = all_iter(base, wildcard_juwels, gpu_id_str=\"gpu\")\n", + "# Booster\n", + "#data_booster = list_time_per_iteration_all_runs(base, wildcard_booster)\n", + "data_booster = all_iter(base, wildcard_booster, gpu_id_str=\"gpu\")\n", + "def simple_boxplot(time_per_iteration_data, title):\n", + " # Multiple box plots on one Axes\n", + " fig, ax = plt.subplots()\n", + " ax.set_title(title)\n", + " ax.boxplot(time_per_iteration_data, showfliers=False) # Outliers for initialization are disturbing \n", + " plt.xticks([1, 2, 3, 4, 5 ,6], ['1', '4', '8', '16', '32', '64'])\n", + " #plt.savefig('boxplot_'+title)\n", + " #plt.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "886\n", + "64.08639097213745\n", + "31.232596397399902\n", + "(1326,)\n", + "***********\n", + "2100\n", + "4.405388832092285\n", + "29.095214366912842\n", + "(2653,)\n", + "***********\n", + "36981\n", + "7.751298189163208\n", + "26.409477949142456\n", + "(42450,)\n", + "***********\n", + "3843\n", + "66.00082683563232\n", + "29.385547637939453\n", + "(21225,)\n" + ] + } + ], + "source": [ + "print(np.argmax(data_booster[\"64 GPU(s)\"]))\n", + "print(np.max(data_booster[\"64 GPU(s)\"]))\n", + "print(data_booster[\"64 GPU(s)\"][0])\n", + "print(np.shape(data_booster[\"64 GPU(s)\"]))\n", + "print(\"***********\")\n", + "\n", + "print(np.argmax(data_juwels[\"64 GPU(s)\"][1::]))\n", + "print(np.max(data_juwels[\"64 GPU(s)\"][1::]))\n", + "print(data_juwels[\"64 GPU(s)\"][0])\n", + "print(np.shape(data_juwels[\"64 GPU(s)\"]))\n", + "print(\"***********\")\n", + "\n", + "print(np.argmax(data_juwels[\"4 GPU(s)\"][1::]))\n", + "print(np.max(data_juwels[\"4 GPU(s)\"][1::]))\n", + "print(data_juwels[\"4 GPU(s)\"][0])\n", + "print(np.shape(data_juwels[\"4 GPU(s)\"]))\n", + " \n", + "print(\"***********\")\n", + "print(np.argmax(data_booster[\"4 GPU(s)\"][1::]))\n", + "print(np.max(data_booster[\"4 GPU(s)\"][1::]))\n", + "print(data_booster[\"4 GPU(s)\"][0])\n", + "print(np.shape(data_booster[\"4 GPU(s)\"]))\n", + "\n", + "#simple_boxplot(data_juwels, 'Juwels')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "simple_boxplot(data_booster, 'Booster')" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "# Try more fancy box plot \n", + "def more_fancy_boxplot(time_per_iteration_data1, time_per_iteration_data2, ngpu_list, title):\n", + " nexps = len(ngpu_list)\n", + " # Shuffle data: EXPECT JUWELS FIRST FOR THE LEGEND! NOT GENERIC!\n", + " data = []\n", + " for i in np.arange(nexps):\n", + " data.append(time_per_iteration_data1[\"{0} GPU(s)\".format(ngpu_list[i])])\n", + " data.append(time_per_iteration_data2[\"{0} GPU(s)\".format(ngpu_list[i])])\n", + " \n", + " # trick to get list with duplicated entries\n", + " xlabels = [val for val in ngpu_list for _ in (0, 1)]\n", + "\n", + " # Multiple box plots on one Axes\n", + " #fig, ax = plt.subplots()\n", + " fig = plt.figure(figsize=(6,4))\n", + " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", + " \n", + " ax.set_title(title)\n", + " bp = ax.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showfliers=False) # Outliers for initialization are disturbing\n", + " plt.xticks(np.arange(1, nexps*2 +1), xlabels)\n", + " ax.set_xlabel('# GPUs')\n", + " ax.set_ylabel('Seconds')\n", + " \n", + " # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n", + " box_colors = ['darkkhaki', 'royalblue']\n", + " num_boxes = len(data)\n", + " medians = np.empty(num_boxes)\n", + " for i in range(num_boxes):\n", + " box = bp['boxes'][i]\n", + " boxX = []\n", + " boxY = []\n", + " for j in range(5):\n", + " boxX.append(box.get_xdata()[j])\n", + " boxY.append(box.get_ydata()[j])\n", + " box_coords = np.column_stack([boxX, boxY])\n", + " # Alternate between Dark Khaki and Royal Blue\n", + " ax.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))\n", + " # Now draw the median lines back over what we just filled in\n", + " med = bp['medians'][i]\n", + " medianX = []\n", + " medianY = []\n", + " for j in range(2):\n", + " medianX.append(med.get_xdata()[j])\n", + " medianY.append(med.get_ydata()[j])\n", + " ax.plot(medianX, medianY, 'k')\n", + " medians[i] = medianY[0]\n", + " # Finally, overplot the sample averages, with horizontal alignment\n", + " # in the center of each box\n", + " ax.plot(np.average(med.get_xdata()), np.average(data[i]),\n", + " color='w', marker='*', markeredgecolor='k')\n", + " \n", + " # Finally, add a basic legend\n", + " fig.text(0.9, 0.15, 'Juwels',\n", + " backgroundcolor=box_colors[0], color='black', weight='roman',\n", + " size='small')\n", + " fig.text(0.9, 0.09, 'Booster',\n", + " backgroundcolor=box_colors[1],\n", + " color='white', weight='roman', size='small')\n", + " #fig.text(0.90, 0.015, '*', color='white', backgroundcolor='silver',\n", + " # weight='roman', size='medium')\n", + " fig.text(0.9, 0.03, '* Mean', color='white', backgroundcolor='silver',\n", + " weight='roman', size='small')\n", + "\n", + " \n", + " plt.savefig('fancy_boxplot_'+title.replace(' ', '_'))\n", + " plt.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "more_fancy_boxplot(data_juwels, data_booster, ngpus_sort, 'Time needed to iterate one step')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flist_hpc1 = sorted(glob.glob(base + wildcard_juwels))\n", + "flist_hpc2 = sorted(glob.glob(base + wildcard_booster))\n", + "\n", + "\n", + " \n", + "\n", + "print(get_ngpus(flist_hpc1[2], \"gpu\"))\n", + "print(get_ngpus(flist_hpc1[0], \"gpu\"))\n", + "\n", + "print(get_ngpus(flist_hpc2[2], \"gpu\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Jupyter_Notebooks/performance_check.ipynb b/Jupyter_Notebooks/performance_check.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..3caf9018e91049c7ef7ee826382871dc5168a27a --- /dev/null +++ b/Jupyter_Notebooks/performance_check.ipynb @@ -0,0 +1,724 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "## import all required modules\n", + "import os, glob\n", + "import numpy as np\n", + "import pickle\n", + "# for plotting\n", + "import matplotlib\n", + "matplotlib.use('Agg')\n", + "from matplotlib.transforms import Affine2D\n", + "from matplotlib.patches import Polygon\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [], + "source": [ + "## some auxiliary functions\n", + "#\n", + "#colors = ['darkkhaki', 'royalblue']\n", + "colors = [\"midnightblue\", \"darkorange\"]\n", + "\n", + "def val_order(number):\n", + " return int(np.floor(np.log10(number)))\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def get_ngpus(fname, search_str, max_order=3):\n", + " \"\"\"\n", + " Tries to get numbers in the vicinty of search_str which is supposed to be a substring in fname.\n", + " First seaches for numbers right before the occurence of search_str, then afterwards.\n", + " :param fname: file name from which number should be inferred\n", + " :param search_str: seach string for which number identification is considered to be possible\n", + " :param max_order: maximum order of retrieved number (default: 3 -> maximum number is 999 then)\n", + " :return num_int: integer of number in the vicintity of search string. \n", + " \"\"\"\n", + " \n", + " ind_gpu_info = fname.lower().find(search_str)\n", + " if ind_gpu_info == -1:\n", + " raise ValueError(\"Unable to find search string '{0}' in file name '{1}'\".format(search_str, fname))\n", + " \n", + " # init loops\n", + " fname_len = len(fname)\n", + " success, flag = False, True\n", + " indm = 1\n", + " ind_sm, ind_sp = 0, 0\n", + " # check occurence of numbers in front of search string\n", + " while indm < max_order and flag:\n", + " if ind_gpu_info - indm > 0:\n", + " if fname[ind_gpu_info - indm].isnumeric():\n", + " ind_sm += 1\n", + " success = True\n", + " else:\n", + " flag = False\n", + " else:\n", + " flag = False\n", + " indm += 1\n", + " # end while-loop\n", + " if not success: # check occurence of numbers after search string\n", + " ind_gpu_info = ind_gpu_info + len(search_str)\n", + " flag = True\n", + " indm = 0\n", + " while indm < max_order and flag: \n", + " if ind_gpu_info + indm < fname_len:\n", + " if fname[ind_gpu_info + indm].isnumeric():\n", + " ind_sp += 1\n", + " success = True\n", + " else:\n", + " flag = False\n", + " else:\n", + " flag = False\n", + " indm += 1\n", + " # end while-loop \n", + " if success:\n", + " return(int(fname[ind_gpu_info:ind_gpu_info+ind_sp]))\n", + " else:\n", + " raise ValueError(\"Search string found in fname, but unable to infer number of GPUs.\")\n", + "\n", + " else:\n", + " return(int(fname[ind_gpu_info-ind_sm:ind_gpu_info]))\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "# functions for computing time\n", + "def compute_time_tot(infile):\n", + " with open(infile,'rb') as tfile:\n", + " #print(\"Opening pickle time: '{0}'\".format(infile))\n", + " total_time_sec = pickle.load(tfile)\n", + " return np.asarray(total_time_sec/60)\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def compute_time_tot_log(infile):\n", + " total_time_min = compute_time_tot(infile)\n", + " return np.log(total_time_min)\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def get_time_dict(base, wildcardspec, tfilename, gpu_id_str=\"gpu\", llog = False):\n", + " time_dict = {}\n", + " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", + " print(flist_hpc)\n", + " wrapper = compute_time_tot\n", + " if llog: wrapper = compute_time_tot_log\n", + " for tfile in flist_hpc: \n", + " ngpus = get_ngpus(tfile, gpu_id_str)\n", + " time_dict[\"{0:d} GPU(s)\".format(ngpus)] = wrapper(tfile + tfilename)\n", + " return time_dict\n", + "#\n", + "def calc_speedup(comp_time, ngpus, l_ideal= False):\n", + " nn = np.shape(ngpus)[0]\n", + " if l_ideal:\n", + " spd_data = np.array(ngpus, dtype=float)\n", + " else:\n", + " spd_data = comp_time\n", + "\n", + " spd_up = spd_data[0:nn-1]/spd_data[1::]\n", + " \n", + " if l_ideal: spd_up = 1./spd_up\n", + "\n", + " return spd_up\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "# functions for iteration time data \n", + "def iter_time_mean_std(infile):\n", + " with open(infile, 'rb') as tfile:\n", + " time_per_iteration_list = pickle.load(tfile) \n", + " \n", + " time_per_iteration = np.array(time_per_iteration_list)\n", + " return np.mean(time_per_iteration), np.std(time_per_iteration)\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def iter_stat(base, wildcardspec, gpu_id_str=\"gpu\"):\n", + " stat_iter_dict = {}\n", + " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", + " for tdir in flist_hpc: \n", + " ngpus = get_ngpus(tdir, gpu_id_str)\n", + " ftname = os.path.join(tdir, fname_timing_iter)\n", + " mean_loc, std_loc = iter_time_mean_std(ftname)\n", + " stat_iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = {\"mean\": mean_loc , \"std\": std_loc}\n", + " return stat_iter_dict\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def read_iter_time(infile):\n", + " with open(infile,'rb') as tfile:\n", + " time_per_iteration_list = pickle.load(tfile)\n", + " return np.asarray(time_per_iteration_list)\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def get_iter_time_all(base, wildcardspec, gpu_id_str=\"gpu\"):\n", + " iter_dict = {}\n", + " flist_hpc = sorted(glob.glob(base + wildcardspec))\n", + " for tdir in flist_hpc: \n", + " ngpus = get_ngpus(tdir, gpu_id_str)\n", + " ftname = os.path.join(tdir, fname_timing_iter)\n", + " iter_dict[\"{0:d} GPU(s)\".format(ngpus)] = read_iter_time(ftname)\n", + " return iter_dict \n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "# functions for plotting\n", + "def autolabel(ax, rects, rot=45):\n", + " \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n", + " scal = 1\n", + " if rot <0.:\n", + " scal = -1\n", + " for rect in rects:\n", + " height = rect.get_height()\n", + " ax.annotate('{}'.format(height),\n", + " xy=(rect.get_x() + rect.get_width()*scal, height),\n", + " xytext=(0, 3), # 3 points vertical offset\n", + " textcoords=\"offset points\",\n", + " ha='center', va='bottom', rotation=rot)\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def plot_computation_time(times1, times2, ngpus, names, plt_fname, log_yvals = False):\n", + " \n", + " nlabels = len(ngpus)\n", + " x_pos = np.arange(nlabels)\n", + " \n", + " bar_width = 0.35\n", + " ytitle = \"Time [min]\"\n", + " max_time = np.maximum(np.max(times1), np.max(times2))\n", + " time_order = val_order(max_time)\n", + " ymax = np.ceil(max_time/(10**time_order) + 0.5)*(10**time_order) + 10**time_order\n", + " # np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n", + " if log_yvals: \n", + " times1, times2 = np.log(times1), np.log(times2)\n", + " ytitle = \"LOG(Time) [min]\"\n", + " ymax = np.ceil(np.maximum(np.max(times1)+0.5, np.max(times2) + 0.5))\n", + " \n", + " # create plot object\n", + " fig, ax = plt.subplots()\n", + " # create data bars\n", + " rects1 = ax.bar(x_pos - bar_width/2, np.round(times1, 2), bar_width, label=names[0], color=colors[0])\n", + " rects2 = ax.bar(x_pos + bar_width/2, np.round(times2, 2), bar_width, label=names[1], color=colors[1])\n", + " # customize plot appearance\n", + " # Add some text for labels, title and custom x-axis tick labels, etc.\n", + " ax.set_ylabel(ytitle)\n", + " ax.set_title('Comparison {0} and {1} with convLSTM model'.format(*names))\n", + " ax.set_xticks(x_pos)\n", + " ax.set_xticklabels(ngpus)\n", + " ax.set_xlabel('# GPUs')\n", + " ax.set_ylim(0., ymax)\n", + " ax.legend()\n", + " \n", + " # add labels\n", + " autolabel(ax, rects1)\n", + " autolabel(ax, rects2)\n", + " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", + " plt.savefig(plt_fname+\".png\")\n", + " plt.close()\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def plot_speedup(comp_time_hpc1, comp_time_hpc2, ngpus, names):\n", + " fig = plt.figure(figsize=(6,4))\n", + " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", + " \n", + " spd_up1 = calc_speedup(comp_time_hpc1, ngpus)\n", + " spd_up2 = calc_speedup(comp_time_hpc2, ngpus)\n", + " spd_ideal= calc_speedup(comp_time_hpc2, ngpus, l_ideal=True)\n", + " \n", + " plt.plot(spd_up1/spd_ideal, label= names[0], c=colors[0], lw=1.5)\n", + " plt.plot(spd_up2/spd_ideal, label= names[1], c=colors[1], lw=1.5)\n", + " plt.plot(spd_ideal/spd_ideal, label= \"Ideal\", c=\"r\", lw=3.)\n", + " \n", + " xlabels = []\n", + " for i in np.arange(len(ngpus)-1):\n", + " xlabels.append(\"{0} -> {1}\".format(ngpus[i], ngpus[i+1]))\n", + " plt.xticks(np.arange(0, len(ngpus)-1), xlabels)\n", + " ax.set_xlim(-0.5, len(ngpus)-1.5)\n", + " ax.set_ylim(0.5, 1.5)\n", + " legend = ax.legend(loc='upper left')\n", + " ax.set_xlabel('GPU usage')\n", + " ax.set_ylabel('Ratio Speedup factor') \n", + " \n", + " plt_fname = \"speed_up_{0}_vs_{1}.png\".format(*names)\n", + " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", + " plt.savefig(\"speed_up_{0}_vs_{1}.png\".format(*names))\n", + "#\n", + "# ****************************************************************************************************\n", + "#\n", + "def boxplot_iter_time(time_per_iteration_data1, time_per_iteration_data2, ngpu_list, names):\n", + " nexps = len(ngpu_list)\n", + " # create data lists for boxplot-routine\n", + " data = []\n", + " for i in np.arange(nexps):\n", + " data.append(time_per_iteration_data1[\"{0} GPU(s)\".format(ngpu_list[i])])\n", + " data.append(time_per_iteration_data2[\"{0} GPU(s)\".format(ngpu_list[i])])\n", + " \n", + " # trick to get list with duplicated entries\n", + " xlabels = [val for val in ngpu_list for _ in (0, 1)]\n", + "\n", + " # Multiple box plots on one Axes\n", + " #fig, ax = plt.subplots()\n", + " fig = plt.figure(figsize=(6,4))\n", + " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", + " \n", + " ax.set_title(\"Time per iteration step\")\n", + " bp = ax.boxplot(data, notch=0, sym='+', vert=1, whis=1.5, showfliers=False) # Outliers for initialization are disturbing\n", + " plt.xticks(np.arange(1, nexps*2 +1), xlabels)\n", + " ax.set_xlabel('# GPUs')\n", + " ax.set_ylabel('Time [s]')\n", + " \n", + " # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n", + " box_colors = colors\n", + " num_boxes = len(data)\n", + " medians = np.empty(num_boxes)\n", + " for i in range(num_boxes):\n", + " box = bp['boxes'][i]\n", + " boxX = []\n", + " boxY = []\n", + " for j in range(5):\n", + " boxX.append(box.get_xdata()[j])\n", + " boxY.append(box.get_ydata()[j])\n", + " box_coords = np.column_stack([boxX, boxY])\n", + " # Alternate between Dark Khaki and Royal Blue\n", + " ax.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2]))\n", + " # Now draw the median lines back over what we just filled in\n", + " med = bp['medians'][i]\n", + " medianX = []\n", + " medianY = []\n", + " for j in range(2):\n", + " medianX.append(med.get_xdata()[j])\n", + " medianY.append(med.get_ydata()[j])\n", + " ax.plot(medianX, medianY, 'k')\n", + " medians[i] = medianY[0]\n", + " # Finally, overplot the sample averages, with horizontal alignment\n", + " # in the center of each box\n", + " ax.plot(np.average(med.get_xdata()), np.average(data[i]),\n", + " color='w', marker='*', markeredgecolor='k', markersize=10)\n", + " \n", + " # Finally, add a basic legend\n", + " fig.text(0.86, 0.15, names[0],\n", + " backgroundcolor=box_colors[0], color='white', weight='roman',\n", + " size='small')\n", + " fig.text(0.86, 0.09, names[1],\n", + " backgroundcolor=box_colors[1],\n", + " color='white', weight='roman', size='small')\n", + " #fig.text(0.90, 0.015, '*', color='white', backgroundcolor='silver',\n", + " # weight='roman', size='medium')\n", + " #fig_transform = ax.figure.transFigure #+ ax.transAxes.inverted() #+ ax.figure.transFigure.inverted()\n", + " #ax.plot(0.1, 0.03, marker='*', markersize=30, color=\"w\", markeredgecolor=\"k\", transform=fig_transform)\n", + " fig.text(0.86, 0.03, '* Mean', color='black', backgroundcolor='white', \n", + " weight='roman', size='small', bbox=dict(facecolor='none', edgecolor='k'))\n", + "\n", + " plt_fname = \"boxplot_iter_time_{0}_vs_{1}\".format(*names)\n", + " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", + " plt.savefig(plt_fname+\".png\")\n", + " plt.close()\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "## some basic settings\n", + "base_dir = \"/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/\"\n", + "\n", + "wildcard_hpc1 = '20210325T095504_langguth1_juwels_container_[1-9]*gpu*' # search pattern for finding the experiments\n", + "wildcard_hpc2 = '20210325T095504_langguth1_jwb_container_[1-9]*gpu*'\n", + "\n", + "gpu_id_str = [\"gpu\", \"gpu\"] # search substring to get the number of GPUs used in the experiments,\n", + " # e.g. \"gpu\" if '64gpu' is a substring in the experiment directory\n", + " # or \"ngpu\" if 'ngpu64' is a substring in the experiment directory\n", + " # -> see wilcard-variables above\n", + "names_hpc = [\"Juwels\", \"Booster\"]\n", + "\n", + "# name of pickle files tracking computing time\n", + "fname_timing_train = \"/timing_training_time.pkl\"\n", + "fname_timing_total = \"/timing_total_time.pkl\"\n", + "\n", + "fname_timing_iter = \"timing_per_iteration_time.pkl\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_8gpus']\n", + "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_8gpus']\n", + "{'16 GPU(s)': array(53.40843068), '1 GPU(s)': array(930.4968381), '32 GPU(s)': array(45.96871045), '4 GPU(s)': array(217.45655225), '64 GPU(s)': array(35.7369519), '8 GPU(s)': array(106.4218419)}\n", + "{'16 GPU(s)': array(34.26928383), '1 GPU(s)': array(492.70926997), '32 GPU(s)': array(35.05492661), '4 GPU(s)': array(100.99109779), '64 GPU(s)': array(30.98471271), '8 GPU(s)': array(49.63896298)}\n", + "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_juwels_container_8gpus']\n", + "['/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_16gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_1gpu', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_32gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_4gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_64gpus', '/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/convLSTM_container/20210325T095504_langguth1_jwb_container_8gpus']\n" + ] + } + ], + "source": [ + "## evaluate computing time\n", + "# dictionaries with the total times\n", + "tot_time_hpc1_dict = get_time_dict(base_dir, wildcard_hpc1, fname_timing_total, gpu_id_str=gpu_id_str[0])\n", + "tot_time_hpc2_dict= get_time_dict(base_dir, wildcard_hpc2, fname_timing_total, gpu_id_str=gpu_id_str[1])\n", + "\n", + "print(tot_time_hpc1_dict)\n", + "print(tot_time_hpc2_dict)\n", + "\n", + "# dictionaries with the training times\n", + "train_time_hpc1_dict = get_time_dict(base_dir, wildcard_hpc1, fname_timing_train, gpu_id_str=gpu_id_str[0])\n", + "train_time_hpc2_dict = get_time_dict(base_dir, wildcard_hpc2, fname_timing_train, gpu_id_str=gpu_id_str[1])\n", + "\n", + "# get sorted arrays\n", + "# Note: The times for Juwels are divided by 2, since the experiments have been performed with an epoch number of 20\n", + "# instead of 10 (as Bing and Scarlet did)\n", + "ngpus_sort = sorted([int(ngpu.split()[0]) for ngpu in tot_time_hpc1_dict.keys()])\n", + "nexps = len(ngpus_sort)\n", + "tot_time_hpc1 = np.array([tot_time_hpc1_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", + "tot_time_hpc1[0] = tot_time_hpc1[0]#*2.\n", + "tot_time_hpc2 = np.array([tot_time_hpc2_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", + "\n", + "train_time_hpc1 = np.array([train_time_hpc1_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", + "train_time_hpc1[0] = train_time_hpc1[0]#*2.\n", + "train_time_hpc2 = np.array([train_time_hpc2_dict[\"{0:d} GPU(s)\".format(key)] for key in ngpus_sort])\n", + "\n", + "overhead_hpc1 = tot_time_hpc1 - train_time_hpc1\n", + "overhead_hpc2= tot_time_hpc2 - train_time_hpc2" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[492.70926997 100.99109779 49.63896298 34.26928383 35.05492661\n", + " 30.98471271]\n", + "Saving plot in file: ./total_computation_time_Juwels_vs_Booster.png ...\n", + "Saving plot in file: ./overhead_time_Juwels_vs_Booster.png ...\n", + "Saving plot in file: speed_up_Juwels_vs_Booster.png.png ...\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA010lEQVR4nO3dd3hUZfbA8e9JCAQIzSQgEJqKSJFEWmyAgAIiihQRFYG1YENl7e4urmuvu66u5afrigoiiqKABZUiiIgQJNIFUSGClAhIqCnn98d7EyOkTMhMbjI5n+e5DzN37tx77iScefPe955XVBVjjDHhJcLvAIwxxgSfJXdjjAlDltyNMSYMWXI3xpgwZMndGGPCkCV3Y4wJQ1X8DqCk4uLitHnz5n6HYYwxvktJSdmhqvEFvVbhknvz5s1ZsmSJ32EYY4zvROSnwl6zbhljjAlDltyNMSYMWXI3xpgwVOH63AuSmZlJWloaBw4c8DuUcis6OpqEhASioqL8DsUYUwbCIrmnpaVRq1Ytmjdvjoj4HU65o6qkp6eTlpZGixYt/A7HGFMGwqJb5sCBA8TGxlpiL4SIEBsba3/ZGFOJhEVyByyxF8M+H2Mql7BJ7uVBTExMSPZ777338sQTT4Rk38aY8GTJ3RhjwpAl9yCbO3cu/fv3z3s+ZswYxo8fz+LFixk0aBAA77//PtWrV+fQoUMcOHCA4447DoDvv/+evn370rFjR7p27cqaNWuO2P/TTz9NmzZtaN++PcOGDSubkzLGVDhhMVomv3vumcrKlT8HdZ9t2zbmvvsGlmofp5xyCsuWLQNg/vz5tGvXjsWLF5OVlUVycjIAo0eP5oUXXqBly5YsWrSI66+/ntmzZ/9hP4888gg//PAD1apVY9euXaWKyRgTvsIuuZdXVapU4fjjj2f16tV8/fXX3HLLLcybN4/s7Gy6du1KRkYGX375JRdddFHeew4ePHjEftq3b89ll13GhRdeyIUXXliGZ2CMqUjCLrmXtoVdWlWqVCEnJyfvef7hh926deOjjz4iKiqKs88+m1GjRpGdnc3jjz9OTk4OdevWzWvdF+aDDz5g3rx5TJ8+nQcffJDly5dTpUrY/RiNMaVkfe5B1qxZM1atWsXBgwfZtWsXs2bNynuta9euPPXUU5x22mnEx8eTnp7O2rVradeuHbVr16ZFixa8/fbbgLvxKDU19Q/7zsnJYdOmTfTo0YNHH32U3bt3k5GRUabnZ4ypGKzJFyRZWVlUq1aNJk2aMHToUNq1a0eLFi045ZRT8rZJTk5m69atdOvWDXBdLL/88kveGPSJEydy3XXX8cADD5CZmcmwYcNITEzMe392djbDhw9n9+7dqCo33XQTdevWLdPzNMZUDKKqfsdQIp06ddLD67mvXr2a1q1b+xSRk5qaytVXX83XX3/taxxFKQ+fkzEmeEQkRVU7FfRayLplROR/IrJNRFYUs11nEckSkSGhiiXUXnjhBS655BIeeOABv0MxxhggtH3u44G+RW0gIpHAo8AnIYwj5K699lpWrVpF7969/Q7FGGOAECZ3VZ0H/FrMZjcC7wDbQhWHMcZURr6NlhGRxsBA4PkAth0tIktEZMn27dtDH5wxxlRwfg6FfAq4U1VzittQVV9U1U6q2ik+vsCJvo0xxuTj51DITsCb3jDAOKCfiGSp6ns+xmSMMWHBt5a7qrZQ1eaq2hyYAlxfkRN7ZGQkSUlJJCYm0qFDB7788sug7v+hhx4K6v6MMeEtlEMhJwELgVYikiYiV4rItSJybaiO6afq1auzbNkyUlNTefjhh7n77ruDuv+jSe7Z2dlBjcEYU3GEcrTMJaraUFWjVDVBVV9W1RdU9YUCth2lqlNCFUtZ++2336hXrx7gygjcfvvttGvXjpNPPpnJkycXuX7Lli1069aNpKQk2rVrx/z587nrrrvYv38/SUlJXHbZZQBMmDCBLl26kJSUxDXXXJOXyGNiYrj11ltJTExk4cKFPpy9MaY8CL/yA3PGwrZlwd1n/STo8VSRm+Qm3wMHDrBly5a8Ur3vvvtuXot+x44ddO7cmW7duvHll18WuP6NN96gT58+/PWvfyU7O5t9+/bRtWtX/vOf/+QVFVu9ejWTJ09mwYIFREVFcf311zNx4kRGjBjB3r17SU5O5sknnwzuZ2CMqVDCL7n7JLdbBmDhwoWMGDGCFStW8MUXX3DJJZcQGRlJgwYN6N69O4sXLy50fefOnbniiivIzMzkwgsvJCkp6YhjzZo1i5SUFDp37gy4L5b69esDru9/8ODBZXXaxphyKvySezEt7LJw2mmnsWPHDo5mTH63bt2YN28eH3zwAaNGjeKWW25hxIgRf9hGVRk5ciQPP/zwEe+Pjo4mMjLyqGM3xoQHK/kbAmvWrCE7O5vY2Fi6du3K5MmTyc7OZvv27cybN48uXboUuv6nn36iQYMGXH311Vx11VUsXboUgKioKDIzMwHo1asXU6ZMYds2d2Pvr7/+yk8//eTb+Rpjyp/wa7n7JLfPHVzL+tVXXyUyMpKBAweycOFCEhMTEREee+wxjj322ELXv/rqqzz++ONERUURExPDa6+9Brgp+Nq3b0+HDh2YOHEiDzzwAL179yYnJ4eoqCieffZZmjVr5uMnYIwpT6zkbyVin5Mx4cWXkr/GGGP8Y8ndGGPCkCV3Y4wJQ5bcjTEmDFlyN8aYMGTJ3RhjwpAl9yCJiYkpcP2oUaOYMuXoaqLde++9PPHEE6UJyxhTSVlyN8aYMGTJPchUlTFjxtCqVSvOPvvsvBIBACkpKXTv3p2OHTvSp08ftmzZAsBLL71E586dSUxMZPDgwezbt8+v8I0xYSL8krtI6JYATJ06lbVr17Jq1Spee+21vBmZMjMzufHGG5kyZQopKSlcccUV/PWvfwVg0KBBLF68mNTUVFq3bs3LL78cso/HGFM5WG2ZIJs3b15eKd9GjRrRs2dPANauXcuKFSs455xzADdLUsOGDQFYsWIFf/vb39i1axcZGRn06dPHt/iNMeHBknsZUVXatm1b4OxIo0aN4r333iMxMZHx48czd+7csg/QGBNWwq9bRjV0SwC6deuWV8p3y5YtzJkzB4BWrVqxffv2vOSemZnJypUrAdizZw8NGzYkMzOTiRMnhuZzMcZUKtZyD7KBAwcye/Zs2rRpQ9OmTTnttNMAqFq1KlOmTOGmm25i9+7dZGVlMXbsWNq2bcv9999PcnIy8fHxJCcns2fPHp/PwhhT0VnJ30rEPidjwouV/DXGmErGkrsxxoQhS+7GGBOGwia5V7RrB2XNPh9jKpewSO7R0dGkp6dbAiuEqpKenk50dLTfoRhjykhYDIVMSEggLS2N7du3+x1KuRUdHU1CQoLfYRhjykhYJPeoqChatGjhdxjGGFNuhEW3jDHGmD+y5G6MMWGoyOQuIpEiYsVOjDGmgikyuatqNtBMRKqWUTzGGGOCIJALqhuABSIyDdibu1JV/xmyqMorVVjyJJx8FUTX9TsaY4wpVCB97t8DM7xta+VbKp+tS2D+XfBqO/hxpt/RGGNMoYptuavqPwBEJMZ7nhHIjkXkf0B/YJuqtivg9cuAOwEB9gDXqWpq4KH74NjOcOlC+GgkvNMXTr4aznoSqlbO7zpjTPlVbMtdRNqJyDfASmCliKSISNsA9j0e6FvE6z8A3VX1ZOB+4MUA9um/YzvD5Uuh8x2w4mV49WTYONvvqIwx5g8C6ZZ5EbhFVZupajPgVuCl4t6kqvOAX4t4/UtV3ek9/QqoOLdPVomGbo/CsC8gsiq83QtmjYHMvcW/1xhjykAgyb2mqs7JfaKqc4GaQY7jSuCjIO8z9BqdBpcvgw5jYdlz8FoipH3hd1TGGBNQct8gIuNEpLm3/A03giYoRKQHLrnfWcQ2o0VkiYgsKXf1Y6JqQI9/wcVzQXNgcjeYeytk7vc7MmNMJRZIcr8CiAfeBd4B4oA/BePgItIe+C8wQFXTC9tOVV9U1U6q2ik+Pj4Yhw6+hG4w4ltIvBZS/gmvnwJbFvkdlTGmkgokuZ+tqjepagdV7aiqY4FzSntgEWmK+8K4XFW/K+3+yoWqMXD2czDkU8jaD5NOh/l3Q9ZBvyMzxlQygST3uwNc9wciMglYCLQSkTQRuVJErhWRa71N7gFigedEZJmILCl0ZxVNs7Nh5HJo+yf4+hGY0BG2pvgdlTGmEpHCJrgQkXOBfsBQYHK+l2oDbVS1S+jDO1KnTp10yZIK9D3ww0fwyVWwdysk/xVO/asbYWOMMaUkIimq2qmg14pquW8GlgAHgJR8yzSgT7CDDFstzoWRK6D1pfDVfTAxGbZ/63dUxpgwV2jLPW8DkdrAXq+IGCISCVRT1X1lEN8RKlzLPb/178Ono+HATjjt79DlTogIi/lSjDE+ONqWe65PgOr5nlcHPgtGYJXOCQNg5EpoOQgW/M1dcE1f7XdUxpgwFEhyj85fT8Z7XCN0IYW5GnHQ/03o/xbs2uCGTC5+HHKy/Y7MGBNGAknue0WkQ+4TEekI2B06pdXqIhi10vXJz7vD3fy0c53fURljwkQgyX0s8LaIzBeRL3AjZ8aENKrKomYDuOBd6DcB0le58gVL/+3udDXGmFIIpOTvYhE5CWjlrVqrqpmhDasSEYHWl0GTHvDJ1TBnLKybCn3+B3WP8zs6Y0wFFegE2a2ANkAH4BIRGRG6kCqpmEYwcIZL6tu+gdfaQ+oLbvYnY4wpoUDquf8deMZbegCPAReEOK7KSQTa/cnd3drodPjsOpjSG37b6HdkxpgKJpCW+xCgF/CLqv4JSATqhDSqyq52Uxg8E85+AbYsdBOCLP+fteKNMQELJLnvV9UcIMu7oWkb0CS0YRlEIPEa14qvfwp8ciVM7Q8Zm/2OzBhTAQSS3JeISF3c7EspwFJcQTBTFuq0gKGzoce/YdMcGN8WVk2wVrwxpkiFJncROcN7+GdV3aWqL+BK/Y70umdMWZEI6HATjEiF2Dbw0eUwbZArRmaMMQUoquX+tPdvXitdVX9UVat65Zd6LeHiedDtcVdtcnxbWPuW31EZY8qhosa5Z4rIi0CCiDx9+IuqelPowjKFioiEzrfBcf3g41Ew42L47h3o9awrbWCMMRTdcu8PzMaVGkgpYDF+im0Dl3wJZz4I66fCq21h3Xt+R2WMKScKbbmr6g7gTRFZraqpZRiTCVREFUj+CxzXHz4aCdMGQuvh0PNpiK7nd3TGGB8VO1rGEnsFEN8eLvva1Yhf+ya82g42fOh3VMYYHwVafsCUd5FRcPq9cOkiqFYPpp4HM6+Eg7v9jswY4wNL7uGmQQcYngJd7oaV493drT/Z3CrGVDaB1JaJFZFnRGSpiKSIyL9FJLYsgjNHqUo16PqQu+BapQZMOcfVqTmUUfx7jTFhIZCW+5u4kgODcXVmtuNqulc6OTk5ZGQc8DuMwDVMhsu/gY63QOr/uUqTmz73OypjTBkIZILsFara7rB1y1X15JBGVgg/J8jetOlXkpPvp1GjurRs2eCIJTY2xpe4ApL2BcwcBbu+hw43w5kPQZTNlmhMRVbUBNnFTtYBfCIiw4DcWyGHADODFVxFEh0dxV139WPduq2sW7eVN974in37DuW9Xq9eTVq2rH9E0m/UqC4RET5f3kg405UvmHeXm+3phw+hz3hofLq/cRljQiKQlvseoCaQO/dbBLDXe6yqWjt04R3Jz5b74XJycti8eTfr12/NS/i5y6+/7s3brnr1qpxwwpFJv3nzOKKiIss+8I2zYeYVsGcTdLwVzrgPqkSXfRzGmFIpquVebHIvb8pTci9KenrGEQl/3bqtbN68K2+bKlUiaN487oikf8IJ9alRo1poAzy0Bz6/Db59EY5pDee+Csd2Du0xjTFBVarkLiLdClqvqvOCEFuJVZTkXpi9ew/ma+lvy0v6P/64g+zs3yfGbty4npfs6+dL+iHo1/9xphsPv/cX6HIXnDrOjbYxxpR7pU3u0/M9jQa6ACmq2jN4IQauoif3whw6lMWPP+44oqW/fv02Dhz4fT7yY46pScuWDTjxxN8Tfm6/vogc3cEP7IK5f3bj4uNOhnNfg/pJwTgtY0wIBbVbRkSaAE+p6uBgBFdS4ZrcC5OTk8PPP+8qIOlvZefOfXnb1axZ7Q/9+rlJv3nzWKpUCbBf//vp8Olo2L/DteC73O3ufDXGlEvBTu4CrFTVNsEIrqQqW3IvjKoW0q+/jS1bduVtFxUVSYsWcUck/eOPr0+NGlWP3PH+dJh9E6x5Axp0hL7jIa7dkdsZY3xX2m6ZZ4DcjSKAJOBHVR0ezCADZcm9eBkZB1i/ftsRif+nn9Lz+vVFhISEeoclfdfyr1evpqsR/9l1cGg3nPYPV0M+IpCRs8aYslLa5D4y39MsXGJfEMT4SsSS+9E7eDCLH3/cntfC/+67X1i3bisbNmz/Q79+XFwMLVs2IOnEaIYfO4EWWZ9zKLYTUee/jsSe5OMZGGPys6GQpkjZ2Tmkpf16xAie9eu3snv3Pga0+o4He82hRlQWr2/ox/Kqg4mLr0tcXAyxsTHExcXkPY6NjaF69QK6e4wxQXdUyV1ElvN7d8wRVLV9cMIrGUvuZUdV2b59D+vWbSVt7Urab32Q1lWXsHRbM0ZP78/mXQVfbK1ZsxqxsTWJi6uVl/AP/wJwr9UkNjaGqlWtu8eYo3G0yb2Z9/AG79/XvX+H4+5MvauYg/4PN1XftsNr03ivC/BvoB+wDxilqkuLORdL7n5ShVWvwWfXonVOYN95H7BjXw3S0/eyY8ceduzIID3dLbmP86/LzMwucLe1a0cX+EVwzDG5j2vlfVnUq1cj8NE/xoS50va5f6Oqpxy2bqmqdijmfd2ADOC1QpJ7P+BGXHJPBv6tqslFBoMl93Jh42yYej7UbgoXzYKYRsW+RVX57bcD7Nixp8gvgPyPc3KO/N0UEerWrfGHLqGC/jrI/bKoW7e6/3V9jAmR0hYOExE5I/ciqoicTmDT880TkeZFbDIAl/gV+EpE6opIQ1XdEkBMxk9Ne8Lgj+HdfjC5O1w0G2o3KfItIkKdOtWpU6c6xx9fv9hD5OTksHPnPn799Y9/FRz+RbBmzRbS0/eyc+feAvcTGRnBMcfUPOJL4MgvBvdlULt29NHfDGZMORJIcr8S+J+I1PGe7wKuCMKxGwOb8j1P89aV7+Ru//EPsx6uahr0vUYAsd7SsrQ7Syt1OMYUrRwOTCk2uatqCpCYm9xVtcwn5RSR0cBogKZNg59IjDEm3AQyzV4DEXkZeFNVd4tIGxG5MgjH/hnI/7d8grfuCKr6oqp2UtVO8fHxQTi0McaEt0CuNI3HTc6Re9XsO2BsEI49DRghzqnA7grR365qS0HLtlR4Ng6ebwg7VvkfT4iXd99ZQqOGY3ni8Y98j8WWcrCUQ4Ek9zhVfQtvsg5VzQIKHtOWj4hMAhYCrUQkTUSuFJFrReRab5MPgQ3AeuAl4PqjOQFTTsS3h6FzQXPgrbNgxwq/IwqpQYM6MmRIJ5566hMWLdrgdzjGHCGQoZBzcZNjf6qqHbxW9qOq2r0M4juCDYUs535dC2/3hKyDcNFnYV06OCPjAL17P0lmZjaffnobdevanLSmbBU1FDKQlvstuC6U40VkAfAabny6MUc6phUM/dxNvv12T9ia4ndEIRMTE82zzw5n69bd3HnnW1S0Uh4mvAUyXn0p0B04HbgGaKuq34Y6MFOB1TsBLv4cqtaBt3vBlkV+RxQyp5zSjNtvP5fp01OZNCl8z9NUPIGMlqkB3AWMVdUVQHMR6R/yyEzFVqeFS/DV42DKOZD2hd8RhcwNN/TkzDNbMm7cVNat2+p3OMYAgXXLvAIcAk7znv8MPBCyiEz4qN3UddHUbAjv9oVNc/2OKCQiIiJ4+unLiI6O4vrrX+fgwSy/QzImoOR+vKo+BmQCqOo+wG7TNIGp1di14Gs3c+UKfvrM74hC4thj6/Cvfw1j5cqfefjhGX6HY0xAyf2QiFQHV/5XRI4HDoY0KhNeah4LQ+dA3RNgan/44WO/IwqJ3r3bMWrUmbz44ufMmbPa73BMJRdIcv878DHQREQmArOAO0IalQk/Neq7BB/bBt4f4CbjDkPjxp3PSSc15Oab32D79j1+h2MqsUBGy3wKDAJGAZOATqo6N7RhmbBUPdaVCI5PhGmDYN27fkcUdNWrV+X55y8nI+MgY8e+QU5Ojt8hmUoq0ELX3YFeQA+ga+jCMWEvuh4M+RQadIbpQ2HNZL8jCrpWrRry978PYM6cNfz3v/P8DsdUUoEMhXwOuBZYDqwArhGRZ0MdmAlj1erAkJnQ6DT48FJYNcHviIJuxIjT6dOnHQ8+OIPly63msCl7gbTcewJ9VPUVVX0FN3NSz9CGZcJe1Vpuwo+E7vDRCFgx3u+IgkpEeOKJi4mNjeH6619n3z4bg2DKViDJfT2Qv4h6E2+dMaUTVRMGzoBm58DMP8G3L/odUVDFxsbwzDOXsWHDdsaNm+p3OKaSCSS51wJWi8hcr4jYKqC2iEwTkWkhjc6Ev6gacOH70KIffHoNfBNePX5nnNGSMWN6MWnSIqZPX+Z3OKYSCWSavXtCHoWp3KpEwwXvwoyLYfYYyDkEHf/sd1RBc9ttfVmwYB233z6ZU05pSkLCMX6HZCqBQIZCfq6qn+Muph4DZOSu89YbU3pVqsH5b8OJQ2DuLfD1o35HFDRRUZE8++xwcnKUG26YQFZWsdMhGFNqhSZ3EZkhIu28xw1xyf0K4HURGVs24ZlKJTIKzpsEJ10C8++Chff7HVHQNGsWxyOPXMTixT/w1FOf+h2OqQSKarm38KpAAvwJN1nH+UAyLskbE3wRVeDc16HN5fDlPbBgXLmdxqykbPYmU5aKSu6Z+R73wk2Lh6ruwZtyz5iQiIiEPq9Auyvhqwdg/t1hk+AfemgwTZocw5gxE9i1a5/f4ZgwVlRy3yQiN4rIQKADrr4MXhGxqLIIzlRiEZHQ+0VIvBYWPwqf3xoWCT4mJprnnrvcZm8yIVdUcr8SaIurKXOxqu7y1p+Kq/FuTGhJBPR6Dk65CVL+BbNvdBNwV3A2e5MpC4UOhVTVbbiyA4evnwPMCWVQxuQRgR5PQWRVWPIE5GTC2c+7xF+B3XBDT+bP/45x46bSuXMLWrZs4HdIJsxU7P8hpnIQgW6PQfJf3F2sM6+EnIo9nNBmbzKhZsndVAwicMYDcNq9sHI8fDwScip2QrTZm0woWXI3FYcInP53OPNBWD0RPrgMsjOLf185ZrM3mVAJpORvgohMFZHtIrJNRN4RkYSyCM6YAiX/Bbo9Dt+95UoWZB/yO6JSsdmbTCgE0nJ/BZgGNAQaAdOx0TLGb51vgx7/hvVTYdpgyKq4JXXzz9508802e5MJjkCSe7xXyz3LW8YD8SGOy5jidbjJDZXcMAPevxAy9/sd0VHLnb1p7lybvckERyDJPV1EhotIpLcMB9JDHZgxAUm6Dnr/F36cCe+dD5kV965Pm73JBFMgyf0KYCjwC7AFGIKrNWNM+XDyldB3PGyaA+/2g0MZfkd0VGz2JhNMgZT8/UlVL1DVeFWtr6oXqurGsgjOmIC1HQHnToCfv4B3+sLB3/yO6KjY7E0mWAq9Q1VE7lDVx0TkGeCIAhiqelNIIzOmpFpf4soGf3AJvNMbBn0M0XX9jqrEcmdveuaZzzjrrJM4//wkv0MyFVBRMzHlDrpdUhaBGBMUJw6BiCiYfhFMORsGfwLVK97MRzZ7kymtQrtlVHW693Cfqr6afwEq7lUrE/5OGAAD3oMdK+DtnrBvu98RlZjN3mRKK5ALqncHuM6Y8uO4fnDhNNi5Ft7qAXu3+h1RidnsTaY0ippm71yvv72xiDydbxkPBFTUQ0T6ishaEVkvIncV8HpTEZkjIt+IyLci0u+oz8SYwzXvDQM/gN0/wFtnQcZmvyMqsT/O3vS93+GYCqSolvtmXH/7ASAl3zIN6FPcjkUkEngWOBdoA1wiIm0O2+xvwFuqegowDHiupCdgTJGa9oTBH8OeNJjcHX7b5HdEJfbgg272phtusNmbTOCK6nNP9frXTzisz/1dVd0ZwL67AOtVdYOqHgLeBAYcfhigtve4Du4LxZjgSugKg2fCvm3wVnfY/aPfEZVIrVpu9qZt236z2ZtMwALpc28uIlNEZJWIbMhdAnhfYyB/MynNW5ffvcBwEUnDzdF6YyBBG1NijU+HIZ/CgZ2uBb+rYk1QbbM3mZIKtHDY87h+9h7Aa8CEIB3/EmC8qiYA/YDXRY6cYkdERovIEhFZsn17xRv5YMqJhl3golmQmQGTu8HOdX5HVCI33NCTM89sybhxU1m3ruJdIDZlK5DkXl1VZwHi3a16L3BeAO/7GWiS73mCty6/K4G3AFR1IRANxB2+I1V9UVU7qWqn+HirWWZKoUEHGDrHlQme3B3SK04NdZu9yZREIMn9oNeaXiciY0RkIBATwPsWAy1FpIWIVMVdMJ122DYbgV4AItIal9ytaW5CK749XDzXTbb91lluPHwFYbM3mUAFktxvBmoANwEdgcuBEcW9SVWzgDHATNzdrm+p6koRuU9ELvA2uxW4WkRSgUnAKLWrRaYsxLaBiz+HiCow+SzYtszviAJmszeZQEhJc6k3xHGYqk4MTUhF69Spky5ZYhURTJDsXO/uYs3McKUKju3kd0QB2b//EOed9xQ7duxh1qw7iI+v5XdIxgcikqKqBf7SFnUTU20RuVtE/iMivcUZA6zHlQA2puKrd4JrwVetA2/3gs1f+R1RQKpXr8pzz9nsTaZwRXXLvA60ApYDVwFzgIuAgap6+Hh1YyquOi1cgq8R76pJpn3hd0QBOemkhtxzzwU2e5MpUFHJ/ThVHaWq/4cbstgG6KOqy8okMmPKUu2mMPRzqNkQ3u0Lm+b6HVFARo48w2ZvMgUqKrln5j5Q1WwgTVUPhD4kY3xSq7Frwddu5mZ0+ukzvyMqls3eZApTVHJPFJHfvGUP0D73sYhUzGlujClOzWPdOPi6J8DU/vDDx35HVCybvckUpKjaMpGqWttbaqlqlXyPaxf2PmMqvBr1XYKPbQPvD4Dvpxf/Hp/lzt40adIipk9f5nc4phwIZJy7MZVP9VhXqiA+EaYNgnXv+h1RsW67rS8dOjTj9tsnk5b2q9/hGJ9ZcjemMNH1XLGxBp1h+lBYM9nviIpkszeZ/Cy5G1OUanVgyExodDp8eCmsClbNvNCw2ZtMLkvuxhSnai0Y/BEkdIePRsCKV/yOqEg2e5MBS+7GBCaqJgycAc3OgZlXQOoLfkdUJJu9yVhyNyZQUTXgwvfhuPPgs+tg1o2udHA5ZLM3GUvuxpRElWi4YCp0vAWW/Qfe6lFuJ9622ZsqN0vuxpRUZBSc9SSc9yZsT4UJHSFtvt9RFchmb6q8LLkbc7ROuhguXeQuuL7dE5b+G8pZ94fN3lR5WXI3pjTi2sJli6HFeTBnLHw4HDL3+h3VH9jsTZWTJXdjSqtaHRjwLpz5IKyZBG+cWu4m37bZmyofS+7GBINEQPJfYPDH7gLrxM7lribNuHHnc9JJDbn55jfYvn2P3+GYELPkbkwwNe8Nw1OgzvHw3gWwYBzklI8yADZ7U+Viyd2YYKvTHIZ9AW3/BF894EoH7y8fhbzyz9700ks2e1M4s+RuTChEVYc+L8PZL8DGWW645NZv/I4K+H32pocestmbwpkld2NCRQQSr4Fh8yEnC948HVa+6ndUNntTJWHJ3ZhQa5gMl6dAw9Pg41Hw2fW+ly2w2ZvCnyV3Y8pCjfow5BPodDukPg+Tu8Oen30NyWZvCm+W3I0pKxFVoPtjcP7bsGMFTOgAmz73NSSbvSl8WXI3pqydOAQu+xqq1YO3e8GSf/pWtsBmbwpfltyN8UNsa5fgTxgAn98KM4bBoQxfQrHZm8KTJXdj/FKtNpw/Bbo+AuumwBvJ8OtaX0IZNKgjgwd3tNmbSqg818mX8hxcQTp16qRLlizxOwxjguunWfDBMMg+CH1fg5YXlnkIe/YcoHfvJ8jMzOazz26nbt0aZR5DeXbwYBarV29m2bKNLFu2idTUjQwc2IGbbjrHt5hEJEVVOxX0WpWyDsYYU4BmvVzZgmmDYdpA6HI3nHE/RESWWQi5szcNGPA0d9zxFv/3fyMRkTI7fnmSnZ3D+vVbWbZsk5fMN7J69WYOHXLXJGJjY0hKakLTprE+R1o4S+7GlBe1m7obnmbfCF8/DFuXQL83oEZcmYWQO3vTww9/wKRJi7j00lPL7Nh+UVU2bkzPS+SpqZtYvjyNvXvdzV0xMdVITGzCVVd1JympCUlJTWncuF65/+KzbhljyqNv/wuzb4CaDeGCd6BBxzI7dE5ODsOGvUBKyk98/PEttGzZoMyOXRa2bfstr2slN5nv3Olq8FerVoW2bRuTmOiSeFJSU44/Pp6IiPJ5ebKobhlL7saUV78shmlDYN9W6PUcnHxF2R36l9306vU4jRrVZcaMsVSrVjH/yN+9ez+pqS6B5yb0LVt2ARARIZx0UsO8RJ6Y2ISTTmpI1aoV51wtuRtTUe3bAR9cAhs/g/ajocfTUKVamRz6k09WMGrUy4we3Z17772wTI5ZGvv2HWLlyp9JTd3IN9+4hL5hw/a811u0iCMxsSlJSU1ITGzKySc3pkaNsvksQ8UuqBpTUdWIcxOALPgbfP0IbFvmhk/WbhLyQ+efval791b06NE65McMVGZmNmvWbCE19ffulbVrfyE729Wob9iwDomJTRk6tDOJia5VXtlG/4S05S4ifYF/A5HAf1X1kQK2GQrcCyiQqqqXFrVPa7mbSmvdVPh4JERGQ/83oWnPkB9y//5DnHfeU+zYsYdZs+4gPr5WyI95uJycHDZs2JGvRb6RlSs3c+BAJgB169b4Q9dKUlJTjj22TpnH6QdfumVEJBL4DjgHSAMWA5eo6qp827QE3gJ6qupOEamvqtuK2q8ld1Oppa+BaYNg51p381On21xp4RBas2YL/fr9i1NPPZ4JE64O6cVFVeXnn3d5LXLXtZKauok9ew4Abjap9u0T/pDImzWLLfcjV0LFr26ZLsB6Vd3gBfEmMABYlW+bq4FnVXUnQHGJ3ZhKL/YkuGwRzLwS5t0BWxZB31egauha1LmzN/3lL+/w0kvzuOaas4K27/T0jHwXO92yY4crwxAVFUnr1g0ZOLBDXl95y5YNqFKl7Mb+V2ShTO6NgU35nqcByYdtcyKAiCzAdd3cq6ofH74jERkNjAZo2rRpSII1psKoWgv6T4aUZJh3J0xcCRdMdYk/REaOPIPPP1/LQw/N4PTTT+DkkxNKvI+MjAN8+22a1yJ3feWbNrlKlCJCy5b16dmzdV4ib926EdHRUcE+lUojlN0yQ4C+qnqV9/xyIFlVx+TbZgaQCQwFEoB5wMmququw/Vq3jDH5bJwDMy6GrP3QdzycODhkh0pPz+Ccc56gZs1qzJx5S5EjTQ4ezGLVqp/zbtP/5puNrF+/La8WS5Mmx+SNWklKakr79gnExESHLPZw5Ve3zM9A/kv6Cd66/NKARaqaCfwgIt8BLXH988aY4jTtAZcvdWULpg+BznfAmQ+62vFBljt709ChzzNu3FSefHIY4G7V/+67X/ISubtVfwuZme5W/fj4WiQmNmHAgFPy+spjY2OCHp/5o1Am98VASxFpgUvqw4DDR8K8B1wCvCIicbhumg0hjMmY8FMrAS6eB3NuhsWPubIF570JNeKDfqjc2ZueeeYzMjOz2bTpV779No39+920gbVqRZOY2ITRo7vn3eHZqFHdSnvB00+hHgrZD3gK15/+P1V9UETuA5ao6jRxP/Engb5ANvCgqr5Z1D6tW8aYIqx4BT67zk3rd8E7cGznoB8iMzObIUOeZfnyNNq2bZxXbyUxsSnHHRdXbm/VD0d2h6oxlcnWpW645N4t0PM/0P7qoB8iOzsHVbWRKz4rKrnbV6wx4aZBB1c+OOEs+HQ0zLwKsg4E9RCRkRGW2PduhYzNfkdRKEvuxoSj6rEw6ENI/iuseBne7Aq//eR3VBXb/nRY9y7MGgPj28ILx0LKv/yOqlBWW8aYcBURCWc+4PrdPxoBr3d0ZQuane13ZBXDwd/g5/mwcbZbtqcCClVqQEJXaDMSjuvnd5SFsuRuTLg7YQBcttj1w7/TB854ELrcGfKyBRVO5j74eQFsmgObZsMvS0CzIbIaNDodzrgPmvRwX5aRVf2OtliW3I2pDI45ES79Cj65Cr64G3752t30VK2235H5J+sg/LLo95b5lq8gJ9PdI3BsF0i+G5r0hEanQZWKd4OVJXdjKouqMXDeJGh4Knx+G0zsDAOmQmwbvyMrGzlZrjW+aba7s3fzAndnr0RA/Q7Q8c+uZd74TPdZVXCW3I2pTESg41g3omb6UJjYBfq8Aq0u8juy4MvJdv3km+a4lvnP8+HQHvdafHs3+UmTnpDQDaLr+hpqKFhyN6YySujmhktOvwhmDIUtt0K3R0JStqDMqEL6KpfIN82BtLlwYKd7rV4raD3ctcybnBWSu3fLmwr8kzTGlEqtxnDxXJh7C6Q8CdtSXLXJGvX9jiwwqrBr/e8t801zYJ9XNbxOCzhhoJvQpEkPiGnkb6w+sORuTGUWWRV6/cddQPzsGni9g5vGr9GpfkdWsN82/p7IN86GjDS3PqYRNOvtEnnTHi65V3KW3I0x0HaE64eeNggmd4OeT0P7a/wfLrn3F3fxc5M3omW3V1ewepyXyHu6fvN6Lf2PtZyx5G6MceonwWVL4KPhrvjYlq+g1/MQVb3sYtifDpvm/t4y/3W1W1+tjiun0OFml9Tj2rpRLqZQltyNMb+rfgwMnAFf/gO+ug+2f+uqS4aqm+Pgbkib//vwxNy7QKNqQuOu0O5PrnUen+TuuDUBs+RujPkjiYAz/uGVLRgOEzrBeW9A8z6l33fm3t/vAt0429We1xx3F2jjM+CM+/PdBWpT7JWGJXdjTMGO7++6aaYNgnfOdYk3+e6SdYdkHXTdOxtnu9b5lkW/3wXa8FRX2KxpT/e4At4FWp5ZcjfGFK7eCXDpQvhkNCz4m1e24NXCb/rJznSt8dwRLZsXuHLDEgENOkLHW9xolkZnhMVdoOVZhZusQ0S2A37XLo0Ddvgcg58q+/mDfQZgnwH4/xk0U9UC78iqcMm9PBCRJYXNflIZVPbzB/sMwD4DKN+fgY0lMsaYMGTJ3RhjwpAl96Pzot8B+Kyynz/YZwD2GUA5/gysz90YY8KQtdyNMSYMVdrkLiL/E5FtIrIixMcZLCIqIuXyirqIRIrINyIyIwT7ThKRr0RkmYgsEZEuwT7G0RCRP4vIShFZISKTRCSod8+IyEXe/nMO/7mLSHsRWei9vjzYxy4ipkJ/30XkRhFZ48X0WJCP20xElnq/AytF5FpvfQ0R+SDfcR8J5nELiCNaRL4WkVTveP/I99pEEVnr/T78T0SCfmtscT93EZkW9FykqpVyAboBHYAVJXhPvRIeoxYwD/gK6OT3ORcS4y3AG8CMYH8GwCfAud7jfsDccnC+jYEfgOre87eAUUE+79ZAK2Bu/p877qbBb4FE73ksEFlG513g7zvQA/gMqOY9rx/kz6Jqvn3HAD8CjYAaQI9828zP/V0J0fkLEOM9jgIWAafm+90Ub5kEXBfkz6DInzswyPs/GHAuCmSptC13VZ0H/FrCt73nfcNeICKB3N17P/AocKDEAZYBEUkAzgP+W4K3LfFaOj1Fiq2xqkDuDMx1gM1HEWYoVAGqez/DGgQWV8DnraqrVXVtAS/1Br5V1VRvu3RVzS5p8EejiN/364BHVPWgt922AHZ3sdfKvVVEipzSSFUP5e4bqIbXW6Cq+1R1Tu42wFIgIbCzKTl1MrynUd6i3msfeq8r8HWAcdzu/SVwjYgUN8t4oT93EYnBNbAeKPlZFa3SJvejdBbwT2AIsFpEHhKREwraUEQ6AE1U9YMyjK+kngLuAHJK8J4Tca2bMcAqEfmLiBQ2zc1Y4HER2QQ8Adx99KEGh6r+7MWyEdgC7FbVTwJ4a0nOu6h9qIjM9Loq7ijh+0PhRKCriCwSkc9FpHNxb1DVF4BzcV+M80Rkioj0FSm46IyINBGRb4FNwKOquvmw1+sC5wOzSnkuRfK6IJcB24BPVXXRYa9HAZcDHxe3L1X9i7ftccBSEXlFRM4sZPOifu73A08C+0p8QgEEWWkXoDlH+acQrkX6IJAFDD7stQjcn+TNvedzKWfdMkB/4Dnv8VkE2C1z2D7ica3+LKBLAa8/nfvZAEOBz8rBedcDZnuxRwHvAcODed75tvvDzx24DdclFIdLjAuBXmV47kf8vgMrgGdwXRJdvPikBPsUXLfGZmBaMds2wrWMG+RbVwX4CBhbhp9DXWAO0O6w9S8BTx3F/iKB4cBu4OkCXi/w5w4k5X5mpclFhS3Wci9E7re8t9yXb311EbkUeBfoA9wMfHrY22sB7YC5IvIjcCowrZxdVD0DuMCL702gp4hMyL+B1+LK/Qyuzbe+johcA0wDWgJX4PoUDzcS9zkBvI1LHn47G/hBVberaiYuvtPzbxCE8y5MGjBPVXeo6j7gQ1w/uJ/SgHfV+Rr3V1xc/g1E5MHcz+Ow9V2A53Bf4m9RzF9m6lrsK4Cu+Va/CKxT1adKeR4BU9VduOTeN3ediPwd96V9S0Hv8Vrmy0Tkw3zrRER6Aq8C9+A+hycLeHthP/fTgE7e/8EvgBNFZG6pTzBXWX1blseFEn5bAo/hvoH/A5xSgvfNpZy13A+L7ywCv6A6AfgeeARoWcy2q4GzvMe9gJRycK7JwEpcC0pw/zFvDOZ5F/Zzx/3VsNQ7dhXchczzyvDcj/h9B64F7vMen4jrOimy5Y7Xh4y7YD4UqFrEtgn8fvG6HvAdcLL3/AHgHSCiDM49HqjrPa6Ou4Db33t+FfBlbpwB7u8yYK0Xfz+KuDAeyM+9pLkooBjL6hervC24/tMtQCbum/XKAN7TD4g+imOFU3K/AKgS4LZnAilAKm50Qke/z9WL6x/AGlwr8nW80RxBPO+B3u/UQWArMDPfa8O9L5cVwGNleM4F/r7jRqpM8OJZCvQMYF8dcdUIAznuOd4XQar372hvfQLuguZqYJm3XBXC828PfOPFsAK4J99rWbgv7tw47glgf2cC8SU4fpE/91Akd7tD1RhjwpD1uRtjTBiy5G6MMWHIkrsxxoQhS+7GGBOGLLkbY0wYsuRuwoqINBCRN0Rkg4ikeJX4BnqvnSUiu72bUVZ7N64gIqNE5D+H7WduObvpzJgSseRuwoZX0Os93N2Ax6lqR2AYfywENV9Vk4BOwHCvBpAxYceSuwknPYFD6gpbAaCqP6nqM4dvqKp7cTdYFVj4rTAi8qOIxHmPO+XeLi4i3fOVLPhGRGqJSIyIzPKKRS0XkQH59jNOXA3xL8TVlL/NW3+8iHzs/dUxX0ROOpoPwphAytYaU1G0xd1lWSwRicXV/LkfKLYSYgBuA25Q1QVeGdfcMs8DVfU37wvhKxGZhvurYTCQiCtethT3RQOu1sq1qrpORJJxtVt6BiE+U8lYcjdhS0Sexd0mfkhVcxN4VxH5Blcg6xFVXVlE33pJbt9eAPxTRCbiCnGleSVkHxKRbt7xGgMNcEXb3lfVA8ABEZnuxRuDK2L2tvxeMr5aCWIwJo8ldxNOVuJaxACo6g1ei3lJvm3mq2r/w96XjivulN8xwI4CjpHF792ZeVOlqeojIvIBrv7QAhHpg/vLIB5XUyfTq/5X1LR6EcAu75qAMaVife4mnMwGokXkunzragTwvsXAGSJyLLi+dFyLeVMB2/6IK5wF+b5IROR4VV2uqo96+zsJN/vUNi+x9wCaeZsvAM4XN69nDK62Pqr6G/CDiFzk7VNEJDGA+I05grXcTdhQVRWRC4F/ebPdbAf2AncW876tInIz8KE3m1AGcImqFjRD1T+Al0Xkfly1z1xjvQSeg/sL4iNcXf/pIrIc99fDGu94i72+929xVSOX4yZ6AFdK9nkR+RuuP/5NXEVFY0rEqkIa4wMRiVHVDBGpgZtEfbSqBnQx2JhAWMvdGH+8KCJtcH3wr1piN8FmLXdjjAlDdkHVGGPCkCV3Y4wJQ5bcjTEmDFlyN8aYMGTJ3RhjwpAld2OMCUP/D4LzvOpD9rk/AAAAAElFTkSuQmCC\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# plot the computing time\n", + "print(tot_time_hpc2)\n", + "plot_computation_time(tot_time_hpc1, tot_time_hpc2, ngpus_sort, names_hpc, \\\n", + " \"./total_computation_time_{0}_vs_{1}\".format(*names_hpc), log_yvals=False)\n", + "\n", + "plot_computation_time(overhead_hpc1, overhead_hpc2, ngpus_sort, names_hpc, \\\n", + " \"./overhead_time_{0}_vs_{1}\".format(*names_hpc))\n", + "# plot speed-up factors\n", + "plot_speedup(tot_time_hpc1, tot_time_hpc2, ngpus_sort, names_hpc)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "## evaluate iteration time\n", + "# get iteration times\n", + "iter_data_hpc1 = get_iter_time_all(base_dir, wildcard_hpc1, gpu_id_str=gpu_id_str[0])\n", + "iter_data_hpc2 = get_iter_time_all(base_dir, wildcard_hpc2, gpu_id_str=gpu_id_str[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving plot in file: boxplot_iter_time_Juwels_vs_Booster.png ...\n" + ] + } + ], + "source": [ + "# plot the iteration time in box plots\n", + "boxplot_iter_time(iter_data_hpc1, iter_data_hpc2, ngpus_sort, names_hpc)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "def get_slowiter(iter_time, threshold):\n", + " inds_slow = np.where(iter_time > threshold)[0]\n", + " return iter_time[inds_slow], np.shape(inds_slow)[0]\n", + "\n", + "def ana_slowiter(itertime1, itertime2, thres, names):\n", + " slowt1, nslow1 = get_slowiter(itertime1, thres)\n", + " slowt2, nslow2 = get_slowiter(itertime2, thres)\n", + " \n", + " if nslow1 > 0:\n", + " print(\"{0:d} slow iteration steps on {1} with averaged time of {2:5.2f}s (max: {3:5.2f}s)\"\\\n", + " .format(nslow1, names[0], np.mean(slowt1), np.max(slowt1)))\n", + " else: \n", + " print(\"No slow iterations on {0}\".format(names[0]))\n", + " \n", + " if nslow2 > 0:\n", + " print(\"{0:d} slow iteration steps on {1} with averaged time of {2:5.2f}s (max: {3:5.2f}s)\"\\\n", + " .format(nslow2, names[1], np.mean(slowt2), np.max(slowt2)))\n", + " else: \n", + " print(\"No slow iterations on {0}\".format(names[1]))" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "***** Analyse single GPUs experiments *****\n", + "1 slow iteration steps on Juwels with averaged time of 5.18s (max: 5.18s)\n", + "No slow iterations on Booster\n", + "***** Analyse 4 GPUs experiments *****\n", + "No slow iterations on Juwels\n", + "No slow iterations on Booster\n", + "***** Analyse 8 GPUs experiments *****\n", + "No slow iterations on Juwels\n", + "No slow iterations on Booster\n", + "***** Analyse 32 GPUs experiments *****\n", + "No slow iterations on Juwels\n", + "No slow iterations on Booster\n", + "***** Analyse 32 GPUs experiments *****\n", + "No slow iterations on Juwels\n", + "No slow iterations on Booster\n", + "***** Analyse 64 GPUs experiments *****\n", + "No slow iterations on Juwels\n", + "No slow iterations on Booster\n" + ] + } + ], + "source": [ + " \n", + "## settings\n", + "names = [\"Juwels\", \"Booster\"]\n", + "slowiter_time = 5. # arbitrary threshold for slow iteration steps\n", + "\n", + "# analyze single GPU experiments\n", + "print(\"***** Analyse single GPUs experiments *****\")\n", + "itertime_juwels = iter_data_hpc1[\"1 GPU(s)\"]\n", + "itertime_booster = iter_data_hpc2[\"1 GPU(s)\"]\n", + "\n", + "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", + "\n", + "# analyze 4 GPUs experiments\n", + "print(\"***** Analyse 4 GPUs experiments *****\")\n", + "itertime_juwels = iter_data_hpc1[\"4 GPU(s)\"]\n", + "itertime_booster = iter_data_hpc2[\"4 GPU(s)\"]\n", + "\n", + "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", + "\n", + "# analyze 8 GPUs experiments\n", + "print(\"***** Analyse 8 GPUs experiments *****\")\n", + "itertime_juwels = iter_data_hpc1[\"8 GPU(s)\"]\n", + "itertime_booster = iter_data_hpc2[\"8 GPU(s)\"]\n", + "\n", + "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", + "\n", + "# analyze 16 GPUs experiments\n", + "print(\"***** Analyse 32 GPUs experiments *****\")\n", + "itertime_juwels = iter_data_hpc1[\"16 GPU(s)\"]\n", + "itertime_booster = iter_data_hpc2[\"16 GPU(s)\"]\n", + "\n", + "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", + "\n", + "# analyze 32 GPUs experiments\n", + "print(\"***** Analyse 32 GPUs experiments *****\")\n", + "itertime_juwels = iter_data_hpc1[\"32 GPU(s)\"]\n", + "itertime_booster = iter_data_hpc2[\"32 GPU(s)\"]\n", + "\n", + "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)\n", + "\n", + "# analyze 64 GPUs experiments\n", + "print(\"***** Analyse 64 GPUs experiments *****\")\n", + "itertime_juwels = iter_data_hpc1[\"64 GPU(s)\"]\n", + "itertime_booster = iter_data_hpc2[\"64 GPU(s)\"]\n", + "\n", + "ana_slowiter(itertime_juwels[1:], itertime_booster[1:], slowiter_time, names)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Summary\n", + "- Occasionally, a few iteration steps are slow\n", + "- However, performance degradation seems to be much worser on Booster than on Juwels\n", + "- Higher chance for slow iteration steps on Booster in general" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [], + "source": [ + "def boxplot_iter_total_time(iteration_time, total_time, ngpu_list, name, log_yvals=False):\n", + " nexps = len(ngpu_list)\n", + " bar_width = 0.35\n", + " # create data lists for boxplot-routine\n", + " iter_time_all = []\n", + " for i in np.arange(nexps):\n", + " iter_time_all.append(iteration_time[\"{0} GPU(s)\".format(ngpu_list[i])])\n", + " \n", + " # trick to get list with duplicated entries\n", + " xlabels = [val for val in ngpu_list for _ in (0, 1)]\n", + " nlabels = len(xlabels)\n", + "\n", + " # Multiple box plots on one Axes\n", + " #fig, ax = plt.subplots()\n", + " fig = plt.figure(figsize=(6,4))\n", + " ax = plt.axes([0.1, 0.15, 0.75, 0.75]) \n", + " \n", + " bp = ax.boxplot(iter_time_all, positions=np.arange(0, nlabels, 2), notch=0, sym='+', vert=1, showfliers=False, widths=bar_width) # Outliers for initialization are disturbing\n", + " ax.set_xlabel('# GPUs')\n", + " ax.set_ylabel('Time [s]')\n", + " \n", + " # Reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html \n", + " num_boxes = len(iter_time_all)\n", + " medians = np.empty(num_boxes)\n", + " for i in range(num_boxes):\n", + " box = bp['boxes'][i]\n", + " boxX = []\n", + " boxY = []\n", + " for j in range(5):\n", + " boxX.append(box.get_xdata()[j])\n", + " boxY.append(box.get_ydata()[j])\n", + " box_coords = np.column_stack([boxX, boxY])\n", + " ax.add_patch(Polygon(box_coords, facecolor=colors[1]))\n", + " # Now draw the median lines back over what we just filled in\n", + " med = bp['medians'][i]\n", + " medianX = []\n", + " medianY = []\n", + " for j in range(2):\n", + " medianX.append(med.get_xdata()[j])\n", + " medianY.append(med.get_ydata()[j])\n", + " ax.plot(medianX, medianY, 'k')\n", + " medians[i] = medianY[0]\n", + " # Finally, overplot the sample averages, with horizontal alignment\n", + " # in the center of each box\n", + " ax.plot(np.average(med.get_xdata()), np.average(iter_time_all[i]),\n", + " color='w', marker='*', markeredgecolor='k', markersize=10)\n", + " \n", + " ax2 = ax.twinx()\n", + " x_pos = np.arange(1, nlabels+1 ,2)\n", + " \n", + " ytitle = \"Time [min]\"\n", + " max_time = np.max(total_time)\n", + " time_order = val_order(max_time)\n", + " ymax = np.ceil(max_time/(10**time_order) + 0.5)*(10**time_order) + 10**time_order\n", + " # np.ceil(np.maximum(np.max(times1)/100. + 0.5, np.max(times2)/100. + 0.5))*100.\n", + " if log_yvals: \n", + " total_time = np.log(total_time)\n", + " ytitle = \"LOG(Time) [min]\"\n", + " ymax = np.ceil(np.max(total_time) + 0.5)\n", + " \n", + " # create data bars\n", + " rects = ax2.bar(x_pos, np.round(total_time, 2), bar_width, label=names, color=colors[0])\n", + " # customize plot appearance\n", + " # Add some text for labels, title and custom x-axis tick labels, etc.\n", + " ax2.set_ylabel(ytitle)\n", + " ax2.set_xticks(np.arange(0, nlabels))\n", + " ax2.set_xticklabels(xlabels)\n", + " ax2.set_xlabel('# GPUs')\n", + " ax2.set_ylim(0., ymax)\n", + " \n", + " # add labels\n", + " autolabel(ax2, rects, rot=45) \n", + "\n", + " plt_fname = \"iter+tot_time_{0}_vs_{1}\".format(*names)\n", + " print(\"Saving plot in file: {0}.png ...\".format(plt_fname))\n", + " #plt.show()\n", + " plt.savefig(plt_fname+\".png\")\n", + " plt.close()\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving plot in file: iter+tot_time_Juwels_vs_Booster.png ...\n" + ] + } + ], + "source": [ + "boxplot_iter_total_time(iter_data_hpc2, tot_time_hpc2, ngpus_sort, names_hpc[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test/run_pytest.sh b/test/run_pytest.sh index 83220d34a51379e93add931ae6e03e9491b5bce4..6aae33cf0312455efbaa95bbd491440e6d672b2e 100644 --- a/test/run_pytest.sh +++ b/test/run_pytest.sh @@ -2,7 +2,7 @@ # Name of virtual environment #VIRT_ENV_NAME="vp_new_structure" -VIRT_ENV_NAME="juwels_env" +VIRT_ENV_NAME="env_hdfml" if [ -z ${VIRTUAL_ENV} ]; then if [[ -f ../video_prediction_tools/${VIRT_ENV_NAME}/bin/activate ]]; then @@ -24,7 +24,7 @@ fi source ../video_prediction_tools/env_setup/modules_train.sh ##Test for preprocess moving mnist #python -m pytest test_prepare_moving_mnist_data.py -python -m pytest test_train_moving_mnist_data.py +#python -m pytest test_train_moving_mnist_data.py #Test for process step2 #python -m pytest test_data_preprocess_step2.py #python -m pytest test_era5_data.py @@ -33,5 +33,5 @@ python -m pytest test_train_moving_mnist_data.py #rm /p/project/deepacf/deeprain/video_prediction_shared_folder/models/test/* #python -m pytest test_train_model_era5.py #python -m pytest test_vanilla_vae_model.py -#python -m pytest test_visualize_postprocess.py +python -m pytest test_visualize_postprocess.py #python -m pytest test_meta_postprocess.py diff --git a/test/test_visualize_postprocess.py b/test/test_visualize_postprocess.py index aebcda3754dd3599c1da1c7c5b0cec1df8364b94..3d0408bab293864af689bc4705f7c8cfa5506403 100644 --- a/test/test_visualize_postprocess.py +++ b/test/test_visualize_postprocess.py @@ -7,17 +7,17 @@ from main_scripts.main_visualize_postprocess import * import pytest import numpy as np import datetime - +from netCDF4 import Dataset, date2num ########Test case 1################ results_dir = "/p/project/deepacf/deeprain/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/savp/20210324T120926_ji4_savp_cv12" checkpoint = "/p/project/deepacf/deeprain/video_prediction_shared_folder/models/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/savp/20210324T120926_ji4_savp_cv12" mode = "test" batch_size = 2 -num_samples = 16 num_stochastic_samples = 2 gpu_mem_frac = 0.5 -seed=12345 +seed = 12345 +eval_metrics=["mse", "psnr"] class MyClass: @@ -31,28 +31,17 @@ args = MyClass(results_dir) @pytest.fixture(scope="module") def vis_case1(): return Postprocess(results_dir=results_dir,checkpoint=checkpoint, - mode=mode,batch_size=batch_size,num_samples=num_samples,num_stochastic_samples=num_stochastic_samples, - gpu_mem_frac=gpu_mem_frac,seed=seed,args=args) -######instance2 -num_samples2 = 200000 -@pytest.fixture(scope="module") -def vis_case2(): - return Postprocess(results_dir=results_dir,checkpoint=checkpoint, - mode=mode,batch_size=batch_size,num_samples=num_samples2,num_stochastic_samples=num_stochastic_samples, - gpu_mem_frac=gpu_mem_frac,seed=seed,args=args) + mode=mode,batch_size=batch_size, + num_stochastic_samples=num_stochastic_samples, + seed=seed,args=args,eval_metrics=eval_metrics) def test_load_jsons(vis_case1): - vis_case1.set_seed() - vis_case1.save_args_to_option_json() - vis_case1.copy_data_model_json() - vis_case1.load_jsons() assert vis_case1.dataset == "era5" assert vis_case1.model == "savp" assert vis_case1.input_dir_tfr == "/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/tfrecords_seq_len_24" assert vis_case1.run_mode == "deterministic" def test_get_metadata(vis_case1): - vis_case1.get_metadata() assert vis_case1.height == 56 assert vis_case1.width == 92 assert vis_case1.vars_in[0] == "2t" @@ -60,70 +49,123 @@ def test_get_metadata(vis_case1): def test_setup_test_dataset(vis_case1): - vis_case1.setup_test_dataset() vis_case1.test_dataset.mode == mode - -#def test_copy_data_model_json(vis_case1): -# vis_case1.copy_data_model_json() -# isfile_copy = os.path.isfile(os.path.join(checkpoint,"options.json")) -# assert isfile_copy == True -# isfile_copy_model_hpamas = os.path.isfile(os.path.join(checkpoint,"model_hparams.json")) -# assert isfile_copy_model_hpamas == True - - -def test_setup_num_samples_per_epoch(vis_case1): - vis_case1.setup_test_dataset() - vis_case1.setup_num_samples_per_epoch() - assert vis_case1.num_samples_per_epoch == num_samples - + def test_get_data_params(vis_case1): - vis_case1.get_data_params() assert vis_case1.context_frames == 12 assert vis_case1.future_length == 12 def test_run_deterministic(vis_case1): - vis_case1() + vis_case1.num_samples_per_epoch = 20 vis_case1.init_session() vis_case1.restore(vis_case1.sess,vis_case1.checkpoint) - vis_case1.sample_ind = 0 - vis_case1.init_eval_metrics_list() - vis_case1.input_results,vis_case1.input_images_denorm_all, vis_case1.t_starts = vis_case1.run_and_plot_inputs_per_batch() - assert len(vis_case1.t_starts_results) == batch_size - ts_1 = vis_case1.t_starts[0][0] + print("fcast-product",vis_case1.fcst_products) + eval_metric_ds = Postprocess.init_metric_ds(vis_case1.fcst_products, vis_case1.eval_metrics, vis_case1.vars_in[vis_case1.channel], vis_case1.num_samples_per_epoch, vis_case1.future_length) + + input_results,input_images_denorm_all,t_starts = vis_case1.get_input_data_per_batch(vis_case1.inputs) + assert len(t_starts) == batch_size + ts_1 = t_starts[0][0] year = str(ts_1)[:4] month = str(ts_1)[4:6] filename = "ecmwf_era5_" + str(ts_1)[2:] + ".nc" - fl = os.path.join("/p/project/deepacf/deeprain/video_prediction_shared_folder/extractedData",year,month,filename) + fl = os.path.join("/p/scratch/deepacf/deeprain/ambs_era5/extractedData",year, month, filename) print("netCDF file name:",fl) with Dataset(fl,"r") as data_file: t2_var = data_file.variables["2t"][0,:,:] t2_var = np.array(t2_var) t2_max = np.max(t2_var[117:173,0:92]) t2_min = np.min(t2_var[117:173,0:92]) - input_image = np.array(vis_case1.input_images_denorm_all)[0,0,:,:,0] #get the first batch id and 1st sequence image + input_image = np.array(input_images_denorm_all)[0,0,:,:,0] #get the first batch id and 1st sequence image input_img_max = np.max(input_image) input_img_min = np.min(input_image) print("input_image",input_image[0,:10]) assert t2_max == input_img_max - assert t2_min == input_img_min - - feed_dict = {input_ph: vis_case1.input_results[name] for name, input_ph in vis_case1.inputs.items()} + assert t2_min == input_img_min + sample_ind = 0 + feed_dict = {input_ph: input_results[name] for name, input_ph in vis_case1.inputs.items()} gen_images = vis_case1.sess.run(vis_case1.video_model.outputs['gen_images'], feed_dict=feed_dict) + gen_images_denorm = vis_case1.denorm_images_all_channels(gen_images, vis_case1.vars_in, vis_case1.norm_cls, + norm_method="minmax") ############Test persistenct value############# - vis_case1.ts = Postprocess.generate_seq_timestamps(vis_case1.t_starts[0], len_seq=vis_case1.sequence_length) - vis_case1.get_and_plot_persistent_per_sample(sample_id=0) - ts_1_per = (datetime.datetime.strptime(str(ts_1), '%Y%m%d%H') - datetime.timedelta(hours=23)).strftime("%Y%m%d%H") + times_0, init_times = vis_case1.get_init_time(t_starts) + batch_ds = vis_case1.create_dataset(input_images_denorm_all, gen_images_denorm, init_times) + nbs = np.minimum(vis_case1.batch_size, vis_case1.num_samples_per_epoch - sample_ind) + + times_seq = (pd.date_range(times_0[0], periods=int(vis_case1.sequence_length), freq="h")).to_pydatetime() + persistence_seq, _ = Postprocess.get_persistence(times_seq, vis_case1.input_dir_pkl) + ts_1_per = (pd.to_datetime(times_0[0]) - datetime.timedelta(hours=23)).strftime("%Y%m%d%H") + year_per = str(ts_1_per)[:4] month_per = str(ts_1_per)[4:6] filename_per = "ecmwf_era5_" + str(ts_1_per)[2:] + ".nc" - fl_per = os.path.join("/p/project/deepacf/deeprain/video_prediction_shared_folder/extractedData",year_per,month_per,filename_per) + + fl_per = os.path.join("/p/scratch/deepacf/deeprain/ambs_era5/extractedData",year_per,month_per,filename_per) with Dataset(fl_per,"r") as data_file: - t2_var_per = data_file.variables["2t"][0,117:173,0:92] + t2_var_per = data_file.variables["2t"][0,117:173,0:92] t2_per_var = np.array(t2_var_per) t2_per_max = np.max(t2_per_var) - per_image_max = np.max(vis_case1.persistence_images[0]) + per_image_max = np.max(persistence_seq[0]) assert t2_per_max == per_image_max + + + ##Test evaluation metric + for ivar, var in enumerate(vis_case1.vars_in): + batch_ds["{0}_persistence_fcst".format(var)].loc[dict(init_time=init_times[0])] = \ + persistence_seq[vis_case1.context_frames-1:, :, :, ivar] + + eval_metric_ds = vis_case1.populate_eval_metric_ds(eval_metric_ds,batch_ds,sample_ind,vis_case1.vars_in[vis_case1.channel]) + ##now manuly calculate the mse and see if values is the same as the ones in eval_metric_ds + #calculate the mse between generateed images and reference images + sample_gen = gen_images_denorm[0,vis_case1.context_frames-1:,:,:,vis_case1.channel] + sample_ref = input_images_denorm_all[0,vis_case1.context_frames:,:,:,vis_case1.channel] + sample_gen_ref_mse_t0 = np.mean((sample_gen[0] - sample_ref[0])**2) + metric_name = "2t_savp_mse" + print("eval_metric_ds",eval_metric_ds) + assert eval_metric_ds[metric_name][0,0] == sample_gen_ref_mse_t0 + sample_gen_ref_mse_t5 = np.mean((sample_gen[5] - sample_ref[5])**2) + assert eval_metric_ds[metric_name][0,5] == sample_gen_ref_mse_t5 + + +def test_plot_conditional_quantiles(vis_case1): + vis_case1.nun_samples_per_epoch = 20 + vis_case1.run_deterministic() + # the variables for conditional quantile plot + var_fcst = vis_case1.cond_quantile_vars[0] + var_ref = vis_case1.cond_quantile_vars[1] + data_fcst = get_era5_varatts(vis_case1.cond_quantiple_ds[var_fcst], vis_case1.cond_quantiple_ds[var_fcst].name) + data_ref = get_era5_varatts(vis_case1.cond_quantiple_ds[var_ref], vis_case1.cond_quantiple_ds[var_ref].name) + print("data_fcast",data_fcst) + fhhs = data_fcst["fcst_hour"] + + hh = 1 + quantile_panel_cf, cond_variable_cf = calculate_cond_quantiles(data_fcst.sel(fcst_hour=hh), + data_ref.sel(fcst_hour=hh), + factorization="calibration_refinement", + quantiles=(0.05, 0.5, 0.95)) + + + + + data_cond = data_fcst.sel(fcst_hour=hh) + data_tar = data_ref.sel(fcst_hour=hh) + data_cond_min, data_cond_max = np.floor(np.min(data_cond)), np.ceil(np.max(data_cond)) + bins = list(np.arange(int(data_cond_min), int(data_cond_max) + 1)) + nbins = len(bins) - 1 + + bin_l_1, bin_r_1 = bins[0], bins[1] + #find position of the values between bin + data_cropped = data_tar.where(np.logical_and(data_cond >= bins_l_1, data_cond < bins_r_l)) + + + + + +#def test_run_determinstic_quantile_plot(vis_case1): +# vis_case1.init_metric_ds() + + + #def test_make_test_dataset_iterator(vis_case1): # vis_case1.make_test_dataset_iterator() # pass @@ -159,7 +201,4 @@ def test_run_deterministic(vis_case1): -############Test case 2################## - - diff --git a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh index fff363affdd39ac4c10542961ab084406ab6ad62..9c03ae7adde3886fd7e005fec5b17b4c7da84dd9 100644 --- a/video_prediction_tools/HPC_scripts/train_model_era5_template.sh +++ b/video_prediction_tools/HPC_scripts/train_model_era5_template.sh @@ -50,8 +50,4 @@ dataset=era5 # run training srun python ../main_scripts/main_train_models.py --input_dir ${source_dir} --datasplit_dict ${datasplit_dict} \ - --dataset ${dataset} --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir} - - - - + --dataset ${dataset} --model ${model} --model_hparams_dict ${model_hparams} --output_dir ${destination_dir} --checkpoint ${destination_dir} diff --git a/video_prediction_tools/data_preprocess/calc_climatology.py b/video_prediction_tools/data_preprocess/calc_climatology.py new file mode 100644 index 0000000000000000000000000000000000000000..1bd3f54735da18e4a2b03dffc001c9364ff96395 --- /dev/null +++ b/video_prediction_tools/data_preprocess/calc_climatology.py @@ -0,0 +1,104 @@ + +__email__ = "b.gong@fz-juelich.de" +__author__ = "Yan Ji, Bing Gong" +__date__ = "2021-05-26" +""" +Use the monthly average vaules to calculate the climitological values from the datas sources that donwload from ECMWF : /p/fastdata/slmet/slmet111/met_data/ecmwf/era5/grib/monthly +""" + +import os +import time +import glob +import json + + +class Calc_climatology(object): + def __init__(self, input_dir="/p/fastdata/slmet/slmet111/met_data/ecmwf/era5/grib/monthly", output_clim=None, region:list=[10,20], json_path=None): + self.input_dir = input_dir + self.output_clim = output_clim + self.region = region + self.lats = None + self.lons = None + self.json_path = json_path + + @staticmethod + def calc_avg_per_year(grib_fl: str=None): + """ + :param grib_fl: the relative path of the monthly average values + :return: None + """ + return None + + def cal_avg_all_files(self): + + """ + Average by time for all the grib files + :return: None + """ + + method = Calc_climatology.cal_avg_all_files.__name__ + + multiyears_path = os.path.join(self.output_clim,"multiyears.grb") + climatology_path = os.path.join(self.output_clim,"climatology.grb") + grib_files = glob.glob(os.path.join(self.input_dir,"*t2m.grb")) + if grib_files: + print ("{0}: There are {1} monthly years grib files".format(method,len(grib_files))) + # merge all the files into one grib file + os.system("cdo mergetime {0} {1}".format(grib_files,multiyears_path)) + # average by time + os.system("cdo timavg {0} {1}".format(multiyears_path,climatology_path)) + + else: + FileExistsError("%{0}: The monthly years grib files do not exit in the input directory %{1}".format(method,self.input_dir)) + + return None + + + def get_lat_lon_from_json(self): + """ + Get lons and lats from json file + :return: list of lats, and lons + """ + method = Calc_climatology.get_lat_lon_from_json.__name__ + + if not os.path.exists(self.json_path): + raise FileExistsError("{0}: The json file {1} does not exist".format(method,self.json_path)) + + with open(self.json_path) as fl: + meta_data = json.load(fl) + + if "coordinates" not in list(meta_data.keys()): + raise KeyError("{0}: 'coordinates' is not one of the keys for metadata,json".format(method)) + else: + meta_coords = meta_data["coordinates"] + self.lats = meta_coords["lat"] + self.lons = meta_coords["lon"] + return self.lats, self.lons + + + def get_region_climate(self): + """ + Get the climitollgical values from the selected regions + :return: None + """ + pass + + + def __call__(self, *args, **kwargs): + + if os.path.exists(os.path.join(self.output_clim,"climatology.grb")): + pass + else: + self.cal_avg_all_files() + self.get_lat_lon_from_json() + self.get_region_climate() + + + + + +if __name__ == '__main__': + exp = Calc_climatology(json_path="/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/metadata.json") + exp() + + diff --git a/video_prediction_tools/data_preprocess/prepare_era5_data.py b/video_prediction_tools/data_preprocess/prepare_era5_data.py index 4f9e4c8246e36a83b8353cd00366b3e7ed83761c..fbb71bdb8b9df163d38a2bfbd06a8e8ecea4588d 100644 --- a/video_prediction_tools/data_preprocess/prepare_era5_data.py +++ b/video_prediction_tools/data_preprocess/prepare_era5_data.py @@ -60,7 +60,7 @@ class ERA5DataExtraction(object): temp_path = os.path.join(self.target_dir, self.year, month) os.makedirs(temp_path, exist_ok=True) - for var,value in self.varslist_surface.items(): + for var, value in self.varslist_surface.items(): # surface variables infile = os.path.join(self.src_dir, self.year, month, self.year+month+day+hour+'_sf.grb') outfile_sf = os.path.join(self.target_dir, self.year, month, self.year+month+day+hour+'_'+var+'.nc') diff --git a/video_prediction_tools/data_preprocess/process_netCDF_v2.py b/video_prediction_tools/data_preprocess/process_netCDF_v2.py index eb62d01c0243c19b722c260942b21aaf64788549..2d030d3e9bb65d917f8725ecbe8df740800dbbcb 100644 --- a/video_prediction_tools/data_preprocess/process_netCDF_v2.py +++ b/video_prediction_tools/data_preprocess/process_netCDF_v2.py @@ -196,8 +196,11 @@ class PreprocessNcToPkl(object): data = data.roll(lon=nroll_lon, roll_coords=True) # init resulting numpy-array... - dshape = list(np.shape(data[self.vars[0]])) + [self.nvars] + print("data[self.vars[0]] shape is: ",data[self.vars[0]].shape) + dshape = list(np.shape(np.squeeze(data[self.vars[0]]))) + [self.nvars] + print("dshape is: ",dshape) data_arr = np.full(dshape, np.nan) + print("data_arr shape is: ",data_arr.shape) # ... and populate the data in it for ivar, var in enumerate(self.vars): data_arr[..., ivar] = np.squeeze(data[var].values) diff --git a/video_prediction_tools/main_scripts/main_train_models.py b/video_prediction_tools/main_scripts/main_train_models.py index c6ac709d7d5d82487e60ab915bf7b41cd11ffabc..751506d4b65df80cc4489f05054cedb9c0799ba8 100644 --- a/video_prediction_tools/main_scripts/main_train_models.py +++ b/video_prediction_tools/main_scripts/main_train_models.py @@ -177,9 +177,9 @@ class TrainModel(object): self.inputs = self.iterator.get_next() #since era5 tfrecords include T_start, we need to remove it from the tfrecord when we train the model, # otherwise the model will raise error - #if self.dataset == "era5" and self.model == "savp": - # del self.inputs["T_start"] - + + if self.dataset == "era5" and self.model == "savp": + del self.inputs["T_start"] def save_dataset_model_params_to_checkpoint_dir(self, dataset, video_model): @@ -233,11 +233,16 @@ class TrainModel(object): self.total_steps = self.steps_per_epoch * max_epochs print("Batch size is {} ; max_epochs is {}; num_samples per epoch is {}; steps_per_epoch is {}, total steps is {}".format(batch_size,max_epochs, self.num_examples,self.steps_per_epoch,self.total_steps)) - def restore(self,sess, checkpoints, restore_to_checkpoint_mapping=None): + def restore(self, sess, checkpoints, restore_to_checkpoint_mapping=None): """ Restore the models checkpoints if the checkpoints is given """ - if checkpoints: + + if checkpoints is None: + print ("Checkpoint is empty!!") + elif os.path.isdir(checkpoints) and (not os.path.exists(os.path.join(checkpoints,"checkpoint"))): + print("There is not checkpoints in the dir {}".format(checkpoints)) + else: var_list = self.video_model.saveable_variables # possibly restore from multiple checkpoints. useful if subset of weights # (e.g. generator or discriminator) are on different checkpoints. @@ -259,13 +264,14 @@ class TrainModel(object): """ Restore the train and validation losses in the pickle file if checkpoint is given """ - if self.start_step == 0: - train_losses = [] - val_losses = [] + if self.checkpoint is None: + train_losses, val_losses = [], [] + elif os.path.isdir(self.checkpoint) and (not os.path.exists(os.path.join(self.output_dir,"checkpoint"))): + train_losses,val_losses = [], [] else: - with open(os.path.join(self.checkpoint,"train_losses.pkl"),"rb") as f: + with open(os.path.join(self.output_dir,"train_losses.pkl"),"rb") as f: train_losses = pkl.load(f) - with open(os.path.join(self.checkpoint,"val_losses.pkl"),"rb") as f: + with open(os.path.join(self.output_dir,"val_losses.pkl"),"rb") as f: val_losses = pkl.load(f) return train_losses,val_losses @@ -293,15 +299,11 @@ class TrainModel(object): self.create_fetches_for_train() # In addition to the loss, we fetch the optimizer self.results = sess.run(self.fetches) # ...and run it here! train_losses.append(self.results["total_loss"]) - print("t_start for training",self.results["inputs"]["T_start"]) - print("len of t_start per iteration",len(self.results["inputs"]["T_start"])) #Run and fetch losses for validation data val_handle_eval = sess.run(self.val_handle) self.create_fetches_for_val() self.val_results = sess.run(self.val_fetches,feed_dict={self.train_handle: val_handle_eval}) val_losses.append(self.val_results["total_loss"]) - print("t_start for validation",self.val_results["inputs"]["T_start"]) - print("len of t_start per iteration",len(self.val_results["inputs"]["T_start"])) self.write_to_summary() self.print_results(step,self.results) timeit_end = time.time() diff --git a/video_prediction_tools/main_scripts/main_visualize_postprocess.py b/video_prediction_tools/main_scripts/main_visualize_postprocess.py index 60dfef0032a7746d57734285a7b86328e0c74f50..0c6504115baa11c6764e1d3933b493c7f4acfe8c 100644 --- a/video_prediction_tools/main_scripts/main_visualize_postprocess.py +++ b/video_prediction_tools/main_scripts/main_visualize_postprocess.py @@ -16,94 +16,113 @@ import tensorflow as tf import pickle import datetime as dt import json -import matplotlib - -matplotlib.use('Agg') -import matplotlib.pyplot as plt -from mpl_toolkits.basemap import Basemap +from typing import Union, List +# own modules from normalization import Norm_data +from general_utils import get_era5_varatts, check_dir from metadata import MetaData as MetaData from main_scripts.main_train_models import * from data_preprocess.preprocess_data_step2 import * from model_modules.video_prediction import datasets, models, metrics -from statistical_evaluation import perform_block_bootstrap_metric, avg_metrics, Scores +from statistical_evaluation import perform_block_bootstrap_metric, avg_metrics, calculate_cond_quantiles, Scores +from postprocess_plotting import plot_avg_eval_metrics, plot_cond_quantile, create_geo_contour_plot class Postprocess(TrainModel): - def __init__(self, results_dir=None, checkpoint=None, mode="test", batch_size=None, num_stochastic_samples=1, - stochastic_plot_id=0, gpu_mem_frac=None, seed=None, args=None, run_mode="deterministic"): - """ - The function for inference, generate results and images - results_dir :str, The output directory to save results - checkpoint :str, The directory point to the checkpoints - mode :str, Default is test, could be "train","val", and "test" - batch_size :int, The batch size used for generating test samples for each iteration - num_stochastic_samples: int, for the stochastic models such as SAVP, VAE, it is used for generate a number of - ensemble for each prediction. - For deterministic model such as convLSTM, it is default setup to 1 - stochastic_plot_id :int, the index for stochastically generated images to plot - gpu_mem_frac :int, GPU memory fraction to be used - seed :seed for control test samples - run_mode :str, if "deterministic" then the model running for deterministic forecasting, other string values, it will go for stochastic forecasting - - Side notes : other important varialbes in the class: - self.ts : list, contains the sequence_length timestamps - self.gen_images_ : the length of generate images by model is sequence_length - 1 - self.persistent_image : the length of persistent images is sequence_length - 1 - self.input_images : the length of inputs images is sequence length - - """ - - # initialize input directories (to be retrieved by load_jsons) - self.input_dir = None - self.input_dir_tfr = None - self.input_dir_pkl = None - # forecast products and evaluation metrics to be handled in postprocessing - self.eval_metrics = ["mse", "psnr"] - self.fcst_products = {"persistence": "pfcst", "model": "mfcst"} - # initialize dataset to track evaluation metrics and configure bootstrapping procedure - self.eval_metrics_ds = None - self.nboots_block = 1000 - self.block_length = 7 * 24 # this corresponds to a block length of 7 days when forecasts are produced every hour - # other attributes - self.stat_fl = None - self.norm_cls = None # placeholder for normalization instance - self.channel = 0 # index of channel/input variable to evaluate - self.num_samples_per_epoch = None - # set further attributes from parsed arguments + def __init__(self, results_dir: str = None, checkpoint: str= None, mode: str = "test", batch_size: int = None, + num_stochastic_samples: int = 1, stochastic_plot_id: int = 0, gpu_mem_frac: float = None, + seed: int = None, channel: int = 0, args=None, run_mode: str = "deterministic", + eval_metrics: List = ("mse", "psnr", "ssim","acc"), clim_path: str ="/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly"): + """ + Initialization of the class instance for postprocessing (generation of forecasts from trained model + + basic evauation). + :param results_dir: output directory to save results + :param checkpoint: directory point to the model checkpoints + :param mode: mode of dataset to be processed ("train", "val" or "test"), default: "test" + :param batch_size: mini-batch size for generating forecasts from trained model + :param num_stochastic_samples: number of ensemble members for variational models (SAVP, VAE), default: 1 + not supported yet!!! + :param stochastic_plot_id: not supported yet! + :param gpu_mem_frac: fraction of GPU memory to be pre-allocated + :param seed: Integer controlling randomization + :param channel: Channel of interest for statistical evaluation + :param args: namespace of parsed arguments + :param run_mode: "deterministic" or "stochastic", default: "deterministic", "stochastic is not supported yet!!! + :param eval_metrics: metrics used to evaluate the trained model + :param clim_path: the path to the climatology nc file + """ + # copy over attributes from parsed argument self.results_dir = self.output_dir = os.path.normpath(results_dir) - if not os.path.exists(self.results_dir): - os.makedirs(self.results_dir) + _ = check_dir(self.results_dir, lcreate=True) self.batch_size = batch_size self.gpu_mem_frac = gpu_mem_frac self.seed = seed + self.set_seed() self.num_stochastic_samples = num_stochastic_samples + #self.num_samples_per_epoch = 20 # reduce number of epoch samples self.stochastic_plot_id = stochastic_plot_id self.args = args self.checkpoint = checkpoint + self.clim_path = clim_path + _ = check_dir(self.checkpoint) self.run_mode = run_mode self.mode = mode - if self.checkpoint is None: - raise ValueError("The directory point to checkpoint is empty, must be provided for postprocess step") - - if not os.path.isdir(self.checkpoint): - raise NotADirectoryError("The checkpoint-directory '{0}' does not exist".format(self.checkpoint)) - - def __call__(self): - self.set_seed() - self.save_args_to_option_json() - self.copy_data_model_json() - self.load_jsons() - self.get_metadata() - self.setup_test_dataset() + self.channel = channel + # Attributes set during runtime + self.norm_cls = None + # configuration of basic evaluation + self.eval_metrics = eval_metrics + self.nboots_block = 1000 + self.block_length = 7 * 24 # this corresponds to a block length of 7 days in case of hourly forecasts + # initialize evrything to get an executable Postprocess instance + self.save_args_to_option_json() # create options.json-in results directory + self.copy_data_model_json() # copy over JSON-files from model directory + # get some parameters related to model and dataset + self.datasplit_dict, self.model_hparams_dict, self.dataset, self.model, self.input_dir_tfr = self.load_jsons() + self.model_hparams_dict_load = self.get_model_hparams_dict() + # set input paths and forecast product dictionary + self.input_dir, self.input_dir_pkl = self.get_input_dirs() + self.fcst_products = {"persistence": "pfcst", self.model: "mfcst"} + # correct number of stochastic samples if necessary + self.check_num_stochastic_samples() + # get metadata + md_instance = self.get_metadata() + self.height, self.width = md_instance.ny, md_instance.nx + self.vars_in = md_instance.variables + self.lats, self.lons = md_instance.get_coord_array() + # get statistics JSON-file + self.stat_fl = self.set_stat_file() + self.cond_quantile_vars = self.init_cond_quantile_vars() + # setup test dataset and model + self.test_dataset, self.num_samples_per_epoch = self.setup_test_dataset() + # self.num_samples_per_epoch = 100 # reduced number of epoch samples -> useful for testing + self.sequence_length, self.context_frames, self.future_length = self.get_data_params() + self.inputs, self.input_ts = self.make_test_dataset_iterator() + # set-up model, its graph and do GPU-configuration (from TrainModel) self.setup_model() - self.get_data_params() - self.setup_num_samples_per_epoch() - self.get_stat_file() - self.make_test_dataset_iterator() - self.check_stochastic_samples_ind_based_on_model() self.setup_graph() self.setup_gpu_config() + self.load_climdata() + # Methods that are called during initialization + def get_input_dirs(self): + """ + Retrieves top-level input directory and nested pickle-directory from input_dir_tfr + :return input_dir: top-level input-directoy + :return input_dir_pkl: Input directory where pickle-files are placed + """ + method = Postprocess.get_input_dirs.__name__ + + if not hasattr(self, "input_dir_tfr"): + raise AttributeError("Attribute input_dir_tfr is still missing.".format(method)) + + _ = check_dir(self.input_dir_tfr) + + input_dir = os.path.dirname(self.input_dir_tfr.rstrip("/")) + input_dir_pkl = os.path.join(input_dir, "pickle") + + _ = check_dir(input_dir_pkl) + + return input_dir, input_dir_pkl # methods that are executed with __call__ def save_args_to_option_json(self): @@ -149,19 +168,24 @@ class Postprocess(TrainModel): """ Set attributes pointing to JSON-files which track essential information and also load some information to store it to attributes of the class instance + :return datasplit_dict: path to datasplit-dictionary JSON-file of trained model + :return model_hparams_dict: path to model hyperparameter-dictionary JSON-file of trained model + :return dataset: Name of datset used to train model + :return model: Name of trained model + :return input_dir_tfr: path to input directory where TF-records are stored """ method_name = Postprocess.load_jsons.__name__ - self.datasplit_dict = os.path.join(self.results_dir, "data_dict.json") - self.model_hparams_dict = os.path.join(self.results_dir, "model_hparams.json") + datasplit_dict = os.path.join(self.results_dir, "data_dict.json") + model_hparams_dict = os.path.join(self.results_dir, "model_hparams.json") checkpoint_opt_dict = os.path.join(self.results_dir, "options_checkpoints.json") # sanity checks on the JSON-files - if not os.path.isfile(self.datasplit_dict): + if not os.path.isfile(datasplit_dict): raise FileNotFoundError("%{0}: The file data_dict.json is missing in {1}".format(method_name, self.results_dir)) - if not os.path.isfile(self.model_hparams_dict): + if not os.path.isfile(model_hparams_dict): raise FileNotFoundError("%{0}: The file model_hparams.json is missing in {1}".format(method_name, self.results_dir)) @@ -172,20 +196,15 @@ class Postprocess(TrainModel): try: with open(checkpoint_opt_dict) as f: options_checkpoint = json.loads(f.read()) - self.dataset = options_checkpoint["dataset"] - self.model = options_checkpoint["model"] - self.input_dir_tfr = options_checkpoint["input_dir"] - self.input_dir = os.path.dirname(self.input_dir_tfr.rstrip("/")) - self.input_dir_pkl = os.path.join(self.input_dir, "pickle") - # update self.fcst_products - if "model" in self.fcst_products.keys(): - self.fcst_products[self.model] = self.fcst_products.pop("model") + dataset = options_checkpoint["dataset"] + model = options_checkpoint["model"] + input_dir_tfr = options_checkpoint["input_dir"] except Exception as err: print("%{0}: Something went wrong when reading the checkpoint-file '{1}'".format(method_name, checkpoint_opt_dict)) raise err - self.model_hparams_dict_load = self.get_model_hparams_dict() + return datasplit_dict, model_hparams_dict, dataset, model, input_dir_tfr def get_metadata(self): @@ -215,45 +234,126 @@ class Postprocess(TrainModel): attrs={"units": "degrees_east"}) self.lons = xr.DataArray(md_instance.lon, coords={"lon": md_instance.lon}, dims="lon", attrs={"units": "degrees_north"}) + #print('self.lats: ',self.lats) + return md_instance + def load_climdata(self,clim_path="/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly", + var="T2M",climatology_fl="climatology_t2m_1991-2020.nc"): + """ + params:climatology_fl: str, the full path to the climatology file + params:var : str, the variable name + + """ + data_clim_path = os.path.join(clim_path,climatology_fl) + data = xr.open_dataset(data_clim_path) + dt_clim = data[var] + + clim_lon = dt_clim['lon'].data + clim_lat = dt_clim['lat'].data + + meta_lon_loc = np.zeros((len(clim_lon)), dtype=bool) + for i in range(len(clim_lon)): + if np.round(clim_lon[i],1) in self.lons.data: + meta_lon_loc[i] = True + + meta_lat_loc = np.zeros((len(clim_lat)), dtype=bool) + for i in range(len(clim_lat)): + if np.round(clim_lat[i],1) in self.lats.data: + meta_lat_loc[i] = True + + # get the coordinates of the data after running CDO + coords = dt_clim.coords + nlat, nlon = len(coords["lat"]), len(coords["lon"]) + # modify it our needs + coords_new = dict(coords) + coords_new.pop("time") + coords_new["month"] = np.arange(1, 13) + coords_new["hour"] = np.arange(0, 24) + # initialize a new data array with explicit dimensions for month and hour + data_clim_new = xr.DataArray(np.full((12, 24, nlat, nlon), np.nan), coords=coords_new, dims=["month", "hour", "lat", "lon"]) + # do the reorganization + for month in np.arange(1, 13): + data_clim_new.loc[dict(month=month)]=dt_clim.sel(time=dt_clim["time.month"]==month) + + self.data_clim = data_clim_new[dict(lon=meta_lon_loc,lat=meta_lat_loc)] + print("self.data_clim",self.data_clim) + def setup_test_dataset(self): """ setup the test dataset instance + :return test_dataset: the test dataset instance """ VideoDataset = datasets.get_dataset_class(self.dataset) - self.test_dataset = VideoDataset(input_dir=self.input_dir_tfr, mode=self.mode, - datasplit_config=self.datasplit_dict) + test_dataset = VideoDataset(input_dir=self.input_dir_tfr, mode=self.mode, datasplit_config=self.datasplit_dict) + nsamples = test_dataset.num_examples_per_epoch() + + return test_dataset, nsamples - def setup_num_samples_per_epoch(self): + def get_data_params(self): """ - For generating images, the user can define the examples used, and will be taken as num_examples_per_epoch - For testing we only use exactly one epoch, but to be consistent with the training, we keep the name '_per_epoch' + Get the context_frames, future_frames and total frames from hparamters settings. + Note that future_frames_length is the number of predicted frames. """ - method = Postprocess.setup_num_samples_per_epoch.__name__ + method = Postprocess.get_data_params.__name__ - self.num_samples_per_epoch = self.test_dataset.num_examples_per_epoch() + if not hasattr(self, "model_hparams_dict_load"): + raise AttributeError("%{0}: Attribute model_hparams_dict_load is still unset.".format(method)) - return self.num_samples_per_epoch + try: + context_frames = self.model_hparams_dict_load["context_frames"] + sequence_length = self.model_hparams_dict_load["sequence_length"] + except Exception as err: + print("%{0}: Could not retrieve context_frames and sequence_length from model_hparams_dict_load-attribute" + .format(method)) + raise err + future_length = sequence_length - context_frames + if future_length <= 0: + raise ValueError("Calculated future_length must be greater than zero.".format(method)) - def get_data_params(self): + return sequence_length, context_frames, future_length + + def set_stat_file(self): """ - Get the context_frames, future_frames and total frames from hparamters settings. - Note that future_frames_length is the number of predicted frames. + Set the name of the statistic file from the input directory + :return stat_fl: Path to statistics JSON-file of input data used to train the model """ - self.context_frames = self.model_hparams_dict_load["context_frames"] - self.sequence_length = self.model_hparams_dict_load["sequence_length"] - self.future_length = self.sequence_length - self.context_frames + method = Postprocess.set_stat_file.__name__ + + if not hasattr(self, "input_dir"): + raise AttributeError("%{0}: Attribute input_dir is still unset".format(method)) + + stat_fl = os.path.join(self.input_dir, "statistics.json") + if not os.path.isfile(stat_fl): + raise FileNotFoundError("%{0}: Cannot find statistics JSON-file '{1}'".format(method, stat_fl)) + + return stat_fl - def get_stat_file(self): + def init_cond_quantile_vars(self): """ - Load the statistics from statistic file from the input directory + Get a list of variable names for conditional quantile plot + :return cond_quantile_vars: list holding the variable names of interest """ - self.stat_fl = os.path.join(self.input_dir, "statistics.json") + method = Postprocess.init_cond_quantile_vars.__name__ + + if not hasattr(self, "model"): + raise AttributeError("%{0}: Attribute model is still unset.".format(method)) + cond_quantile_vars = ["{0}_{1}_fcst".format(self.vars_in[self.channel], self.model), + "{0}_ref".format(self.vars_in[self.channel])] + + return cond_quantile_vars def make_test_dataset_iterator(self): """ Make the dataset iterator """ + method = Postprocess.make_test_dataset_iterator.__name__ + + if not hasattr(self, "test_dataset"): + raise AttributeError("%{0}: Attribute test_dataset is still unset".format(method)) + + if not hasattr(self, "batch_size"): + raise AttributeError("%{0}: Attribute batch_sie is still unset".format(method)) + test_tf_dataset = self.test_dataset.make_dataset(self.batch_size) test_iterator = test_tf_dataset.make_one_shot_iterator() # The `Iterator.string_handle()` method returns a tensor that can be evaluated @@ -261,27 +361,28 @@ class Postprocess(TrainModel): test_handle = test_iterator.string_handle() dataset_iterator = tf.data.Iterator.from_string_handle(test_handle, test_tf_dataset.output_types, test_tf_dataset.output_shapes) - self.inputs = dataset_iterator.get_next() - self.input_ts = self.inputs["T_start"] - # if self.dataset == "era5" and self.model == "savp": - # del self.inputs["T_start"] + input_iter = dataset_iterator.get_next() + ts_iter = input_iter["T_start"] + + return input_iter, ts_iter - def check_stochastic_samples_ind_based_on_model(self): + def check_num_stochastic_samples(self): """ stochastic forecasting only suitable for the geneerate models such as SAVP, vae. For convLSTM, McNet only do determinstic forecasting """ + method = Postprocess.check_num_stochastic_samples.__name__ + + if not hasattr(self, "model"): + raise AttributeError("%{0}: Attribute model is still unset".format(method)) + if not hasattr(self, "num_stochastic_samples"): + raise AttributeError("%{0}: Attribute num_stochastic_samples is still unset".format(method)) + if self.model == "convLSTM" or self.model == "test_model" or self.model == 'mcnet': if self.num_stochastic_samples > 1: print("Number of samples for deterministic model cannot be larger than 1. Higher values are ignored.") self.num_stochastic_samples = 1 - def init_session(self): - self.sess = tf.Session(config=self.config) - self.sess.graph.as_default() - self.sess.run(tf.global_variables_initializer()) - self.sess.run(tf.local_variables_initializer()) - # the run-factory def run(self): if self.model == "convLSTM" or self.model == "test_model" or self.model == 'mcnet': @@ -394,7 +495,7 @@ class Postprocess(TrainModel): self.stochastic_loss_all_batches = np.mean(np.array(self.stochastic_loss_all_batches), axis=0) assert len(np.array(self.persistent_loss_all_batches).shape) == 1 assert np.array(self.persistent_loss_all_batches).shape[0] == self.future_length - print("Bug here:", np.array(self.stochastic_loss_all_batches).shape) + assert len(np.array(self.stochastic_loss_all_batches).shape) == 2 assert np.array(self.stochastic_loss_all_batches).shape[0] == self.num_stochastic_samples @@ -408,13 +509,13 @@ class Postprocess(TrainModel): # init the session and restore the trained model self.init_session() self.restore(self.sess, self.checkpoint) - - # init sample index for looping and acculmulators for evaulation metrics + # init sample index for looping sample_ind = 0 nsamples = self.num_samples_per_epoch - # initialize datasets + # initialize xarray datasets eval_metric_ds = Postprocess.init_metric_ds(self.fcst_products, self.eval_metrics, self.vars_in[self.channel], nsamples, self.future_length) + cond_quantiple_ds = None while sample_ind < self.num_samples_per_epoch: # get normalized and denormalized input data @@ -429,7 +530,7 @@ class Postprocess(TrainModel): # denormalize forecast sequence (self.norm_cls is already set in get_input_data_per_batch-method) gen_images_denorm = self.denorm_images_all_channels(gen_images, self.vars_in, self.norm_cls, norm_method="minmax") - # store data into datset and get number of samples (may differ from batch_size at the end of the test dataset) + # store data into datset & get number of samples (may differ from batch_size at the end of the test dataset) times_0, init_times = self.get_init_time(t_starts) batch_ds = self.create_dataset(input_images_denorm, gen_images_denorm, init_times) nbs = np.minimum(self.batch_size, self.num_samples_per_epoch - sample_ind) @@ -438,6 +539,7 @@ class Postprocess(TrainModel): for i in np.arange(nbs): # work-around to make use of get_persistence_forecast_per_sample-method times_seq = (pd.date_range(times_0[i], periods=int(self.sequence_length), freq="h")).to_pydatetime() + print('times_seq: ',times_seq) # get persistence forecast for sequences at hand and write to dataset persistence_seq, _ = Postprocess.get_persistence(times_seq, self.input_dir_pkl) for ivar, var in enumerate(self.vars_in): @@ -447,19 +549,41 @@ class Postprocess(TrainModel): # save sequences to netcdf-file and track initial time nc_fname = os.path.join(self.results_dir, "vfp_date_{0}_sample_ind_{1:d}.nc" .format(pd.to_datetime(init_times[i]).strftime("%Y%m%d%H"), sample_ind + i)) - self.save_ds_to_netcdf(batch_ds.isel(init_time=i), nc_fname) + + if os.path.exists(nc_fname): + print("The file {} exist".format(nc_fname)) + else: + self.save_ds_to_netcdf(batch_ds.isel(init_time=i), nc_fname) + # end of batch-loop - # write evaluation metric to corresponding dataset... + # write evaluation metric to corresponding dataset and sa eval_metric_ds = self.populate_eval_metric_ds(eval_metric_ds, batch_ds, sample_ind, self.vars_in[self.channel]) + cond_quantiple_ds = Postprocess.append_ds(batch_ds, cond_quantiple_ds, self.cond_quantile_vars, "init_time", dtype=np.float16) # ... and increment sample_ind sample_ind += self.batch_size # end of while-loop for samples # safe dataset with evaluation metrics for later use self.eval_metrics_ds = eval_metric_ds + self.cond_quantiple_ds = cond_quantiple_ds #self.add_ensemble_dim() # all methods of the run factory + def init_session(self): + """ + Initialize TensorFlow-session + :return: - + """ + method = Postprocess.init_session.__name__ + + if not hasattr(self, "config"): + raise AttributeError("Attribute config is still unset.".format(method)) + + self.sess = tf.Session(config=self.config) + self.sess.graph.as_default() + self.sess.run(tf.global_variables_initializer()) + self.sess.run(tf.local_variables_initializer()) + def get_input_data_per_batch(self, input_iter, norm_method="minmax"): """ Get the input sequence from the dataset iterator object stored in self.inputs and denormalize the data @@ -531,30 +655,33 @@ class Postprocess(TrainModel): # dictionary of implemented evaluation metrics dims = ["lat", "lon"] - known_eval_metrics = {"mse": Scores("mse", dims), "psnr": Scores("psnr", dims)} - - # generate list of functions that calculate requested evaluation metrics - if set(self.eval_metrics).issubset(known_eval_metrics): - eval_metrics_func = [known_eval_metrics[metric].score_func for metric in self.eval_metrics] - else: - misses = list(set(self.eval_metrics) - known_eval_metrics.keys()) - raise NotImplementedError("%{0}: The following requested evaluation metrics are not implemented yet: " - .format(method, ", ".join(misses))) - + eval_metrics_func = [Scores(metric,dims).score_func for metric in self.eval_metrics] varname_ref = "{0}_ref".format(varname) # reset init-time coordinate of metric_ds in place and get indices for slicing ind_end = np.minimum(ind_start + self.batch_size, self.num_samples_per_epoch) init_times_metric = metric_ds["init_time"].values init_times_metric[ind_start:ind_end] = data_ds["init_time"] metric_ds = metric_ds.assign_coords(init_time=init_times_metric) + print("metric_ds",metric_ds) # populate metric_ds for fcst_prod in self.fcst_products.keys(): for imetric, eval_metric in enumerate(self.eval_metrics): metric_name = "{0}_{1}_{2}".format(varname, fcst_prod, eval_metric) varname_fcst = "{0}_{1}_fcst".format(varname, fcst_prod) dict_ind = dict(init_time=data_ds["init_time"]) - metric_ds[metric_name].loc[dict_ind] = eval_metrics_func[imetric](data_ds[varname_fcst], - data_ds[varname_ref]) + print('metric_name: ',metric_name) + print('varname_fcst: ',varname_fcst) + print('varname_ref: ',varname_ref) + print('dict_ind: ',dict_ind) + print('fcst_prod: ',fcst_prod) + print('imetric: ',imetric) + print('eval_metric: ',eval_metric) + metric_ds[metric_name].loc[dict_ind] = eval_metrics_func[imetric](data_fcst=data_ds[varname_fcst], + data_ref=data_ds[varname_ref], + data_clim=self.data_clim) + print('data_ds[varname_fcst] shape: ',data_ds[varname_fcst].shape) + print('metric_ds[metric_name].loc[dict_ind] shape: ',metric_ds[metric_name].loc[dict_ind].shape) + print('metric_ds[metric_name].loc[dict_ind]: ',metric_ds[metric_name].loc[dict_ind]) # end of metric-loop # end of forecast product-loop @@ -578,7 +705,6 @@ class Postprocess(TrainModel): :param ts_ini: initial time of forecast (=last time step of effective input sequence) :return data_ds: above mentioned data in a nicely formatted dataset """ - method = Postprocess.create_dataset.__name__ # auxiliary variables for temporal dimensions @@ -609,7 +735,7 @@ class Postprocess(TrainModel): # forecast and into the the reference sequences (which can be compared to the forecast) # as where the persistence forecast is containing NaNs (must be generated later) data_in_dict = dict([("{0}_in".format(var), input_seq.isel(fcst_hour=slice(None, self.context_frames), - varname=ivar) \ + varname=ivar) .rename({"fcst_hour": "in_hour"}) .reset_coords(names="varname", drop=True)) for ivar, var in enumerate(self.vars_in)]) @@ -629,7 +755,7 @@ class Postprocess(TrainModel): # fill persistence forecast variables with dummy data (to be populated later) data_pfcst_dict = dict([("{0}_persistence_fcst".format(var), (["init_time", "fcst_hour", "lat", "lon"], - np.full(shape_fcst, np.nan))) + np.full(shape_fcst, np.nan))) for ivar, var in enumerate(self.vars_in)]) # create the dataset @@ -663,8 +789,106 @@ class Postprocess(TrainModel): Postprocess.save_ds_to_netcdf(self.eval_metrics_ds, nc_fname) # also save averaged metrics to JSON-file and plot it for diagnosis - _ = Postprocess.plot_avg_eval_metrics(self.eval_metrics_ds, self.eval_metrics, self.fcst_products, - self.vars_in[self.channel], self.results_dir) + _ = plot_avg_eval_metrics(self.eval_metrics_ds, self.eval_metrics, self.fcst_products, + self.vars_in[self.channel], self.results_dir) + + def plot_example_forecasts(self, metric="mse", channel=0): + """ + Plots example forecasts. The forecasts are chosen from the complete pool of the test dataset and are chosen + according to the accuracy in terms of the chosen metric. In add ition, to the best and worst forecast, + every decil of the chosen metric is retrieved to cover the whole bandwith of forecasts. + :param metric: The metric which is used for measuring accuracy + :param channel: The channel index of the forecasted variable to plot (correspondong to self.vars_in) + :return: 11 exemplary forecast plots are created + """ + method = Postprocess.plot_example_forecasts.__name__ + + metric_name = "{0}_{1}_{2}".format(self.vars_in[channel], self.model, metric) + if not metric_name in self.eval_metrics_ds: + raise ValueError("%{0}: Cannot find requested evaluation metric '{1}'".format(method, metric_name) + + " onto which selection of plotted forecast is done.") + # average metric of interest and obtain quantiles incl. indices + metric_mean = self.eval_metrics_ds[metric_name].mean(dim="fcst_hour") + quantiles = np.arange(0., 1.01, .1) + quantiles_val = metric_mean.quantile(quantiles, interpolation="nearest") + quantiles_inds = self.get_matching_indices(metric_mean.values, quantiles_val) + + for i, ifcst in enumerate(quantiles_inds): + date_init = pd.to_datetime(metric_mean.coords["init_time"][ifcst].data) + nc_fname = os.path.join(self.results_dir, "vfp_date_{0}_sample_ind_{1:d}.nc" + .format(date_init.strftime("%Y%m%d%H"), ifcst)) + if not os.path.isfile(nc_fname): + raise FileNotFoundError("%{0}: Could not find requested file '{1}'".format(method, nc_fname)) + else: + # get the data + varname = self.vars_in[channel] + with xr.open_dataset(nc_fname) as dfile: + data_fcst = dfile["{0}_{1}_fcst".format(varname, self.model)] + data_ref = dfile["{0}_ref".format(varname)] + + data_diff = data_fcst - data_ref + # name of plot + plt_fname_base = os.path.join(self.output_dir, "forecast_{0}_{1}_{2}_{3:d}percentile.png" + .format(varname, date_init.strftime("%Y%m%dT%H00"), metric, + int(quantiles[i] * 100.))) + + create_geo_contour_plot(data_fcst, data_diff, varname, plt_fname_base) + + def plot_conditional_quantiles(self): + + # release some memory + Postprocess.clean_obj_attribute(self, "eval_metrics_ds") + + # the variables for conditional quantile plot + var_fcst = self.cond_quantile_vars[0] + var_ref = self.cond_quantile_vars[1] + + data_fcst = get_era5_varatts(self.cond_quantiple_ds[var_fcst], self.cond_quantiple_ds[var_fcst].name) + data_ref = get_era5_varatts(self.cond_quantiple_ds[var_ref], self.cond_quantiple_ds[var_ref].name) + + # create plots + fhhs = data_fcst.coords["fcst_hour"] + for hh in fhhs: + # calibration refinement factorization + plt_fname_cf = os.path.join(self.results_dir, "cond_quantile_{0}_{1}_fh{2:0d}_calibration_refinement.png" + .format(self.vars_in[self.channel], self.model, int(hh))) + + quantile_panel_cf, cond_variable_cf = calculate_cond_quantiles(data_fcst.sel(fcst_hour=hh), + data_ref.sel(fcst_hour=hh), + factorization="calibration_refinement", + quantiles=(0.05, 0.5, 0.95)) + + plot_cond_quantile(quantile_panel_cf, cond_variable_cf, plt_fname_cf) + + # likelihood-base rate factorization + plt_fname_lbr = plt_fname_cf.replace("calibration_refinement", "likelihood-base_rate") + quantile_panel_lbr, cond_variable_lbr = calculate_cond_quantiles(data_fcst.sel(fcst_hour=hh), + data_ref.sel(fcst_hour=hh), + factorization="likelihood-base_rate", + quantiles=(0.05, 0.5, 0.95)) + + plot_cond_quantile(quantile_panel_lbr, cond_variable_lbr, plt_fname_lbr) + + @staticmethod + def clean_obj_attribute(obj, attr_name, lremove=False): + """ + Cleans attribute of object by setting it to None (can be used to releave memory) + :param obj: the object/ class instance + :param attr_name: the attribute from the object to be cleaned + :param lremove: flag if attribute is removed or set to None + :return: the object/class instance with the attribute's value changed to None + """ + method = Postprocess.clean_obj_attribute.__name__ + + if not hasattr(obj, attr_name): + print("%{0}: Class attribute '{1}' does not exist. Nothing to do...".format(method, attr_name)) + else: + if lremove: + delattr(obj, attr_name) + else: + setattr(obj, attr_name, None) + + return obj # auxiliary methods (not necessarily bound to class instance) @staticmethod @@ -834,7 +1058,7 @@ class Postprocess(TrainModel): # Retrieve starting index ind_first_m = list(time_pickle_first).index(np.array(t_persistence_first_m[0])) - #print("time_pickle_second:", time_pickle_second) + # print("time_pickle_second:", time_pickle_second) ind_second_m = list(time_pickle_second).index(np.array(t_persistence_second_m[0])) # append the sequence of the second month to the first month @@ -908,47 +1132,54 @@ class Postprocess(TrainModel): print("%{0}: Something unexpected happened when creating netCDF-file '1'".format(method, nc_fname)) raise err - def plot_example_forecasts(self, metric="mse", channel=0): + @staticmethod + def append_ds(ds_in: xr.Dataset, ds_preexist: xr.Dataset, varnames: list, dim2append: str, dtype=None): """ - Plots example forecasts. The forecasts are chosen from the complete pool of the test dataset and are chosen - according to the accuracy in terms of the chosen metric. In add ition, to the best and worst forecast, - every decil of the chosen metric is retrieved to cover the whole bandwith of forecasts. - :param metric: The metric which is used for measuring accuracy - :param channel: The channel index of the forecasted variable to plot (correspondong to self.vars_in) - :return: 11 exemplary forecast plots are created + Append existing datset with subset of dataset based on selected variables + :param ds_in: the input dataset from which variables should be retrieved + :param ds_preexist: the accumulator datsaet to be appended (can be initialized with None) + :param dim2append: + :param varnames: List of variables that should be retrieved from ds_in and that are appended to ds_preexist + :return: appended version of ds_preexist """ - method = Postprocess.plot_example_forecasts.__name__ - - metric_name = "{0}_{1}_{2}".format(self.vars_in[channel], self.model, metric) - if not metric_name in self.eval_metrics_ds: - raise ValueError("%{0}: Cannot find requested evaluation metric '{1}'".format(method, metric_name) + - " onto which selection of plotted forecast is done.") - # average metric of interest and obtain quantiles incl. indices - metric_mean = self.eval_metrics_ds[metric_name].mean(dim="fcst_hour") - quantiles = np.arange(0., 1.01, .1) - quantiles_val = metric_mean.quantile(quantiles, interpolation="nearest") - quantiles_inds = self.get_matching_indices(metric_mean.values, quantiles_val) - print(metric_mean.coords["init_time"]) - for i, ifcst in enumerate(quantiles_inds): - date_init = pd.to_datetime(metric_mean.coords["init_time"][ifcst].data) - nc_fname = os.path.join(self.results_dir, "vfp_date_{0}_sample_ind_{1:d}.nc" - .format(date_init.strftime("%Y%m%d%H"), ifcst)) - if not os.path.isfile(nc_fname): - raise FileNotFoundError("%{0}: Could not find requested file '{1}'".format(method, nc_fname)) - else: - # get the data - varname = self.vars_in[channel] - with xr.open_dataset(nc_fname) as dfile: - data_fcst = dfile["{0}_{1}_fcst".format(varname, self.model)] - data_ref = dfile["{0}_ref".format(varname)] + method = Postprocess.append_ds.__name__ - data_diff = data_fcst - data_ref - # name of plot - plt_fname_base = os.path.join(self.output_dir, "forecast_{0}_{1}_{2}_{3:d}percentile.png" - .format(varname, date_init.strftime("%Y%m%dT%H00"), metric, - int(quantiles[i]*100.))) + varnames_str = ",".join(varnames) + # sanity checks + if not isinstance(ds_in, xr.Dataset): + raise ValueError("%{0}: ds_in must be a xarray dataset, but is of type {1}".format(method, type(ds_in))) + + if not set(varnames).issubset(ds_in.data_vars): + raise ValueError("%{0}: Could not find all variables ({1}) in input dataset ds_in.".format(method, + varnames_str)) + #Bing : why using dtype as an aurument since it seems you only want ton configure dtype as np.double + if dtype is None: + dtype = np.double + else: + if not isinstance(dtype, type(np.double)): + raise ValueError("%{0}: dytpe must be a NumPy datatype, but is of type '{1}'".format(method, type(dtype))) + + if ds_preexist is None: + ds_preexist = ds_in[varnames].copy(deep=True) + ds_preexist = ds_preexist.astype(dtype) # change data type (if necessary) + return ds_preexist + else: + if not isinstance(ds_preexist, xr.Dataset): + raise ValueError("%{0}: ds_preexist must be a xarray dataset, but is of type {1}" + .format(method, type(ds_preexist))) + if not set(varnames).issubset(ds_preexist.data_vars): + raise ValueError("%{0}: Could not find all varibales ({1}) in pre-existing dataset ds_preexist" + .format(method, varnames_str)) + + try: + ds_preexist = xr.concat([ds_preexist, ds_in[varnames].astype(dtype)], dim2append) + except Exception as err: + print("%{0}: Failed to concat datsets along dimension {1}.".format(method, dim2append)) + print(ds_in) + print(ds_preexist) + raise err - Postprocess.create_plot(data_fcst, data_diff, varname, plt_fname_base) + return ds_preexist @staticmethod def init_metric_ds(fcst_products, eval_metrics, varname, nsamples, nlead_steps): @@ -971,7 +1202,6 @@ class Postprocess(TrainModel): return eval_metric_ds - @staticmethod def get_matching_indices(big_array, subset): """ @@ -986,151 +1216,6 @@ class Postprocess(TrainModel): return indexes - @staticmethod - def plot_avg_eval_metrics(eval_ds, eval_metrics, fcst_prod_dict, varname, out_dir): - """ - Plots error-metrics averaged over all predictions to file incl. 90%-confidence interval that is estimated by - block bootstrapping. - :param eval_ds: The dataset storing all evaluation metrics for each forecast (produced by init_metric_ds-method) - :param eval_metrics: list of evaluation metrics - :param fcst_prod_dict: dictionary of forecast products, e.g. {"persistence": "pfcst"} - :param varname: the variable name for which the evaluation metrics are available - :param out_dir: output directory to save the lots - :return: a bunch of plots as png-files - """ - method = Postprocess.plot_avg_eval_metrics.__name__ - - # settings for block bootstrapping - # sanity checks - if not isinstance(eval_ds, xr.Dataset): - raise ValueError("%{0}: Argument 'eval_ds' must be a xarray dataset.".format(method)) - - if not isinstance(fcst_prod_dict, dict): - raise ValueError("%{0}: Argument 'fcst_prod_dict' must be dictionary with short names of forecast product" + - "as key and long names as value.".format(method)) - - try: - nhours = np.shape(eval_ds.coords["fcst_hour"])[0] - except Exception as err: - print("%{0}: Input argument 'eval_ds' appears to be unproper.".format(method)) - raise err - - nmodels = len(fcst_prod_dict.values()) - colors = ["blue", "red", "black", "grey"] - for metric in eval_metrics: - # create a new figure object - fig = plt.figure(figsize=(6, 4)) - ax = plt.axes([0.1, 0.15, 0.75, 0.75]) - hours = np.arange(1, nhours+1) - - for ifcst, fcst_prod in enumerate(fcst_prod_dict.keys()): - metric_name = "{0}_{1}_{2}".format(varname, fcst_prod, metric) - try: - metric2plt = eval_ds[metric_name+"_avg"] - metric_boot = eval_ds[metric_name+"_bootstrapped"] - except Exception as err: - print("%{0}: Could not retrieve {1} and/or {2} from evaluation metric dataset." - .format(method, metric_name, metric_name+"_boot")) - raise err - # plot the data - metric2plt_min = metric_boot.quantile(0.05, dim="iboot") - metric2plt_max = metric_boot.quantile(0.95, dim="iboot") - plt.plot(hours, metric2plt, label=fcst_prod, color=colors[ifcst], marker="o") - plt.fill_between(hours, metric2plt_min, metric2plt_max, facecolor=colors[ifcst], alpha=0.3) - # configure plot - plt.xticks(hours) - # automatic y-limits for PSNR wich can be negative and positive - if metric != "psnr": ax.set_ylim(0., None) - legend = ax.legend(loc="upper right", bbox_to_anchor=(1.15, 1)) - ax.set_xlabel("Lead time [hours]") - ax.set_ylabel(metric.upper()) - plt_fname = os.path.join(out_dir, "evaluation_{0}".format(metric)) - print("Saving basic evaluation plot in terms of {1} to '{2}'".format(method, metric, plt_fname)) - plt.savefig(plt_fname) - - plt.close() - - return True - - @staticmethod - def create_plot(data, data_diff, varname, plt_fname): - """ - Creates filled contour plot of forecast data and also draws contours for differences. - ML: So far, only plotting of the 2m temperature is supported (with 12 predicted hours/frames) - :param data: the forecasted data array to be plotted - :param data_diff: the reference data ('ground truth') - :param varname: the name of the variable - :param plt_fname: the filename to the store the plot - :return: - - """ - method = Postprocess.create_plot.__name__ - - try: - coords = data.coords - # handle coordinates and forecast times - lat, lon = coords["lat"], coords["lon"] - date0 = pd.to_datetime(coords["init_time"].data) - fhhs = coords["fcst_hour"].data - except Exception as err: - print("%{0}: Could not retrieve expected coordinates lat, lon and time_forecast from data.".format(method)) - raise err - - lons, lats = np.meshgrid(lon, lat) - - date0_str = date0.strftime("%Y-%m-%d %H:%M UTC") - - # check data to be plotted since programme is not generic so far - if np.shape(fhhs)[0] != 12: - raise ValueError("%{0}: Currently, only 12 hour forecast can be handled properly.".format(method)) - - if varname != "2t": - raise ValueError("%{0}: Currently, only 2m temperature is plotted nicely properly.".format(method)) - - # define levels - clevs = np.arange(-10., 40., 1.) - clevs_diff = np.arange(0.5, 10.5, 2.) - clevs_diff2 = np.arange(-10.5, -0.5, 2.) - - # create fig and subplot axes - fig, axes = plt.subplots(2, 6, sharex=True, sharey=True, figsize=(12, 6)) - axes = axes.flatten() - - # create all subplots - for t, fhh in enumerate(fhhs): - m = Basemap(projection='cyl', llcrnrlat=np.min(lat), urcrnrlat=np.max(lat), - llcrnrlon=np.min(lon), urcrnrlon=np.max(lon), resolution='l', ax=axes[t]) - m.drawcoastlines() - x, y = m(lons, lats) - if t%6 == 0: - lat_lab = [1, 0, 0, 0] - axes[t].set_ylabel(u'Latitude', labelpad=30) - else: - lat_lab = list(np.zeros(4)) - if t/6 >= 1: - lon_lab = [0, 0, 0, 1] - axes[t].set_xlabel(u'Longitude', labelpad=15) - else: - lon_lab = list(np.zeros(4)) - m.drawmapboundary() - m.drawparallels(np.arange(0, 90, 5),labels=lat_lab, xoffset=1.) - m.drawmeridians(np.arange(5, 355, 10),labels=lon_lab, yoffset=1.) - cs = m.contourf(x, y, data.isel(fcst_hour=t)-273.15, clevs, cmap=plt.get_cmap("jet"), ax=axes[t], - extend="both") - cs_c_pos = m.contour(x, y, data_diff.isel(fcst_hour=t), clevs_diff, linewidths=0.5, ax=axes[t], - colors="black") - cs_c_neg = m.contour(x, y, data_diff.isel(fcst_hour=t), clevs_diff2, linewidths=1, linestyles="dotted", - ax=axes[t], colors="black") - axes[t].set_title("{0} +{1:02d}:00".format(date0_str, int(fhh)), fontsize=7.5, pad=4) - - fig.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=-0.7, - wspace=0.05) - # add colorbar. - cbar_ax = fig.add_axes([0.3, 0.22, 0.4, 0.02]) - cbar = fig.colorbar(cs, cax=cbar_ax, orientation="horizontal") - cbar.set_label('°C') - # save to disk - plt.savefig(plt_fname, bbox_inches="tight") - def main(): parser = argparse.ArgumentParser() @@ -1142,10 +1227,12 @@ def main(): help='mode for dataset, val or test.') parser.add_argument("--batch_size", type=int, default=8, help="number of samples in batch") parser.add_argument("--num_stochastic_samples", type=int, default=1) - parser.add_argument("--stochastic_plot_id", type=int, default=0, - help="The stochastic generate images index to plot") parser.add_argument("--gpu_mem_frac", type=float, default=0.95, help="fraction of gpu memory to use") parser.add_argument("--seed", type=int, default=7) + parser.add_argument("--evaluation_metrics", "-eval_metrics", dest="eval_metrics", nargs="+", default=("mse", "psnr", "ssim","acc"), + help="Metrics to be evaluate the trained model. Must be known metrics, see Scores-class.") + parser.add_argument("--channel", "-channel", dest="channel", type=int, default=0, + help="Channel which is used for evaluation.") args = parser.parse_args() print('----------------------------------- Options ------------------------------------') @@ -1153,16 +1240,16 @@ def main(): print(k, "=", v) print('------------------------------------- End --------------------------------------') - # ML: test_instance is a bit misleading here - test_instance = Postprocess(results_dir=args.results_dir, checkpoint=args.checkpoint, mode="test", - batch_size=args.batch_size, num_stochastic_samples=args.num_stochastic_samples, - gpu_mem_frac=args.gpu_mem_frac, seed=args.seed, - stochastic_plot_id=args.stochastic_plot_id, args=args) - - test_instance() - test_instance.run() - test_instance.handle_eval_metrics() - test_instance.plot_example_forecasts(metric="mse") + # initialize postprocessing instance + postproc_instance = Postprocess(results_dir=args.results_dir, checkpoint=args.checkpoint, mode="test", + batch_size=args.batch_size, num_stochastic_samples=args.num_stochastic_samples, + gpu_mem_frac=args.gpu_mem_frac, seed=args.seed, args=args, + eval_metrics=args.eval_metrics, channel=args.channel) + # run the postprocessing + postproc_instance.run() + postproc_instance.handle_eval_metrics() + postproc_instance.plot_example_forecasts(metric=args.eval_metrics[0], channel=args.channel) + postproc_instance.plot_conditional_quantiles() if __name__ == '__main__': diff --git a/video_prediction_tools/model_modules/video_prediction/metrics.py b/video_prediction_tools/model_modules/video_prediction/metrics.py index 61c13e91b74f1f53f01ab03fa65467aafb5844b2..253dfbbd0ad5881026f2e847064593f5f6b62119 100644 --- a/video_prediction_tools/model_modules/video_prediction/metrics.py +++ b/video_prediction_tools/model_modules/video_prediction/metrics.py @@ -1,7 +1,9 @@ import tensorflow as tf #import lpips_tf -import numpy as np import math +import numpy as np +from skimage.measure import compare_ssim as ssim_ski + def mse(a, b): return tf.reduce_mean(tf.squared_difference(a, b), [-3, -2, -1]) @@ -23,6 +25,7 @@ def psnr_imgs(img1, img2, pixel_max=1.): def mse_imgs(image1,image2): mse = ((image1 - image2)**2).mean(axis=None) return mse + # def lpips(input0, input1): # if input0.shape[-1].value == 1: # input0 = tf.tile(input0, [1] * (input0.shape.ndims - 1) + [3]) @@ -32,9 +35,30 @@ def mse_imgs(image1,image2): # distance = lpips_tf.lpips(input0, input1) # return -distance -def ssim_images(image1,image2): +def ssim_images(image1, image2): """ - + Reference for calculating ssim Numpy impelmeentation for ssim https://cvnote.ddlee.cc/2019/09/12/psnr-ssim-python + https://scikit-image.org/docs/dev/auto_examples/transform/plot_ssim.html + :param image1 the reference images + :param image2 the predicte images """ - pass + ssim_pred = ssim_ski(image1, image2, + data_range = image2.max() - image2.min()) + return ssim_pred + +def acc_imgs(image1,image2,clim): + """ + Reference for calculating acc + :param image1 the reference images ?? single image or batch_size images? + :param image2 the predicte images + :param clim the climatology images + """ + img1_ = image1-clim + img2_ = image2-clim + cor1 = np.sum(img1_*img2_) + cor2 = np.sqrt(np.sum(img1_**2)*np.sum(img2_**2)) + acc = cor1/cor2 + return acc + + diff --git a/video_prediction_tools/postprocess/postprocess_plotting.py b/video_prediction_tools/postprocess/postprocess_plotting.py new file mode 100644 index 0000000000000000000000000000000000000000..27758b75599ce83b944a91caed8b5a13c0631a43 --- /dev/null +++ b/video_prediction_tools/postprocess/postprocess_plotting.py @@ -0,0 +1,255 @@ +""" +Collection of functions to create plots +""" + +__email__ = "b.gong@fz-juelich.de" +__author__ = "Michael Langguth" +__date__ = "2021-05-27" + +import os +import numpy as np +import pandas as pd +import xarray as xr +# for plotting +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from mpl_toolkits.basemap import Basemap +from general_utils import provide_default + + +def plot_cond_quantile(quantile_panel: xr.DataArray, data_marginal: xr.DataArray, plt_fname: str, opt: dict = None): + """ + Creates conditional quantile plot + :param quantile_panel: quantile panel created by calculate_cond_quantiles + :param data_marginal: data array for which histogram will be plotted + :param plt_fname: name of the plot-file to be created + :param opt: options + :return: + """ + + method = plot_cond_quantile.__name__ + + if not isinstance(quantile_panel, xr.DataArray): + raise ValueError("%{0}: quantile_panel must be a DataArray".format(method)) + + if not isinstance(data_marginal, xr.DataArray): + raise ValueError("%{0}: data_marginal must be a DataArray".format(method)) + + if list(quantile_panel.coords) != ["bin_center", "quantile"]: + raise ValueError("%{0}: The coordinates of quantile_panel must be ['bin_center', 'quantile']".format(method)) + + if opt is None: + opt = {} + + print("%{0}: Start creating conditional quantile plot in file '{1}'".format(method, plt_fname)) + + bins_c = quantile_panel["bin_center"] + bin_width = bins_c[1] - bins_c[0] + bins = np.arange(bins_c[0]-bin_width/2., bins_c[-1]+1.5*bin_width/2, bin_width) + quantiles = quantile_panel["quantile"] + nquantiles = len(quantiles) + if nquantiles%2 != 1: + raise ValueError("%{0}: Number of quantiles must be odd.".format(method)) + + ls_all = get_ls_mirrored(int(nquantiles/2)) + lw_all = list(np.full(nquantiles, 2.)) + lw_all[int(nquantiles/2)] = 1.5 + + # start plotting + figsize = provide_default(opt, "figsize", (12, 6)) + fs_title = provide_default(opt, "fs_axis_title", 16) + fs_label = provide_default(opt, "fs_axis_label", fs_title-2) + plt_title = provide_default(opt, "plt_title", "") + fig, ax = plt.subplots(figsize=figsize) + + # plot reference line + ax.plot(bins_c, bins_c, color='k', label='reference 1:1', linewidth=1.) + # plot conditional quantiles + for iq in np.arange(nquantiles): + ax.plot(bins_c, quantile_panel.isel(quantile=iq), ls=ls_all[iq], color="k", lw=lw_all[iq], + label="{0:d}th quantile".format(int(quantiles[iq]*100.))) + # plot histogram of marginal distribution + ax2 = ax.twinx() + xr.plot.hist(data_marginal, ax=ax2, bins=bins, color="k", alpha=0.3) + ax2.set_yscale("log") + + xlabel = "{0} [{1}]".format(provide_default(quantile_panel.attrs, "cond_var_name", "conditiong variable"), + provide_default(quantile_panel.attrs, "cond_var_unit", "unknown")) + ylabel = "{0} [{1}]".format(provide_default(quantile_panel.attrs, "tar_var_name", "target variable"), + provide_default(quantile_panel.attrs, "tar_var_unit", "unknown")) + + ax.set_ylabel(ylabel, fontsize=fs_title) + ax2.set_ylabel("counts", fontsize=fs_title) + ax.set_xlabel(xlabel, fontsize=fs_title) + # ensure that histogram extends to the lower half of the plot + y2_max_power = int(np.log10(ax2.get_ylim()[1])) + ax2.set(ylim=(1.e00, np.power(10, y2_max_power*4)), yticks=np.logspace(0, y2_max_power+1, y2_max_power+2)) + ax2.set_title(plt_title) + + ax.tick_params(axis="both", labelsize=fs_label) + ax2.tick_params(axis="both", labelsize=fs_label) + + fig.savefig(plt_fname) + plt.close("all") + + +def plot_avg_eval_metrics(eval_ds, eval_metrics, fcst_prod_dict, varname, out_dir): + """ + Plots error-metrics averaged over all predictions to file incl. 90%-confidence interval that is estimated by + block bootstrapping. + :param eval_ds: The dataset storing all evaluation metrics for each forecast (produced by init_metric_ds-method) + :param eval_metrics: list of evaluation metrics + :param fcst_prod_dict: dictionary of forecast products, e.g. {"persistence": "pfcst"} + :param varname: the variable name for which the evaluation metrics are available + :param out_dir: output directory to save the lots + :return: a bunch of plots as png-files + """ + method = plot_avg_eval_metrics.__name__ + + # settings for block bootstrapping + # sanity checks + if not isinstance(eval_ds, xr.Dataset): + raise ValueError("%{0}: Argument 'eval_ds' must be a xarray dataset.".format(method)) + + if not isinstance(fcst_prod_dict, dict): + raise ValueError("%{0}: Argument 'fcst_prod_dict' must be dictionary with short names of forecast product" + + "as key and long names as value.".format(method)) + + try: + nhours = np.shape(eval_ds.coords["fcst_hour"])[0] + except Exception as err: + print("%{0}: Input argument 'eval_ds' appears to be unproper.".format(method)) + raise err + + nmodels = len(fcst_prod_dict.values()) + colors = ["blue", "red", "black", "grey"] + for metric in eval_metrics: + # create a new figure object + fig = plt.figure(figsize=(6, 4)) + ax = plt.axes([0.1, 0.15, 0.75, 0.75]) + hours = np.arange(1, nhours + 1) + + for ifcst, fcst_prod in enumerate(fcst_prod_dict.keys()): + metric_name = "{0}_{1}_{2}".format(varname, fcst_prod, metric) + try: + metric2plt = eval_ds[metric_name + "_avg"] + metric_boot = eval_ds[metric_name + "_bootstrapped"] + except Exception as err: + print("%{0}: Could not retrieve {1} and/or {2} from evaluation metric dataset." + .format(method, metric_name, metric_name + "_boot")) + raise err + # plot the data + metric2plt_min = metric_boot.quantile(0.05, dim="iboot") + metric2plt_max = metric_boot.quantile(0.95, dim="iboot") + plt.plot(hours, metric2plt, label=fcst_prod, color=colors[ifcst], marker="o") + plt.fill_between(hours, metric2plt_min, metric2plt_max, facecolor=colors[ifcst], alpha=0.3) + # configure plot + plt.xticks(hours) + # automatic y-limits for PSNR wich can be negative and positive + if metric != "psnr": ax.set_ylim(0., None) + legend = ax.legend(loc="upper right", bbox_to_anchor=(1.35, 1)) + ax.set_xlabel("Lead time [hours]") + ax.set_ylabel(metric.upper()) + plt_fname = os.path.join(out_dir, "evaluation_{0}".format(metric)) + print("Saving basic evaluation plot in terms of {1} to '{2}'".format(method, metric, plt_fname)) + plt.savefig(plt_fname,bbox_inches="tight") + + plt.close() + + return True + + +def create_geo_contour_plot(data, data_diff, varname, plt_fname): + """ + Creates filled contour plot of forecast data and also draws contours for differences. + ML: So far, only plotting of the 2m temperature is supported (with 12 predicted hours/frames) + :param data: the forecasted data array to be plotted + :param data_diff: the reference data ('ground truth') + :param varname: the name of the variable + :param plt_fname: the filename to the store the plot + :return: - + """ + method = create_geo_contour_plot.__name__ + + try: + coords = data.coords + # handle coordinates and forecast times + lat, lon = coords["lat"], coords["lon"] + date0 = pd.to_datetime(coords["init_time"].data) + fhhs = coords["fcst_hour"].data + except Exception as err: + print("%{0}: Could not retrieve expected coordinates lat, lon and time_forecast from data.".format(method)) + raise err + + lons, lats = np.meshgrid(lon, lat) + + date0_str = date0.strftime("%Y-%m-%d %H:%M UTC") + + # check data to be plotted since programme is not generic so far + if np.shape(fhhs)[0] != 12: + raise ValueError("%{0}: Currently, only 12 hour forecast can be handled properly.".format(method)) + + if varname != "2t": + raise ValueError("%{0}: Currently, only 2m temperature is plotted nicely properly.".format(method)) + + # define levels + clevs = np.arange(-10., 40., 1.) + clevs_diff = np.arange(0.5, 10.5, 2.) + clevs_diff2 = np.arange(-10.5, -0.5, 2.) + + # create fig and subplot axes + fig, axes = plt.subplots(2, 6, sharex=True, sharey=True, figsize=(12, 6)) + axes = axes.flatten() + + # create all subplots + for t, fhh in enumerate(fhhs): + m = Basemap(projection='cyl', llcrnrlat=np.min(lat), urcrnrlat=np.max(lat), + llcrnrlon=np.min(lon), urcrnrlon=np.max(lon), resolution='l', ax=axes[t]) + m.drawcoastlines() + x, y = m(lons, lats) + if t % 6 == 0: + lat_lab = [1, 0, 0, 0] + axes[t].set_ylabel(u'Latitude', labelpad=30) + else: + lat_lab = list(np.zeros(4)) + if t / 6 >= 1: + lon_lab = [0, 0, 0, 1] + axes[t].set_xlabel(u'Longitude', labelpad=15) + else: + lon_lab = list(np.zeros(4)) + m.drawmapboundary() + m.drawparallels(np.arange(0, 90, 5), labels=lat_lab, xoffset=1.) + m.drawmeridians(np.arange(5, 355, 10), labels=lon_lab, yoffset=1.) + cs = m.contourf(x, y, data.isel(fcst_hour=t) - 273.15, clevs, cmap=plt.get_cmap("jet"), ax=axes[t], + extend="both") + cs_c_pos = m.contour(x, y, data_diff.isel(fcst_hour=t), clevs_diff, linewidths=0.5, ax=axes[t], + colors="black") + cs_c_neg = m.contour(x, y, data_diff.isel(fcst_hour=t), clevs_diff2, linewidths=1, linestyles="dotted", + ax=axes[t], colors="black") + axes[t].set_title("{0} +{1:02d}:00".format(date0_str, int(fhh)), fontsize=7.5, pad=4) + + fig.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=-0.7, + wspace=0.05) + # add colorbar. + cbar_ax = fig.add_axes([0.3, 0.22, 0.4, 0.02]) + cbar = fig.colorbar(cs, cax=cbar_ax, orientation="horizontal") + cbar.set_label('°C') + # save to disk + plt.savefig(plt_fname, bbox_inches="tight") + + +# auxiliary functions +def get_ls_mirrored(n, ls_base=("--", ":")): + + nls_base = len(ls_base) + lss = [] + for ilw in np.arange(n): + if ilw < nls_base: + lss.append(ls_base[ilw]) + else: + lss.append("-") + + lss = lss + ["-"] + lss[::-1] + + return lss diff --git a/video_prediction_tools/postprocess/statistical_evaluation.py b/video_prediction_tools/postprocess/statistical_evaluation.py index d91b12cea79305f09fb7f8eaa3cbd00421da3d65..7ed9091d2c0a1684c579d891d057d46fee94eb1d 100644 --- a/video_prediction_tools/postprocess/statistical_evaluation.py +++ b/video_prediction_tools/postprocess/statistical_evaluation.py @@ -1,18 +1,100 @@ -from typing import Union, Tuple, Dict, List +""" +Collection of auxiliary functions for statistical evaluation and class for Score-functions +""" + +__email__ = "b.gong@fz-juelich.de" +__author__ = "Michael Langguth" +__date__ = "2021-05-xx" + import numpy as np import xarray as xr -import pandas as pd from typing import Union, List +from skimage.measure import compare_ssim as ssim +import datetime +import pandas as pd try: from tqdm import tqdm l_tqdm = True except: l_tqdm = False +from general_utils import provide_default # basic data types da_or_ds = Union[xr.DataArray, xr.Dataset] +def calculate_cond_quantiles(data_fcst: xr.DataArray, data_ref: xr.DataArray, factorization="calibration_refinement", + quantiles=(0.05, 0.5, 0.95)): + """ + Calculate conditional quantiles of forecast and observation/reference data with selected factorization + :param data_fcst: forecast data array + :param data_ref: observational/reference data array + :param factorization: factorization: "likelihood-base_rate" p(m|o) or "calibration_refinement" p(o|m)-> default + :param quantiles: conditional quantiles + :return quantile_panel: conditional quantiles of p(m|o) or p(o|m) + """ + method = calculate_cond_quantiles.__name__ + + # sanity checks + if not isinstance(data_fcst, xr.DataArray): + raise ValueError("%{0}: data_fcst must be a DataArray.".format(method)) + + if not isinstance(data_ref, xr.DataArray): + raise ValueError("%{0}: data_ref must be a DataArray.".format(method)) + + if not (list(data_fcst.coords) == list(data_ref.coords) and list(data_fcst.dims) == list(data_ref.dims)): + raise ValueError("%{0}: Coordinates and dimensions of data_fcst and data_ref must be the same".format(method)) + + nquantiles = len(quantiles) + if not nquantiles >= 3: + raise ValueError("%{0}: quantiles must be a list/tuple of at least three float values ([0..1])".format(method)) + + if factorization == "calibration_refinement": + data_cond = data_fcst + data_tar = data_ref + elif factorization == "likelihood-base_rate": + data_cond = data_ref + data_tar = data_fcst + else: + raise ValueError("%{0}: Choose either 'calibration_refinement' or 'likelihood-base_rate' for factorization" + .format(method)) + + # get and set some basic attributes + data_cond_longname = provide_default(data_cond.attrs, "longname", "conditioning_variable") + data_cond_unit = provide_default(data_cond.attrs, "unit", "unknown") + + data_tar_longname = provide_default(data_tar.attrs, "longname", "target_variable") + data_tar_unit = provide_default(data_cond.attrs, "unit", "unknown") + + # get bins for conditioning + data_cond_min, data_cond_max = np.floor(np.min(data_cond)), np.ceil(np.max(data_cond)) + bins = list(np.arange(int(data_cond_min), int(data_cond_max) + 1)) + bins_c = 0.5 * (np.asarray(bins[0:-1]) + np.asarray(bins[1:])) + nbins = len(bins) - 1 + + # get all possible bins from target and conditioning variable + data_all_min, data_all_max = np.minimum(data_cond_min, np.floor(np.min(data_tar))),\ + np.maximum(data_cond_max, np.ceil(np.max(data_tar))) + bins_all = list(np.arange(int(data_all_min), int(data_all_max) + 1)) + bins_c_all = 0.5 * (np.asarray(bins_all[0:-1]) + np.asarray(bins_all[1:])) + # initialize quantile data array + quantile_panel = xr.DataArray(np.full((len(bins_c_all), nquantiles), np.nan), + coords={"bin_center": bins_c_all, "quantile": list(quantiles)}, + dims=["bin_center", "quantile"], + attrs={"cond_var_name": data_cond_longname, "cond_var_unit": data_cond_unit, + "tar_var_name": data_tar_longname, "tar_var_unit": data_tar_unit}) + + print("%{0}: Start caclulating conditional quantiles for all {1:d} bins.".format(method, nbins)) + # fill the quantile data array + for i in np.arange(nbins): + # conditioning of ground truth based on forecast + data_cropped = data_tar.where(np.logical_and(data_cond >= bins[i], data_cond < bins[i + 1])) + # quantile-calculation + quantile_panel.loc[dict(bin_center=bins_c[i])] = data_cropped.quantile(quantiles) + + return quantile_panel, data_cond + + def avg_metrics(metric: da_or_ds, dim_name: str): """ Averages metric over given dimension @@ -120,18 +202,17 @@ class Scores: Class to calculate scores and skill scores. """ - known_scores = ["mse", "psnr"] + known_scores = ["mse", "psnr","ssim", "acc"] def __init__(self, score_name: str, dims: List[str]): """ Initialize score instance. - :param score_name: name of score taht is queried + :param score_name: name of score that is queried :param dims: list of dimension over which the score shall operate :return: Score instance """ method = Scores.__init__.__name__ - - self.metrics_dict = {"mse": self.calc_mse_batch , "psnr": self.calc_psnr_batch} + self.metrics_dict = {"mse": self.calc_mse_batch , "psnr": self.calc_psnr_batch, "ssim":self.calc_ssim_batch, "acc":self.calc_acc_batch} if set(self.metrics_dict.keys()) != set(Scores.known_scores): raise ValueError("%{0}: Known scores must coincide with keys of metrics_dict.".format(method)) self.score_name = self.set_score_name(score_name) @@ -139,23 +220,25 @@ class Scores: # attributes set when run_calculation is called self.avg_dims = dims - def run_calculation(self, model_data, ref_data, dims2avg=None, **kwargs): - - method = Scores.run_calculation.__name__ - - model_data, ref_data = Scores.set_model_and_ref_data(model_data, ref_data, dims2avg=dims2avg) - - try: - if self.avg_dims is None: - result = self.score_func(model_data, ref_data, **kwargs) - else: - result = self.score_func(model_data, ref_data, **kwargs) - except Exception as err: - print("%{0}: Calculation of '{1}' was not successful. Inspect error message!".format(method, - self.score_name)) - raise err - - return result + # ML 2021-06-10: The following method is not runnable and yet, it is unclear if it is needed at all. + # Thus, it is commented out for potential later use (in case that it won't be discarded). + # def run_calculation(self, model_data, ref_data, dims2avg=None, **kwargs): + # + # method = Scores.run_calculation.__name__ + # + # model_data, ref_data = Scores.set_model_and_ref_data(model_data, ref_data, dims2avg=dims2avg) + # + # try: + # # if self.avg_dims is None: + # result = self.score_func(model_data, ref_data, **kwargs) + # # else: + # # result = self.score_func(model_data, ref_data, **kwargs) + # except Exception as err: + # print("%{0}: Calculation of '{1}' was not successful. Inspect error message!".format(method, + # self.score_name)) + # raise err + # + # return result def set_score_name(self, score_name): @@ -174,7 +257,7 @@ class Scores: Calculate mse of forecast data w.r.t. reference data :param data_fcst: forecasted data (xarray with dimensions [batch, lat, lon]) :param data_ref: reference data (xarray with dimensions [batch, lat, lon]) - :return: averaged mse for each batch example + :return: averaged mse for each batch example, [batch,fore_hours] """ method = Scores.calc_mse_batch.__name__ @@ -193,12 +276,12 @@ class Scores: def calc_psnr_batch(self, data_fcst, data_ref, **kwargs): """ - Calculate mse of forecast data w.r.t. reference data - :param data_fcst: forecasted data (xarray with dimensions [batch, lat, lon]) - :param data_ref: reference data (xarray with dimensions [batch, lat, lon]) - :return: averaged mse for each batch example + Calculate psnr of forecast data w.r.t. reference data + :param data_fcst: forecasted data (xarray with dimensions [batch,fore_hours, lat, lon]) + :param data_ref: reference data (xarray with dimensions [batch, fore_hours, lat, lon]) + :return: averaged psnr for each batch example [batch, fore_hours] """ - method = Scores.calc_mse_batch.__name__ + method = Scores.calc_psnr_batch.__name__ if "pixel_max" in kwargs: pixel_max = kwargs.get("pixel_max") @@ -214,4 +297,60 @@ class Scores: return psnr + def calc_ssim_batch(self, data_fcst, data_ref, **kwargs): + """ + Calculate ssim ealuation metric of forecast data w.r.t reference data + :param data_fcst: forecasted data (xarray with dimensions [batch, fore_hours, lat, lon]) + :param data_ref: reference data (xarray with dimensions [batch, fore_hours, lat, lon]) + :return: averaged ssim for each batch example, shape is [batch,fore_hours] + """ + method = Scores.calc_ssim_batch.__name__ + batch_size = np.array(data_ref).shape[0] + fore_hours = np.array(data_fcst).shape[1] + ssim_pred = [[ssim(data_ref[i,j,:,:],data_fcst[i,j,:,:]) for j in range(fore_hours)] for i in range(batch_size)] + return ssim_pred + + def calc_acc_batch(self, data_fcst, data_ref, **kwargs): + """ + Calculate acc ealuation metric of forecast data w.r.t reference data + :param data_fcst: forecasted data (xarray with dimensions [batch, fore_hours, lat, lon]) + :param data_ref: reference data (xarray with dimensions [batch, fore_hours, lat, lon]) + :param data_clim: climatology data (xarray with dimensions [monthly, hourly, lat, lon]) + :return: averaged acc for each batch example [batch, fore_hours] + """ + method = Scores.calc_acc_batch.__name__ + if "data_clim" in kwargs: + data_clim = kwargs["data_clim"] + else: + raise KeyError("%{0}: climatological data must be parsed to calculate the ACC.".format(method)) + + #print(data_fcst) + #print('data_clim shape: ',data_clim.shape) + batch_size = data_fcst.shape[0] + fore_hours = data_fcst.shape[1] + #print('batch_size: ',batch_size) + #print('fore_hours: ',fore_hours) + acc = np.ones([batch_size,fore_hours])*np.nan + for i in range(batch_size): + for j in range(fore_hours): + img_fcst = data_fcst[i,j,:,:] + img_ref = data_ref[i,j,:,:] + # get the forecast time + print('img_fcst.init_time: ',img_fcst.init_time) + fcst_time = xr.Dataset({'time': pd.to_datetime(img_fcst.init_time.data) + datetime.timedelta(hours=j)}) + print('fcst_time: ',fcst_time.time) + img_month = fcst_time.time.dt.month + img_hour = fcst_time.time.dt.hour + img_clim = data_clim.sel(month=img_month, hour=img_hour) + + ### HAVE TO SELECT FORM CLIMATE DATA DIRECTLY; done + #time_idx = (img_month-1)*24+img_hour + #img_clim = data_clim[time_idx,:,:] + + img1_ = img_ref - img_clim + img2_ = img_fcst - img_clim + cor1 = np.sum(img1_*img2_) + cor2 = np.sqrt(np.sum(img1_**2)*np.sum(img2_**2)) + acc[i,j] = cor1/cor2 + return acc diff --git a/video_prediction_tools/utils/general_utils.py b/video_prediction_tools/utils/general_utils.py index bc42307c4c1afb36c9c57d34c82c4a2fc2551cd6..5d0fb397ef4be4d9c1e8bd3aad6d80bdc1a9925b 100644 --- a/video_prediction_tools/utils/general_utils.py +++ b/video_prediction_tools/utils/general_utils.py @@ -1,14 +1,19 @@ """ Some auxilary routines which may are used throughout the project. Provides: * get_unique_vars - * - + * add_str_to_path + * is_integer + * isw + * check_str_in_list + * check_dir + * provide_default + * get_era5_atts """ # import modules import os -import sys import numpy as np +import xarray as xr # routines def get_unique_vars(varnames): @@ -19,7 +24,7 @@ def get_unique_vars(varnames): vars_uni, varsind = np.unique(varnames, return_index=True) nvars_uni = len(vars_uni) - return (vars_uni, varsind, nvars_uni) + return vars_uni, varsind, nvars_uni def add_str_to_path(path_in, add_str): @@ -36,14 +41,14 @@ def add_str_to_path(path_in, add_str): if (not line_str.endswith(add_str)) or \ (not line_str.endswith(add_str.rstrip("/"))): - line_str = line_str + add_str + "/" + line_str = "{0}{1}/".format(line_str, add_str) else: - print(add_str + " is already part of " + line_str + ". No change is performed.") + print("{0} is already part of {1}. No change is performed.".format(add_str, line_str)) if l_linebreak: # re-add carriage return to string if required - return (line_str + "\n") + return "{0} \n".format(line_str) else: - return (line_str) + return line_str def is_integer(n): @@ -94,6 +99,7 @@ def check_str_in_list(list_in, str2check, labort=True): :param str2check: string or list of strings to be checked if they are part of list_in :return: True if existence of all strings was confirmed """ + method = check_str_in_list.__name__ stat = False if isinstance(str2check, str): @@ -102,18 +108,106 @@ def check_str_in_list(list_in, str2check, labort=True): assert np.all([isinstance(str1, str) for str1 in str2check]) == True, \ "Not all elements of str2check are strings" else: - raise ValueError("str2check argument must be either a string or a list of strings") + raise ValueError("%{0}: str2check argument must be either a string or a list of strings".format(method)) stat_element = [True if str1 in list_in else False for str1 in str2check] if not np.all(stat_element): - print("The following elements are not part of the input list:") + print("%{0}: The following elements are not part of the input list:".format(method)) inds_miss = np.where(stat_element)[0] for i in inds_miss: print("* index {0:d}: {1}".format(i, str2check[i])) if labort: - raise ValueError("Could not find all expected strings in list.") + raise ValueError("%{0}: Could not find all expected strings in list.".format(method)) else: stat = True return stat + + +def check_dir(path2dir: str, lcreate=False): + """ + Checks if path2dir exists and create it if desired + :param path2dir: + :param lcreate: create directory if it is not existing + :return: True in case of success + """ + method = check_dir.__name__ + + if (path2dir is None) or not isinstance(path2dir, str): + raise ValueError("%{0}: path2dir must be a string defining a pat to a directory.".format(method)) + + elif os.path.isdir(path2dir): + return True + else: + if lcreate: + try: + os.makedirs(path2dir) + except Exception as err: + print("%{0}: Failed to create directory '{1}'".format(method, path2dir)) + raise err + print("%{0}: Created directory '{1}'".format(method, path2dir)) + return True + else: + raise NotADirectoryError("%{0}: Directory '{1}' does not exist".format(method, path2dir)) + + +def provide_default(dict_in, keyname, default=None, required=False): + """ + Returns values of key from input dictionary or alternatively its default + + :param dict_in: input dictionary + :param keyname: name of key which should be added to dict_in if it is not already existing + :param default: default value of key (returned if keyname is not present in dict_in) + :param required: Forces existence of keyname in dict_in (otherwise, an error is returned) + :return: value of requested key or its default retrieved from dict_in + """ + method = provide_default.__name__ + + if not required and default is None: + raise ValueError("%{0}: Provide default when existence of key in dictionary is not required.".format(method)) + + if keyname not in dict_in.keys(): + if required: + print(dict_in) + raise ValueError("%{0}: Could not find '{1}' in input dictionary.".format(method, keyname)) + return default + else: + return dict_in[keyname] + + +def get_era5_varatts(data_arr: xr.DataArray, name: str): + """ + Writes longname and unit to data arrays given their name is known + :param data_arr: the data array + :param name: the name of the variable + :return: data array with added attributes 'longname' and 'unit' if they are known + """ + + era5_varname_map = {"2t": "2m temperature", "t_850": "850 hPa temperature", "tcc": "total cloud cover", + "msl": "mean sealevel pressure", "10u": "10m u-wind", "10v": "10m v-wind"} + era5_varunit_map = {"2t": "K", "t_850": "K", "tcc": "%", + "msl": "Pa", "10u": "m/s", "10v": "m/s"} + + name_splitted = name.split("_") + if "fcst" in name: + addstr = "from {0} model".format(name_splitted[1]) + elif "ref" in name: + addstr = "from ERA5 reanalysis" + else: + addstr = "" + + longname = provide_default(era5_varname_map, name_splitted[0], -1) + if longname == -1: + pass + else: + data_arr.attrs["longname"] = "{0} {1}".format(longname, addstr) + + unit = provide_default(era5_varunit_map, name_splitted[0], -1) + if unit == -1: + pass + else: + data_arr.attrs["unit"] = unit + + return data_arr + diff --git a/video_prediction_tools/utils/metadata.py b/video_prediction_tools/utils/metadata.py index 8df45d8436e0003b894a0d897cf5fee7ad4436fd..98c096f36a16c83f97e6a15867537c4b29d7aee1 100644 --- a/video_prediction_tools/utils/metadata.py +++ b/video_prediction_tools/utils/metadata.py @@ -9,6 +9,7 @@ __date__ = "2020-xx-xx" import os import sys import time +import xarray as xr import numpy as np import json from general_utils import is_integer, add_str_to_path, check_str_in_list, isw @@ -254,6 +255,24 @@ class MetaData: # note: the naming of the variables starts with var1, thus add 1 to the iterator self.variables = [list_of_dict_aux[ivar]["var" + str(ivar + 1)] for ivar in range(len(list_of_dict_aux))] + def get_coord_array(self): + """ + Returns data arrays of latitudes and longitudes + :return lats_da: data array of latitudes + :return lons_da: data array of longitudes + """ + method = MetaData.get_coord_array.__name__ + + if not hasattr(self, "lat") or not hasattr(self, "lon"): + raise AttributeError("%{0}: lat and lon are still not set.".format(method)) + + lats_da = xr.DataArray(self.lat, coords={"lat": self.lat}, dims="lat", + attrs={"units": "degrees_east"}) + lons_da = xr.DataArray(self.lon, coords={"lon": self.lon}, dims="lon", + attrs={"units": "degrees_north"}) + + return lats_da, lons_da + def write_dirs_to_batch_scripts(self, batch_script): """ Method for automatic extension of path variables in Batch scripts by the experiment directory which is saved diff --git a/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh b/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh index 1629277e050b0362e5fe76301d6c3456de8ba014..ba2bf2b06095532c72be6cbc42975c292d2230cf 100755 --- a/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh +++ b/video_prediction_tools/utils/runscript_generator/setup_runscript_templates.sh @@ -16,6 +16,7 @@ # default value for base directory base_data_dir_default=/p/project/deepacf/deeprain/video_prediction_shared_folder/ +# base_data_dir_default=/p/scratch/deepacf/ji4/ # some further directory paths CURR_DIR_FULL="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" # retrieves the location of this script BASE_DIR="$(dirname "$(dirname "${CURR_DIR_FULL}")")"