Commit a3cf40de authored by Michael Langguth's avatar Michael Langguth

Correction to filename in calc_climatolgical_mean.ipynb.

parent 78f4953c
%% Cell type:code id:annoying-jamaica tags:
``` python
import os
import sys
import time

import datetime as dt

import pandas as pd
import xarray as xr
```
%% Cell type:code id:legislative-portugal tags:
``` python
datadir = "/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly"
datafile = os.path.join(datadir, "t2m_1970_1999.nc")
```
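%% Cell type:markdown tags:
Since the commit corrects the input filename, a quick existence check on the path can catch a wrong name early. The next cell is a minimal sketch and not part of the original notebook.
%% Cell type:code tags:
``` python
# Sketch (not part of the original workflow): fail early if the input file is missing.
if not os.path.isfile(datafile):
    raise FileNotFoundError("Input file '{0}' does not exist.".format(datafile))
```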
%% Cell type:code id:through-cornell tags:
``` python
with xr.open_dataset(datafile) as dfile:
    # GRIB parameter 167 corresponds to the 2m temperature in the ECMWF tables
    t2m_all = dfile["var167"]
    coords = t2m_all.coords
```
%% Cell type:code id:steady-implement tags:
``` python
# chunk the data: one chunk spanning the full time axis, 100x100 tiles in lat/lon
ntimes = len(coords["time"])
t2m_all = t2m_all.chunk({"time": ntimes, "lat": 100, "lon": 100})
```
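%% Cell type:markdown tags:
Chunking with the full time dimension and 100x100 tiles in latitude/longitude turns `t2m_all` into a dask-backed array, so the following operations are evaluated lazily. The next cell is an optional sketch (not part of the original notebook) to inspect the resulting chunk layout.
%% Cell type:code tags:
``` python
# Sketch: inspect the chunk layout of the dask-backed array.
print(t2m_all.chunks)  # chunk sizes per dimension (time, lat, lon)
# rough estimate of the in-memory size of a single chunk
chunk_bytes = ntimes * 100 * 100 * t2m_all.dtype.itemsize
print("approx. size per chunk: {0:.1f} MB".format(chunk_bytes / 1.e6))
```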
%% Cell type:code id:universal-neutral tags:
``` python
# define a function with the hourly calculation:
def hour_mean(x):
    return x.groupby('time.hour').mean('time')

time0 = time.time()
# lazy operation on the dask-backed array: this only registers the task graph
t2m_hourly = t2m_all.groupby("time.month").apply(hour_mean)
print("Registering averaging took {0:.2f}".format(time.time()-time0))
#print(t2m_hourly.values)    # accessing .values would trigger the actual computation
print("Performing averaging took {0:.2f}".format(time.time()-time0))
```
%% Output
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
...     array[indexer]
To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
...     array[indexer]
return self.array[key]
Registering averaging took 1.08
Performing averaging took 1.08
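%% Cell type:markdown tags:
The PerformanceWarning above is raised by dask because the slicing performed during the monthly groupby produces large chunks. Following the hint printed in the warning itself, the large chunks can be split by setting the corresponding dask option around the groupby call. This is an optional sketch and does not change the result.
%% Cell type:code tags:
``` python
# Optional sketch, following the hint from the PerformanceWarning above:
# let dask split the large chunks created by the groupby slicing.
import dask

with dask.config.set(**{"array.slicing.split_large_chunks": True}):
    t2m_hourly = t2m_all.groupby("time.month").apply(hour_mean)
```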
%% Cell type:code id:signed-edmonton tags:
``` python
# trigger the actual evaluation of the lazily registered averaging
t2m_hourly = t2m_hourly.compute()
```
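%% Cell type:markdown tags:
`compute()` evaluates the lazily registered task graph and returns an in-memory DataArray. If the climatology is to be reused, it can be written to NetCDF; the output filename below is purely illustrative and not taken from the original workflow.
%% Cell type:code tags:
``` python
# Sketch: persist the computed climatology for later reuse.
# The filename is an example only (not from the original notebook).
outfile = os.path.join(datadir, "t2m_climatology_1970-1999_python.nc")
t2m_hourly.to_netcdf(outfile)
```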
%% Cell type:markdown id:sustainable-significance tags:
This works, but it takes about 3 minutes to process 30 years of data. <br>
The same operation is also possible with CDO, where it takes only about 36 s on Juwels. <br>
The two corresponding shell commands (after loading CDO 1.9.8 and ecCodes 2.18.0) are:
``` bash
clim_files=($(for year in {1991..2020}; do echo "${year}_t2m.grb"; done))
cdo -t ecmwf -f nc ensavg ${clim_files[@]} mutilyears_1991-2020.nc
```
In the following, we check the correctness of the data by computing the difference between the data from the CDO-generated file and the data produced above. As an example, we choose the mean temperature in January at 12 UTC.
%% Cell type:code id:rental-suite tags:
``` python
datafile_cdo = os.path.join(datadir, "climatology_t2m_1970-1999.nc")
with xr.open_dataset(datafile_cdo) as dfile:
    t2m_hourly_cdo = dfile["T2M"]
```
%% Cell type:code id:better-adventure tags:
``` python
import numpy as np

# compare the Python-based and the CDO-based climatology
# for the mean temperature in January at 12 UTC
test1 = t2m_hourly.sel(month=1, hour=12)
test2 = t2m_hourly_cdo.sel(time="1979-01-01 12:00")
diff = np.abs(test1 - test2)
print(np.max(diff))
```
%% Output
<xarray.DataArray ()>
array(0.00097656, dtype=float32)
Coordinates:
    hour     int64 12
    month    int64 1
    time     datetime64[ns] 1979-01-01T12:00:00
%% Cell type:markdown id:weird-sociology tags:
Thus, the maximum difference is $\mathcal{O}(10^{-3})$ and can be neglected for our application.
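%% Cell type:markdown tags:
To put this absolute difference into perspective, the relative error can be checked as well; the sketch below assumes that `test1` and `test2` from the comparison cell above are still in scope.
%% Cell type:code tags:
``` python
# Sketch: maximum relative difference between the two climatologies.
rel_diff = (np.abs(test1 - test2) / np.abs(test2)).max()
print("maximum relative difference: {0:.2e}".format(float(rel_diff)))
```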
%% Cell type:code id:running-monday tags:
``` python
```