Skip to content
Snippets Groups Projects
Commit a3cf40de authored by Michael Langguth's avatar Michael Langguth
Browse files

Correction to filename in calc_climatolgical_mean.ipynb.

parent 78f4953c
Branches
Tags
No related merge requests found
Pipeline #68607 passed
%% Cell type:code id:governing-strengthening tags: %% Cell type:code id:annoying-jamaica tags:
``` python ``` python
import os, sys, time import os, sys, time
import xarray as xr import xarray as xr
import pandas as pd import pandas as pd
import datetime as dt import datetime as dt
``` ```
%% Cell type:code id:colonial-chocolate tags: %% Cell type:code id:legislative-portugal tags:
``` python ``` python
datadir = "/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly" datadir = "/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly"
datafile = "1970-1999_t2m.nc" datafile = "1970-1999_t2m.nc"
datafile= os.path.join(datadir, datafile) datafile= os.path.join(datadir, datafile)
datafile="/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly/t2m_1970_1999.nc" datafile="/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly/t2m_1970_1999.nc"
``` ```
%% Cell type:code id:naval-behalf tags: %% Cell type:code id:through-cornell tags:
``` python ``` python
with xr.open_dataset(datafile) as dfile: with xr.open_dataset(datafile) as dfile:
t2m_all = dfile["var167"] t2m_all = dfile["var167"]
coords = t2m_all.coords coords = t2m_all.coords
``` ```
%% Cell type:code id:photographic-avenue tags: %% Cell type:code id:steady-implement tags:
``` python ``` python
ntimes = len(coords["time"]) ntimes = len(coords["time"])
t2m_all = t2m_all.chunk({"time": ntimes, "lat":100, "lon":100}) t2m_all = t2m_all.chunk({"time": ntimes, "lat":100, "lon":100})
``` ```
%% Cell type:code id:vocal-cholesterol tags: %% Cell type:code id:universal-neutral tags:
``` python ``` python
# define a function with the hourly calculation: # define a function with the hourly calculation:
def hour_mean(x): def hour_mean(x):
return x.groupby('time.hour').mean('time') return x.groupby('time.hour').mean('time')
time0 = time.time() time0 = time.time()
t2m_hourly = t2m_all.groupby("time.month").apply(hour_mean) t2m_hourly = t2m_all.groupby("time.month").apply(hour_mean)
print("Registering averaging took {0:.2f}".format(time.time()-time0)) print("Registering averaging took {0:.2f}".format(time.time()-time0))
#print(t2m_hourly.values) #print(t2m_hourly.values)
print("Performing averaging took {0:.2f}".format(time.time()-time0)) print("Performing averaging took {0:.2f}".format(time.time()-time0))
``` ```
%% Output %% Output
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
Registering averaging took 1.08 Registering averaging took 1.08
Performing averaging took 1.08 Performing averaging took 1.08
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large /p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer] ... array[indexer]
To avoid creating the large chunks, set the option To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}): >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer] ... array[indexer]
return self.array[key] return self.array[key]
%% Cell type:code id:intelligent-florida tags: %% Cell type:code id:signed-edmonton tags:
``` python ``` python
t2m_hourly = t2m_hourly.compute() t2m_hourly = t2m_hourly.compute()
``` ```
%% Cell type:markdown id:divided-feelings tags: %% Cell type:markdown id:sustainable-significance tags:
This works, but it takes about 3 minutes to process 30 years of data. <br> This works, but it takes about 3 minutes to process 30 years of data. <br>
However, the same operation is possible with CDO and only takes 36s to finish on Juwels. <br> However, the same operation is possible with CDO and only takes 36s to finish on Juwels. <br>
The following two shell commands (run after loading CDO 1.9.8 and ecCodes 2.18.0) are: The following two shell commands (run after loading CDO 1.9.8 and ecCodes 2.18.0) are:
``` ```
clim_files=($(for year in {1991..2020}; do echo "${year}_t2m.grb"; done)) clim_files=($(for year in {1991..2020}; do echo "${year}_t2m.grb"; done))
cdo -t ecmwf -f nc ensavg ${clim_files[@]} mutilyears_1991-2020.nc cdo -t ecmwf -f nc ensavg ${clim_files[@]} mutilyears_1991-2020.nc
``` ```
In the following, we check the correctness of the data by computing the difference between the data from a CDO-generated file and the data produced above. We choose the mean temperature in January at 12 UTC as an example. In the following, we check the correctness of the data by computing the difference between the data from a CDO-generated file and the data produced above. We choose the mean temperature in January at 12 UTC as an example.
%% Cell type:code id:authorized-ethics tags: %% Cell type:code id:rental-suite tags:
``` python ``` python
datafile_cdo = os.path.join(datadir, "mutilyears_1970-1999.nc") datafile_cdo = os.path.join(datadir, "climatology_t2m_1970-1999.nc")
with xr.open_dataset(datafile_cdo) as dfile: with xr.open_dataset(datafile_cdo) as dfile:
t2m_hourly_cdo = dfile["T2M"] t2m_hourly_cdo = dfile["T2M"]
``` ```
%% Cell type:code id:greater-resistance tags: %% Cell type:code id:better-adventure tags:
``` python ``` python
import numpy as np import numpy as np
test1 = t2m_hourly.sel(month=1, hour=12) test1 = t2m_hourly.sel(month=1, hour=12)
test2 = t2m_hourly_cdo.sel(time="1979-01-01 12:00") test2 = t2m_hourly_cdo.sel(time="1979-01-01 12:00")
diff = np.abs(test1-test2) diff = np.abs(test1-test2)
print(np.max(diff)) print(np.max(diff))
``` ```
%% Output %% Output
<xarray.DataArray ()> <xarray.DataArray ()>
array(0.00097656, dtype=float32) array(0.00097656, dtype=float32)
Coordinates: Coordinates:
hour int64 12 hour int64 12
month int64 1 month int64 1
time datetime64[ns] 1979-01-01T12:00:00 time datetime64[ns] 1979-01-01T12:00:00
%% Cell type:markdown id:western-thriller tags: %% Cell type:markdown id:weird-sociology tags:
Thus, the maximum difference is of order $\mathcal{O}(10^{-3})$, which can be neglected for our application. Thus, the maximum difference is of order $\mathcal{O}(10^{-3})$, which can be neglected for our application.
%% Cell type:code id:hydraulic-appearance tags: %% Cell type:code id:running-monday tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment