Skip to content
Snippets Groups Projects
Commit 27860c59 authored by Michael Langguth's avatar Michael Langguth
Browse files

Commented version of Jupyter Notebook for conditional quantile plots.

parent f3300d42
No related branches found
No related tags found
No related merge requests found
Pipeline #68520 passed
%% Cell type:code id:under-cooler tags: %% Cell type:code id:downtown-archive tags:
``` python ``` python
import os, sys import os, sys
import glob import glob
import datetime as dt import datetime as dt
import numpy as np import numpy as np
import xarray as xr import xarray as xr
import time import time
``` ```
%% Cell type:code id:becoming-dover tags: %% Cell type:code id:perfect-julian tags:
``` python ``` python
# exemplary model to evaluate
forecast_path = "/p/home/jusers/langguth1/juwels/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-80x48-3960N0180E-2t_tcc_t_850_langguth1/savp/20210505T131220_mache1_karim_savp_smreg_cv3_3" forecast_path = "/p/home/jusers/langguth1/juwels/video_prediction_shared_folder/results/era5-Y2007-2019M01to12-80x48-3960N0180E-2t_tcc_t_850_langguth1/savp/20210505T131220_mache1_karim_savp_smreg_cv3_3"
fnames= os.path.join(forecast_path, "vfp_date_*sample_ind_*.nc" ) fnames= os.path.join(forecast_path, "vfp_date_*sample_ind_*.nc" )
# get a list of all forecast files
fnames = glob.glob(fnames) fnames = glob.glob(fnames)
print(len(fnames))
# randomly open one file to take a look at its content
dfile = xr.open_dataset(fnames[99]) dfile = xr.open_dataset(fnames[99])
print(dfile.data_vars) print(dfile.data_vars)
#print(dfile["init_time"]) #print(dfile["init_time"])
#print(dfile["2t_savp_fcst"][2]) #print(dfile["2t_savp_fcst"][2])
``` ```
%% Output %% Output
8440 8440
Data variables: Data variables:
2t_in (in_hour, lat, lon) float32 ... 2t_in (in_hour, lat, lon) float32 ...
tcc_in (in_hour, lat, lon) float32 ... tcc_in (in_hour, lat, lon) float32 ...
t_850_in (in_hour, lat, lon) float32 ... t_850_in (in_hour, lat, lon) float32 ...
2t_ref (fcst_hour, lat, lon) float32 ... 2t_ref (fcst_hour, lat, lon) float32 ...
tcc_ref (fcst_hour, lat, lon) float32 ... tcc_ref (fcst_hour, lat, lon) float32 ...
t_850_ref (fcst_hour, lat, lon) float32 ... t_850_ref (fcst_hour, lat, lon) float32 ...
2t_savp_fcst (fcst_hour, lat, lon) float32 ... 2t_savp_fcst (fcst_hour, lat, lon) float32 ...
tcc_savp_fcst (fcst_hour, lat, lon) float32 ... tcc_savp_fcst (fcst_hour, lat, lon) float32 ...
t_850_savp_fcst (fcst_hour, lat, lon) float32 ... t_850_savp_fcst (fcst_hour, lat, lon) float32 ...
2t_persistence_fcst (fcst_hour, lat, lon) float64 ... 2t_persistence_fcst (fcst_hour, lat, lon) float64 ...
tcc_persistence_fcst (fcst_hour, lat, lon) float64 ... tcc_persistence_fcst (fcst_hour, lat, lon) float64 ...
t_850_persistence_fcst (fcst_hour, lat, lon) float64 ... t_850_persistence_fcst (fcst_hour, lat, lon) float64 ...
%% Cell type:code id:editorial-bunny tags: %% Cell type:code id:strong-ghana tags:
``` python ``` python
# some auxiliary functions to enhance data query with open_mfdataset
def non_interst_vars(ds): def non_interst_vars(ds):
""" """
Creates list of variables that are not of interest. For this, vars2proc must be defined at global scope Creates list of variables that are not of interest. For this, vars2proc must be defined at global scope
:param ds: the dataset :param ds: the dataset
:return: list of variables in dataset that are not of interest :return: list of variables in dataset that are not of interest
""" """
return [v for v in ds.data_vars return [v for v in ds.data_vars
if v not in vars2proc] if v not in vars2proc]
# #
# ==================================================================================================== # ====================================================================================================
def get_relevant_vars(ds): def get_relevant_vars(ds):
""" """
Drops variables that are not of interest from dataset and also shrinks data to cells of interest. Drops variables that are not of interest from dataset and also shrinks data to cells of interest.
For this, ncells must be a dimension of the dataset and dmask_ref_inds must be defined at gloabl scope For this, ncells must be a dimension of the dataset and dmask_ref_inds must be defined at gloabl scope
:param ds: the dataset :param ds: the dataset
:return: dataset with non-interesting variables dropped and data shrinked to region of interest :return: dataset with non-interesting variables dropped and data shrinked to region of interest
""" """
return ds.drop(non_interst_vars(ds)).isel(fcst_hour=11) return ds.drop(non_interst_vars(ds)).isel(fcst_hour=11)
``` ```
%% Cell type:code id:great-metro tags: %% Cell type:code id:simplified-cinema tags:
``` python ``` python
# choose variable of interest and load data into memory (i.e. the dataset is not a dask-array anymore!!!)
vars2proc = ["2t_savp_fcst", "2t_ref"] vars2proc = ["2t_savp_fcst", "2t_ref"]
time0 = time.time() time0 = time.time()
with xr.open_mfdataset(fnames, decode_cf=True, combine="nested", concat_dim=["init_time"], compat="broadcast_equals", preprocess=get_relevant_vars) as dfiles: with xr.open_mfdataset(fnames, decode_cf=True, combine="nested", concat_dim=["init_time"], compat="broadcast_equals", preprocess=get_relevant_vars) as dfiles:
data = dfiles.load() data = dfiles.load()
#times0 = dfiles["time_forecast"]
print("Registering and loading data took {0:.2f} seconds".format(time.time()- time0)) print("Registering and loading data took {0:.2f} seconds".format(time.time()- time0))
``` ```
%% Output %% Output
Registering and loading data took 1057.31 seconds Registering and loading data took 1057.31 seconds
%% Cell type:code id:hindu-wesley tags: %% Cell type:code id:appropriate-springer tags:
``` python ``` python
data_correct = xr.Dataset({"2t_savp_fcst": data["2t_savp_fcst"], "2t_ref": data["2t_ref"]}) # Take a look at the data
print(data_correct) print(data)
``` ```
%% Output %% Output
<xarray.Dataset> <xarray.Dataset>
Dimensions: (init_time: 8440, lat: 48, lon: 80) Dimensions: (init_time: 8440, lat: 48, lon: 80)
Coordinates: Coordinates:
* init_time (init_time) datetime64[ns] 2010-08-20T05:00:00 ... 2010-03-... * init_time (init_time) datetime64[ns] 2010-08-20T05:00:00 ... 2010-03-...
* lat (lat) float64 53.7 53.4 53.1 52.8 52.5 ... 40.5 40.2 39.9 39.6 * lat (lat) float64 53.7 53.4 53.1 52.8 52.5 ... 40.5 40.2 39.9 39.6
* lon (lon) float64 1.8 2.1 2.4 2.7 3.0 ... 24.3 24.6 24.9 25.2 25.5 * lon (lon) float64 1.8 2.1 2.4 2.7 3.0 ... 24.3 24.6 24.9 25.2 25.5
fcst_hour int64 12 fcst_hour int64 12
Data variables: Data variables:
2t_savp_fcst (init_time, lat, lon) float32 291.3 291.8 ... 288.5 288.2 2t_savp_fcst (init_time, lat, lon) float32 291.3 291.8 ... 288.5 288.2
2t_ref (init_time, lat, lon) float32 292.2 292.1 ... 288.5 288.6 2t_ref (init_time, lat, lon) float32 292.2 292.1 ... 288.5 288.6
%% Cell type:code id:sweet-happening tags: %% Cell type:code id:governmental-astrology tags:
``` python ``` python
data_fcst, data_ref = data_correct["2t_savp_fcst"], data_correct["2t_ref"] # get the vaiables of interest as data arrays
data_fcst, data_ref = data["2t_savp_fcst"], data["2t_ref"]
# create the bins for which quantiles are plotted based on forecasts (=conditioning variable)
fcst_min, fcst_max = np.floor(np.min(data_fcst)), np.ceil(np.max(data_fcst)) fcst_min, fcst_max = np.floor(np.min(data_fcst)), np.ceil(np.max(data_fcst))
x_bins = list(np.arange(int(fcst_min), int(fcst_max) + 1)) x_bins = list(np.arange(int(fcst_min), int(fcst_max) + 1))
# center point of bins
x_bins_c = 0.5*(np.asarray(x_bins[0:-1]) + np.asarray(x_bins[1:])) x_bins_c = 0.5*(np.asarray(x_bins[0:-1]) + np.asarray(x_bins[1:]))
nbins = len(x_bins) - 1 nbins = len(x_bins) - 1
print(x_bins)
``` ```
%% Output %% Output
[242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313] [242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313]
%% Cell type:code id:incorporate-flooring tags: %% Cell type:code id:wound-maria tags:
``` python ``` python
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
# set the quantiles and initialize data array
quantiles = [0.05, 0.5, 0.95] quantiles = [0.05, 0.5, 0.95]
nquantiles = len(quantiles) nquantiles = len(quantiles)
quantile_panel = xr.DataArray(np.full((nbins, nquantiles), np.nan), coords={"bin_center": x_bins_c, "quantile": quantiles}, quantile_panel = xr.DataArray(np.full((nbins, nquantiles), np.nan), coords={"bin_center": x_bins_c, "quantile": quantiles},
dims=["bin_center", "quantile"]) dims=["bin_center", "quantile"])
# populate the quantile data array
for i in np.arange(nbins): for i in np.arange(nbins):
# conditioning of ground truth based on forecast
data_cropped = data_correct["2t_ref"].where(np.logical_and(data_correct["2t_savp_fcst"] >= x_bins[i], data_cropped = data_correct["2t_ref"].where(np.logical_and(data_correct["2t_savp_fcst"] >= x_bins[i],
data_correct["2t_savp_fcst"] < x_bins[i+1])) data_correct["2t_savp_fcst"] < x_bins[i+1]))
# quantile-calculation
quantile_panel.loc[dict(bin_center=x_bins_c[i])] = data_cropped.quantile([0.05, 0.5, 0.95]) quantile_panel.loc[dict(bin_center=x_bins_c[i])] = data_cropped.quantile([0.05, 0.5, 0.95])
# transform
x_bins_c = x_bins_c - 273.15 x_bins_c = x_bins_c - 273.15
quantile_panel = quantile_panel - 273.15 quantile_panel = quantile_panel - 273.15
``` ```
%% Cell type:code id:brilliant-aberdeen tags: %% Cell type:code id:lightweight-settlement tags:
``` python ``` python
fig, ax = plt.subplots(figsize=(9,6)) # create plot of conditional forecast
fig, ax = plt.subplots(figsize=(12,6))
ls_all = ["--", "-", "--"] ls_all = ["--", "-", "--"]
lw_all = [2., 1.5, 2.] lw_all = [2., 1.5, 2.]
ax.plot(x_bins_c, x_bins_c, color='k', label='reference 1:1', linewidth=1.) ax.plot(x_bins_c, x_bins_c, color='k', label='reference 1:1', linewidth=1.)
for i in np.arange(3): for i in np.arange(3):
ax.plot(x_bins_c, quantile_panel.isel(quantile=i), ls=ls_all[i], color="k", lw=lw_all[i]) ax.plot(x_bins_c, quantile_panel.isel(quantile=i), ls=ls_all[i], color="k", lw=lw_all[i])
ax.set_ylabel("2m temperature from ERA5 [°C]", fontsize=16) ax.set_ylabel("2m temperature from ERA5 [°C]", fontsize=16)
ax.set_xlabel("Predicted 2m temperature from SAVP [°C]", fontsize=16) ax.set_xlabel("Predicted 2m temperature from SAVP [°C]", fontsize=16)
ax.tick_params(axis="both", labelsize=14) ax.tick_params(axis="both", labelsize=14)
fig.savefig("./first_cond_quantile.png") fig.savefig("./first_cond_quantile.png")
``` ```
%% Output %% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-94561a8add4c> in <module>
1 # create plot of conditional forecast
----> 2 fig, ax = plt.subplots(figsize=(12,6))
3
4 ls_all = ["--", "-", "--"]
5 lw_all = [2., 1.5, 2.]
NameError: name 'plt' is not defined
%% Cell type:code id:relevant-freight tags: %% Cell type:code id:selective-dubai tags:
``` python ``` python
#data_grouped = data_correct.groupby_bins("2t_savp_fcst", x_bins)#.groups #data_grouped = data_correct.groupby_bins("2t_savp_fcst", x_bins)#.groups
``` ```
%% Cell type:code id:ordered-cambridge tags: %% Cell type:code id:pending-formula tags:
``` python ``` python
inds_of_bins = np.digitize(data_fcst, x_bins, right=True) inds_of_bins = np.digitize(data_fcst, x_bins, right=True)
print(inds_of_bins) print(inds_of_bins)
``` ```
%% Output %% Output
[[[22 22 22 ... 18 18 18] [[[22 22 22 ... 18 18 18]
[22 22 22 ... 18 18 18] [22 22 22 ... 18 18 18]
[22 22 22 ... 19 19 19] [22 22 22 ... 19 19 19]
... ...
[29 29 29 ... 30 30 30] [29 29 29 ... 30 30 30]
[29 30 29 ... 31 31 31] [29 30 29 ... 31 31 31]
[29 30 29 ... 31 31 31]] [29 30 29 ... 31 31 31]]
[[21 21 21 ... 20 20 20] [[21 21 21 ... 20 20 20]
[20 21 21 ... 20 20 20] [20 21 21 ... 20 20 20]
[20 21 21 ... 20 20 20] [20 21 21 ... 20 20 20]
... ...
[30 30 30 ... 31 31 31] [30 30 30 ... 31 31 31]
[30 30 30 ... 31 31 31] [30 30 30 ... 31 31 31]
[30 30 30 ... 31 31 31]] [30 30 30 ... 31 31 31]]
[[21 21 21 ... 21 21 21] [[21 21 21 ... 21 21 21]
[21 21 21 ... 21 21 21] [21 21 21 ... 21 21 21]
[21 21 21 ... 21 21 21] [21 21 21 ... 21 21 21]
... ...
[28 28 28 ... 31 31 31] [28 28 28 ... 31 31 31]
[28 28 28 ... 32 32 31] [28 28 28 ... 32 32 31]
[28 29 29 ... 32 32 32]] [28 29 29 ... 32 32 32]]
... ...
[[22 22 22 ... 20 20 20] [[22 22 22 ... 20 20 20]
[22 22 22 ... 20 20 20] [22 22 22 ... 20 20 20]
[22 21 21 ... 20 20 20] [22 21 21 ... 20 20 20]
... ...
[29 29 29 ... 31 31 31] [29 29 29 ... 31 31 31]
[29 29 29 ... 32 32 32] [29 29 29 ... 32 32 32]
[30 30 29 ... 32 32 32]] [30 30 29 ... 32 32 32]]
[[21 21 21 ... 20 20 20] [[21 21 21 ... 20 20 20]
[20 21 21 ... 20 20 20] [20 21 21 ... 20 20 20]
[20 20 21 ... 20 20 20] [20 20 21 ... 20 20 20]
... ...
[30 30 30 ... 31 31 31] [30 30 30 ... 31 31 31]
[30 30 29 ... 31 31 31] [30 30 29 ... 31 31 31]
[30 30 30 ... 31 31 31]] [30 30 30 ... 31 31 31]]
[[22 22 22 ... 24 24 24] [[22 22 22 ... 24 24 24]
[22 22 22 ... 24 23 24] [22 22 22 ... 24 23 24]
[22 22 22 ... 24 24 24] [22 22 22 ... 24 24 24]
... ...
[27 27 27 ... 31 31 31] [27 27 27 ... 31 31 31]
[28 28 28 ... 32 32 32] [28 28 28 ... 32 32 32]
[28 28 28 ... 32 32 32]]] [28 28 28 ... 32 32 32]]]
%% Cell type:code id:furnished-customer tags: %% Cell type:code id:congressional-closure tags:
``` python ``` python
def calc_quantile(x, dim ="init_time"): def calc_quantile(x, dim ="init_time"):
return x.quantile(0.99) return x.quantile(0.99)
cond_quantile1 = data_grouped.map(calc_quantile) cond_quantile1 = data_grouped.map(calc_quantile)
#cond_quantile2 = data_grouped.map(calc_quantile) #cond_quantile2 = data_grouped.map(calc_quantile)
print(cond_quantile1["quantile"]) print(cond_quantile1["quantile"])
print(cond_quantile1["2t_savp_fcst"]) print(cond_quantile1["2t_savp_fcst"])
#print(cond_quantile2["2t_ref"]) #print(cond_quantile2["2t_ref"])
#print(cond_quantile2["2t_savp_fcst"]) #print(cond_quantile2["2t_savp_fcst"])
``` ```
%% Output %% Output
<xarray.DataArray '2t_ref' (2t_savp_fcst_bins: 37)> <xarray.DataArray '2t_ref' (2t_savp_fcst_bins: 37)>
array([259.60351562, 264.13945557, 264.74759033, 265.45030518, array([259.60351562, 264.13945557, 264.74759033, 265.45030518,
266.47970703, 267.3628302 , 268.44342804, 269.80157959, 266.47970703, 267.3628302 , 268.44342804, 269.80157959,
270.4291217 , 271.22656982, 272.41841827, 274.18320801, 270.4291217 , 271.22656982, 272.41841827, 274.18320801,
274.74815369, 275.68839111, 276.3840918 , 277.0491394 , 274.74815369, 275.68839111, 276.3840918 , 277.0491394 ,
277.99171387, 279.1111615 , 280.24440918, 281.56947693, 277.99171387, 279.1111615 , 280.24440918, 281.56947693,
282.817146 , 284.15313873, 285.25139038, 286.46736084, 282.817146 , 284.15313873, 285.25139038, 286.46736084,
287.11281006, 287.56309875, 288.39205322, 289.28383789, 287.11281006, 287.56309875, 288.39205322, 289.28383789,
290.12092529, 291.00213623, 291.93958588, 292.7901001 , 290.12092529, 291.00213623, 291.93958588, 292.7901001 ,
294.50114746, 295.28106201, 295.7451416 , 296.17975464, 294.50114746, 295.28106201, 295.7451416 , 296.17975464,
295.94475342]) 295.94475342])
Coordinates: Coordinates:
* 2t_savp_fcst_bins (2t_savp_fcst_bins) object (260, 261] ... (296, 297] * 2t_savp_fcst_bins (2t_savp_fcst_bins) object (260, 261] ... (296, 297]
quantile float64 0.99 quantile float64 0.99
<xarray.DataArray '2t_savp_fcst' (2t_savp_fcst_bins: 37)> <xarray.DataArray '2t_savp_fcst' (2t_savp_fcst_bins: 37)>
array([260.51538086, 261.99571045, 262.96671509, 263.99466095, array([260.51538086, 261.99571045, 262.96671509, 263.99466095,
264.98212372, 265.99100769, 266.99321747, 267.99145386, 264.98212372, 265.99100769, 266.99321747, 267.99145386,
268.99003754, 269.9897348 , 270.99363922, 271.99260651, 268.99003754, 269.9897348 , 270.99363922, 271.99260651,
272.98925781, 273.99296265, 274.99265747, 275.9934906 , 272.98925781, 273.99296265, 274.99265747, 275.9934906 ,
276.99263123, 277.99108887, 278.98980103, 279.99055573, 276.99263123, 277.99108887, 278.98980103, 279.99055573,
280.98829712, 281.99064941, 282.98851074, 283.98887085, 280.98829712, 281.99064941, 282.98851074, 283.98887085,
284.99008545, 285.99044281, 286.99019897, 287.9892334 , 284.99008545, 285.99044281, 286.99019897, 287.9892334 ,
288.98918335, 289.98908264, 290.98603363, 291.96720215, 288.98918335, 289.98908264, 290.98603363, 291.96720215,
292.98072205, 293.98917358, 294.99038391, 295.9605835 , 292.98072205, 293.98917358, 294.99038391, 295.9605835 ,
296.59046722]) 296.59046722])
Coordinates: Coordinates:
* 2t_savp_fcst_bins (2t_savp_fcst_bins) object (260, 261] ... (296, 297] * 2t_savp_fcst_bins (2t_savp_fcst_bins) object (260, 261] ... (296, 297]
quantile float64 0.99 quantile float64 0.99
%% Cell type:code id:electrical-evening tags: %% Cell type:code id:mathematical-alfred tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment