Commit 2e28ae87 authored by Michael Langguth

Merge branch 'develop' into michael_issue#109_conditional_quantile_plots.

While solving the merge issues of main_visualize_postprocess.py, some typing
indicators were added to the init function of the class.
parents 056e1d34 2be05d6e
Pipeline #69696 passed
%% Cell type:code id:annoying-jamaica tags:
``` python
import os, sys, time
import xarray as xr
import pandas as pd
import datetime as dt
```
%% Cell type:code id:legislative-portugal tags:
``` python
datadir = "/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly"
datafile = "1970-1999_t2m.nc"
datafile = os.path.join(datadir, datafile)
# overwrite with the file that is actually used below
datafile = "/p/scratch/deepacf/video_prediction_shared_folder/preprocessedData/T2monthly/t2m_1970_1999.nc"
```
%% Cell type:code id:through-cornell tags:
``` python
with xr.open_dataset(datafile) as dfile:
    t2m_all = dfile["var167"]    # 2-metre temperature (ECMWF grib code 167)
    coords = t2m_all.coords
```
%% Cell type:code id:steady-implement tags:
``` python
ntimes = len(coords["time"])
# keep the time dimension in a single chunk and parallelize over the spatial dimensions
t2m_all = t2m_all.chunk({"time": ntimes, "lat": 100, "lon": 100})
```
%% Cell type:code id:universal-neutral tags:
``` python
# define a function with the hourly calculation:
def hour_mean(x):
    return x.groupby("time.hour").mean("time")

# note: the averaging is only registered lazily here; dask computes it
# once .compute() is called below, so both timings are nearly identical
time0 = time.time()
t2m_hourly = t2m_all.groupby("time.month").apply(hour_mean)
print("Registering averaging took {0:.2f}".format(time.time() - time0))
#print(t2m_hourly.values)
print("Performing averaging took {0:.2f}".format(time.time() - time0))
```
%% Output
/p/software/hdfml/stages/2020/software/Jupyter/2020.2.6-gcccoremkl-9.3.0-2020.2.254-Python-3.8.5/lib/python3.8/site-packages/xarray/core/indexing.py:1369: PerformanceWarning: Slicing is producing a large chunk. To accept the large
chunk and silence this warning, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
... array[indexer]
To avoid creating the large chunks, set the option
>>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
... array[indexer]
return self.array[key]
Registering averaging took 1.08
Performing averaging took 1.08
%% Cell type:code id:signed-edmonton tags:
``` python
t2m_hourly = t2m_hourly.compute()
```
%% Cell type:markdown id:sustainable-significance tags:
This works, but it takes about 3 minutes to process 30 years of data. <br>
However, the same operation is possible with CDO and takes only 36 s on Juwels. <br>
The two shell commands (after loading CDO 1.9.8 and ecCodes 2.18.0) are:
```
clim_files=($(for year in {1991..2020}; do echo "${year}_t2m.grb"; done))
cdo -t ecmwf -f nc ensavg ${clim_files[@]} mutilyears_1991-2020.nc
```
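The same two-step pipeline can also be driven from Python. Below is a minimal sketch using `subprocess`; the file names and year range mirror the shell commands above, everything else is an assumption:

``` python
import subprocess

# monthly input files for the climatology period (file layout taken from the command above)
clim_files = ["{0}_t2m.grb".format(year) for year in range(1991, 2021)]

# ensemble-average all years into a single netCDF file, as in the shell command above
subprocess.run(["cdo", "-t", "ecmwf", "-f", "nc", "ensavg"]
               + clim_files + ["mutilyears_1991-2020.nc"], check=True)
```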
In the following, we check the correctness of the data by computing the difference between the data from a CDO-generated file and the data produced above. We choose the mean temperature in January at 12 UTC as an example.
%% Cell type:code id:rental-suite tags:
``` python
datafile_cdo = os.path.join(datadir, "climatology_t2m_1970-1999.nc")
with xr.open_dataset(datafile_cdo) as dfile:
    t2m_hourly_cdo = dfile["T2M"]
```
%% Cell type:code id:better-adventure tags:
``` python
import numpy as np
test1 = t2m_hourly.sel(month=1, hour=12)
test2 = t2m_hourly_cdo.sel(time="1979-01-01 12:00")
diff = np.abs(test1-test2)
print(np.max(diff))
```
%% Output
<xarray.DataArray ()>
array(0.00097656, dtype=float32)
Coordinates:
    hour     int64 12
    month    int64 1
    time     datetime64[ns] 1979-01-01T12:00:00
%% Cell type:markdown id:weird-sociology tags:
Thus, the maximum difference is of order $\mathcal{O}(10^{-3})$, which can be neglected for our application.
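%% Cell type:markdown id:tolerance-check tags:
As a broader cross-check, the spot check can be generalized over all months and synoptic hours. This is a minimal sketch, assuming the CDO file keeps one timestep per month and hour on the first day of 1979, as in the selection above:
%% Cell type:code id:tolerance-check-code tags:
``` python
# compare the xarray- and CDO-based climatologies; the tolerance is chosen slightly
# above the float32 rounding error observed in the spot check
for month in range(1, 13):
    for hour in (0, 6, 12, 18):
        ref = t2m_hourly_cdo.sel(time="1979-{0:02d}-01 {1:02d}:00".format(month, hour))
        assert np.allclose(t2m_hourly.sel(month=month, hour=hour), ref, atol=1e-2)
```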
%% Cell type:code id:running-monday tags:
``` python
```
__email__ = "b.gong@fz-juelich.de"
__author__ = "Yan Ji, Bing Gong"
__date__ = "2021-05-26"
"""
Use the monthly average values to calculate the climatological values from the data sources
downloaded from ECMWF: /p/fastdata/slmet/slmet111/met_data/ecmwf/era5/grib/monthly
"""
import os
import time
import glob
import json


class Calc_climatology(object):
    def __init__(self, input_dir="/p/fastdata/slmet/slmet111/met_data/ecmwf/era5/grib/monthly",
                 output_clim=None, region: list = [10, 20], json_path=None):
        self.input_dir = input_dir
        self.output_clim = output_clim
        self.region = region
        self.lats = None
        self.lons = None
        self.json_path = json_path

    @staticmethod
    def calc_avg_per_year(grib_fl: str = None):
        """
        :param grib_fl: the relative path of the monthly average values
        :return: None
        """
        return None

    def cal_avg_all_files(self):
        """
        Average over time for all grib files.
        :return: None
        """
        method = Calc_climatology.cal_avg_all_files.__name__

        multiyears_path = os.path.join(self.output_clim, "multiyears.grb")
        climatology_path = os.path.join(self.output_clim, "climatology.grb")
        grib_files = glob.glob(os.path.join(self.input_dir, "*t2m.grb"))
        if grib_files:
            print("{0}: There are {1} monthly grib files".format(method, len(grib_files)))
            # merge all files into one grib file (the file list must be joined into a
            # single string, otherwise the shell command is malformed)
            os.system("cdo mergetime {0} {1}".format(" ".join(grib_files), multiyears_path))
            # average over time
            os.system("cdo timavg {0} {1}".format(multiyears_path, climatology_path))
        else:
            raise FileNotFoundError("%{0}: No monthly grib files exist in the input directory {1}"
                                    .format(method, self.input_dir))

        return None

    def get_lat_lon_from_json(self):
        """
        Get lons and lats from the metadata json file.
        :return: lists of lats and lons
        """
        method = Calc_climatology.get_lat_lon_from_json.__name__

        if not os.path.exists(self.json_path):
            raise FileNotFoundError("{0}: The json file {1} does not exist".format(method, self.json_path))

        with open(self.json_path) as fl:
            meta_data = json.load(fl)

        if "coordinates" not in meta_data:
            raise KeyError("{0}: 'coordinates' is not one of the keys of metadata.json".format(method))
        else:
            meta_coords = meta_data["coordinates"]

        self.lats = meta_coords["lat"]
        self.lons = meta_coords["lon"]
        return self.lats, self.lons

    def get_region_climate(self):
        """
        Get the climatological values for the selected region.
        :return: None
        """
        pass

    def __call__(self, *args, **kwargs):
        if not os.path.exists(os.path.join(self.output_clim, "climatology.grb")):
            self.cal_avg_all_files()
        self.get_lat_lon_from_json()
        self.get_region_climate()


if __name__ == '__main__':
    exp = Calc_climatology(json_path="/p/project/deepacf/deeprain/video_prediction_shared_folder/preprocessedData/era5-Y2007-2019M01to12-92x56-3840N0000E-2t_tcc_t_850/metadata.json")
    exp()
@@ -18,9 +18,8 @@ import datetime as dt
 import json
 from typing import Union, List
 # own modules
-from general_utils import get_era5_varatts
 from normalization import Norm_data
-from general_utils import check_dir
+from general_utils import get_era5_varatts, check_dir
 from metadata import MetaData as MetaData
 from main_scripts.main_train_models import *
 from data_preprocess.preprocess_data_step2 import *
@@ -30,9 +29,10 @@ from postprocess_plotting import plot_avg_eval_metrics, plot_cond_quantile, crea
 class Postprocess(TrainModel):
-    def __init__(self, results_dir=None, checkpoint=None, mode="test", batch_size=None, num_stochastic_samples=1,
-                 stochastic_plot_id=0, gpu_mem_frac=None, seed=None, channel=0, args=None, run_mode="deterministic",
-                 eval_metrics=None):
+    def __init__(self, results_dir: str = None, checkpoint: str = None, mode: str = "test", batch_size: int = None,
+                 num_stochastic_samples: int = 1, stochastic_plot_id: int = 0, gpu_mem_frac: float = None,
+                 seed: int = None, channel: int = 0, args=None, run_mode: str = "deterministic",
+                 eval_metrics: List = ("mse", "psnr", "ssim")):
         """
         Initialization of the class instance for postprocessing (generation of forecasts from trained model +
         basic evaluation).
@@ -443,7 +443,7 @@ class Postprocess(TrainModel):
         self.stochastic_loss_all_batches = np.mean(np.array(self.stochastic_loss_all_batches), axis=0)
         assert len(np.array(self.persistent_loss_all_batches).shape) == 1
         assert np.array(self.persistent_loss_all_batches).shape[0] == self.future_length
+        print("Bug here:", np.array(self.stochastic_loss_all_batches).shape)
         assert len(np.array(self.stochastic_loss_all_batches).shape) == 2
         assert np.array(self.stochastic_loss_all_batches).shape[0] == self.num_stochastic_samples
@@ -597,7 +597,7 @@ class Postprocess(TrainModel):
         # dictionary of implemented evaluation metrics
         dims = ["lat", "lon"]
-        known_eval_metrics = {"mse": Scores("mse", dims), "psnr": Scores("psnr", dims)}
+        known_eval_metrics = {"mse": Scores("mse", dims), "psnr": Scores("psnr", dims), "ssim": Scores("ssim", dims)}
         # generate list of functions that calculate requested evaluation metrics
         if set(self.eval_metrics).issubset(known_eval_metrics.keys()):
...
 import tensorflow as tf
 #import lpips_tf
+import numpy as np
 import math
+from skimage.measure import compare_ssim as ssim_ski

 def mse(a, b):
     return tf.reduce_mean(tf.squared_difference(a, b), [-3, -2, -1])
@@ -23,6 +24,7 @@ def psnr_imgs(img1, img2, pixel_max=1.):
 def mse_imgs(image1, image2):
     mse = ((image1 - image2)**2).mean(axis=None)
     return mse
+
 # def lpips(input0, input1):
 #     if input0.shape[-1].value == 1:
 #         input0 = tf.tile(input0, [1] * (input0.shape.ndims - 1) + [3])
@@ -34,7 +36,12 @@ def mse_imgs(image1, image2):
 def ssim_images(image1, image2):
     """
+    Reference for calculating ssim:
     Numpy implementation for ssim: https://cvnote.ddlee.cc/2019/09/12/psnr-ssim-python
+    https://scikit-image.org/docs/dev/auto_examples/transform/plot_ssim.html
+    :param image1: the reference images
+    :param image2: the predicted images
     """
-    pass
+    ssim_pred = ssim_ski(image1, image2,
+                         data_range=image2.max() - image2.min())
+    return ssim_pred
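For orientation, here is a tiny usage sketch of the new `ssim_images` helper (not part of the commit); it assumes two 2-D numpy fields and the pre-0.18 scikit-image API imported in the diff above:

``` python
import numpy as np
from skimage.measure import compare_ssim as ssim_ski  # pre-0.18 scikit-image API

ref = np.random.rand(56, 92).astype(np.float32)    # hypothetical reference field
pred = ref + 0.05 * np.random.randn(56, 92).astype(np.float32)

# data_range is derived from the prediction, as in ssim_images above
print(ssim_ski(ref, pred, data_range=pred.max() - pred.min()))
```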
@@ -8,8 +8,9 @@ __date__ = "2021-05-xx"
 import numpy as np
 import xarray as xr
+import pandas as pd
 from typing import Union, List
+from skimage.measure import compare_ssim as ssim
 try:
     from tqdm import tqdm
     l_tqdm = True
@@ -200,18 +201,18 @@ class Scores:
     Class to calculate scores and skill scores.
     """
-    known_scores = ["mse", "psnr"]
+    known_scores = ["mse", "psnr", "ssim"]

     def __init__(self, score_name: str, dims: List[str]):
         """
         Initialize score instance.
-        :param score_name: name of score taht is queried
+        :param score_name: name of score that is queried
         :param dims: list of dimensions over which the score shall operate
         :return: Score instance
         """
         method = Scores.__init__.__name__
-        self.metrics_dict = {"mse": self.calc_mse_batch, "psnr": self.calc_psnr_batch}
+        self.metrics_dict = {"mse": self.calc_mse_batch, "psnr": self.calc_psnr_batch, "ssim": self.calc_ssim_batch}
         if set(self.metrics_dict.keys()) != set(Scores.known_scores):
             raise ValueError("%{0}: Known scores must coincide with keys of metrics_dict.".format(method))
         self.score_name = self.set_score_name(score_name)
@@ -273,10 +274,10 @@ class Scores:
     def calc_psnr_batch(self, data_fcst, data_ref, **kwargs):
         """
-        Calculate mse of forecast data w.r.t. reference data
+        Calculate psnr of forecast data w.r.t. reference data
         :param data_fcst: forecasted data (xarray with dimensions [batch, lat, lon])
         :param data_ref: reference data (xarray with dimensions [batch, lat, lon])
-        :return: averaged mse for each batch example
+        :return: averaged psnr for each batch example
         """
         method = Scores.calc_mse_batch.__name__
@@ -295,3 +296,17 @@ class Scores:
         return psnr

+    def calc_ssim_batch(self, data_fcst, data_ref, **kwargs):
+        """
+        Calculate ssim evaluation metric of forecast data w.r.t. reference data
+        :param data_fcst: forecasted data (xarray with dimensions [batch, lat, lon])
+        :param data_ref: reference data (xarray with dimensions [batch, lat, lon])
+        :return: averaged ssim for each batch example
+        """
+        method = Scores.calc_ssim_batch.__name__
+        print("shape of data_ref:", np.array(data_ref).shape)
+        print("shape of data_fcst:", np.array(data_fcst).shape)
+        print("max value of data_fcst:", data_fcst.max())
+        print("min value of data_fcst:", data_fcst.min())
+        ssim_pred = ssim(data_ref[0, 0, :, :], data_fcst[0, 0, :, :])
+        return ssim_pred
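With this wiring, a metric requested by name resolves to the bound method via `metrics_dict`. A hypothetical smoke test (the module name `statistical_evaluation` and the 4-D dummy shape are assumptions, not part of the commit):

``` python
import numpy as np
from statistical_evaluation import Scores  # module name assumed

score = Scores("ssim", ["lat", "lon"])  # accepted now that "ssim" is a known score
fcst = np.random.rand(1, 1, 56, 92).astype(np.float32)  # dummy forecast data
ref = np.random.rand(1, 1, 56, 92).astype(np.float32)   # dummy reference data
print(score.metrics_dict["ssim"](fcst, ref))  # calls calc_ssim_batch
```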