Skip to content
Snippets Groups Projects

Felix issue112 feat seasonal cond quanitle plots

Merged Ghost User requested to merge felix_issue112_feat_seasonal-cond-quanitle-plots into develop
Files
2
@@ -189,26 +189,12 @@ class PlotStationMap(AbstractPlotClass):
@TimeTrackingWrapper
def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_window: int = 3, ref_name: str = 'obs',
pred_name: str = 'CNN', season: str = "", forecast_path: str = None,
plot_name_affix: str = "", units: str = "ppb"):
class PlotConditionalQuantiles(AbstractPlotClass):
"""
This plot was originally taken from Murphy, Brown and Chen (1989):
https://journals.ametsoc.org/doi/pdf/10.1175/1520-0434%281989%29004%3C0485%3ADVOTF%3E2.0.CO%3B2
:param stations: stations to include in the plot (forecast data needs to be available already)
:param plot_folder: path to save the plot (default: current directory)
:param rolling_window: the rolling window mean will smooth the plot appearance (no smoothing in bin calculation,
this is only a cosmetic step, default: 3)
:param ref_name: name of the reference data series
:param pred_name: name of the investigated data series
:param season: season name to highlight if not empty
:param forecast_path: path to save the plot file
:param plot_name_affix: name to specify this plot (e.g. 'cali-ref', default: '')
:param units: units of the forecasted values (default: ppb)
This class creates cond.quantile plots as originally proposed by Murphy, Brown and Chen (1989) [But in log scale]
Link to paper: https://journals.ametsoc.org/doi/pdf/10.1175/1520-0434%281989%29004%3C0485%3ADVOTF%3E2.0.CO%3B2
"""
# time = TimeTracking()
logging.debug(f"started plot_conditional_quantiles()")
# ignore warnings if nans appear in quantile grouping
warnings.filterwarnings("ignore", message="All-NaN slice encountered")
# ignore warnings if mean is calculated on nans
@@ -216,112 +202,235 @@ def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_w
# ignore warnings for y tick = 0 on log scale (instead of 0.00001 or similar)
warnings.filterwarnings("ignore", message="Attempted to set non-positive bottom ylim on a log-scaled axis.")
def load_data():
def __init__(self, stations: List, data_pred_path: str, plot_folder: str = ".", plot_per_seasons=True,
rolling_window: int = 3, model_mame: str = "CNN", obs_name: str = "obs", **kwargs):
"""
:param stations: all stations to plot
:param data_pred_path: path to dir which contains the forecasts as .nc files
:param plot_folder: path where the plots are stored
:param plot_per_seasons: if `True' create cond. quantile plots for seasons (DJF, MAM, JJA, SON) individually
:param rolling_window: smoothing of quantiles (3 is used by Murphy et al.)
:param model_mame: name of the model prediction as stored in netCDF file (for example "CNN")
:param obs_name: name of observation as stored in netCDF file (for example "obs")
:param kwargs: Some further arguments which are listed in self._opts
"""
super().__init__(plot_folder, "conditional_quantiles")
self._data_pred_path = data_pred_path
self._stations = stations
self._rolling_window = rolling_window
self._model_name = model_mame
self._obs_name = obs_name
self._opts = {"q": kwargs.get("q", [.1, .25, .5, .75, .9]),
"linetype": kwargs.get("linetype", [':', '-.', '--', '-.', ':']),
"legend": kwargs.get("legend",
['.10th and .90th quantile', '.25th and .75th quantile', '.50th quantile',
'reference 1:1']),
"data_unit": kwargs.get("data_unit", "ppb"),
}
if plot_per_seasons is True:
self.seasons = ['DJF', 'MAM', 'JJA', 'SON']
else:
self.seasons = ""
self._data = self._load_data()
self._bins = self._get_bins_from_rage_of_data()
self._plot()
def _load_data(self):
"""
This method loads forcast data
:return:
"""
logging.debug("... load data")
data_collector = []
for station in stations:
file = os.path.join(forecast_path, f"forecasts_{station}_test.nc")
for station in self._stations:
file = os.path.join(self._data_pred_path, f"forecasts_{station}_test.nc")
data_tmp = xr.open_dataarray(file)
data_collector.append(data_tmp.loc[:, :, ['CNN', 'obs', 'OLS']].assign_coords(station=station))
return xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
data_collector.append(data_tmp.loc[:, :, [self._model_name, self._obs_name]].assign_coords(station=station))
res = xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
return res
def segment_data(data):
def _segment_data(self, data, x_model):
"""
This method creates segmented data which is used for cond. quantile plots
:param data:
:param x_model:
:return:
"""
logging.debug("... segment data")
# combine index and station to multi index
data = data.stack(z=['index', 'station'])
# replace multi index by simple position index (order is not relevant anymore)
data.coords['z'] = range(len(data.coords['z']))
# segment data of pred_name into bins
data.loc[pred_name, ...] = data.loc[pred_name, ...].to_pandas().T.apply(pd.cut, bins=bins,
labels=bins[1:]).T.values
# segment data of x_model into bins
data.loc[x_model, ...] = data.loc[x_model, ...].to_pandas().T.apply(pd.cut, bins=self._bins,
labels=self._bins[1:]).T.values
return data
def create_quantile_panel(data, q):
@staticmethod
def _labels(plot_type, data_unit="ppb"):
"""
Helper method to correctly assign (x,y) labels to plots, depending on like-base or cali-ref factorization
:param plot_type:
:param data_unit:
:return:
"""
names = (f"forecast concentration (in {data_unit})", f"observed concentration (in {data_unit})")
if plot_type == "obs":
return names
else:
return names[::-1]
def _get_bins_from_rage_of_data(self):
"""
Get array of bins to use for quantiles
:return:
"""
return np.arange(0, math.ceil(self._data.max().max()) + 1, 1).astype(int)
def _create_quantile_panel(self, data, x_model, y_model):
"""
Clculate quantiles
:param data:
:param x_model:
:param y_model:
:return:
"""
logging.debug("... create quantile panel")
# create empty xarray with dims: time steps ahead, quantiles, bin index (numbers create in previous step)
quantile_panel = xr.DataArray(np.full([data.ahead.shape[0], len(q), bins[1:].shape[0]], np.nan),
coords=[data.ahead, q, bins[1:]], dims=['ahead', 'quantiles', 'categories'])
quantile_panel = xr.DataArray(
np.full([data.ahead.shape[0], len(self._opts["q"]), self._bins[1:].shape[0]], np.nan),
coords=[data.ahead, self._opts["q"], self._bins[1:]], dims=['ahead', 'quantiles', 'categories'])
# ensure that the coordinates are in the right order
quantile_panel = quantile_panel.transpose('ahead', 'quantiles', 'categories')
# calculate for each bin of the pred_name data the quantiles of the ref_name data
for bin in bins[1:]:
mask = (data.loc[pred_name, ...] == bin)
quantile_panel.loc[..., bin] = data.loc[ref_name, ...].where(mask).quantile(q, dim=['z']).T
for bin in self._bins[1:]:
mask = (data.loc[x_model, ...] == bin)
quantile_panel.loc[..., bin] = data.loc[y_model, ...].where(mask).quantile(self._opts["q"],
dim=['z']).T
return quantile_panel
def labels(plot_type, data_unit="ppb"):
names = (f"forecast concentration (in {data_unit})", f"observed concentration (in {data_unit})")
if plot_type == "obs":
return names
else:
return names[::-1]
@staticmethod
def add_affix(x):
"""
Helper method to add additional information on plot name
xlabel, ylabel = labels(ref_name, units)
opts = {"q": [.1, .25, .5, .75, .9], "linetype": [':', '-.', '--', '-.', ':'],
"legend": ['.10th and .90th quantile', '.25th and .75th quantile', '.50th quantile', 'reference 1:1'],
"xlabel": xlabel, "ylabel": ylabel}
# set name and path of the plot
base_name = "conditional_quantiles"
def add_affix(x): return f"_{x}" if len(x) > 0 else ""
plot_name = f"{base_name}{add_affix(season)}{add_affix(plot_name_affix)}_plot.pdf"
plot_path = os.path.join(os.path.abspath(plot_folder), plot_name)
# check forecast path
if forecast_path is None:
raise ValueError("Forecast path is not given but required.")
# load data and set data bins
orig_data = load_data()
bins = np.arange(0, math.ceil(orig_data.max().max()) + 1, 1).astype(int)
segmented_data = segment_data(orig_data)
quantile_panel = create_quantile_panel(segmented_data, q=opts["q"])
# init pdf output
pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
logging.debug(f"... plot path is {plot_path}")
# create plot for each time step ahead
y2_max = 0
for iteration, d in enumerate(segmented_data.ahead):
logging.debug(f"... plotting {d.values} time step(s) ahead")
# plot smoothed lines with rolling mean
smooth_data = quantile_panel.loc[d, ...].rolling(categories=rolling_window, center=True).mean().to_pandas().T
ax = smooth_data.plot(style=opts["linetype"], color='black', legend=False)
ax2 = ax.twinx()
# add reference line
ax.plot([0, bins.max()], [0, bins.max()], color='k', label='reference 1:1', linewidth=.8)
# add histogram of the segmented data (pred_name)
handles, labels = ax.get_legend_handles_labels()
segmented_data.loc[pred_name, d, :].to_pandas().hist(bins=bins, ax=ax2, color='k', alpha=.3, grid=False,
rwidth=1)
# add legend
plt.legend(handles[:3] + [handles[-1]], opts["legend"], loc='upper left', fontsize='large')
# adjust limits and set labels
ax.set(xlim=(0, bins.max()), ylim=(0, bins.max()))
ax.set_xlabel(opts["xlabel"], fontsize='x-large')
ax.tick_params(axis='x', which='major', labelsize=15)
ax.set_ylabel(opts["ylabel"], fontsize='x-large')
ax.tick_params(axis='y', which='major', labelsize=15)
ax2.yaxis.label.set_color('gray')
ax2.tick_params(axis='y', colors='gray')
ax2.yaxis.labelpad = -15
ax2.set_yscale('log')
if iteration == 0:
y2_max = ax2.get_ylim()[1] + 100
ax2.set(ylim=(0, y2_max * 10 ** 8), yticks=np.logspace(0, 4, 5))
ax2.set_ylabel(' sample size', fontsize='x-large')
ax2.tick_params(axis='y', which='major', labelsize=15)
# set title and save current figure
title = f"{d.values} time step(s) ahead{f' ({season})' if len(season) > 0 else ''}"
plt.title(title)
pdf_pages.savefig()
# close all open figures / plots
pdf_pages.close()
plt.close('all')
#logging.info(f"plot_conditional_quantiles() finished after {time}")
:param x:
:return:
"""
return f"_{x}" if len(x) > 0 else ""
def _prepare_plots(self, data, x_model, y_model):
"""
Get segmented_data and quantile_panel
:param data:
:param x_model:
:param y_model:
:return:
"""
segmented_data = self._segment_data(data, x_model)
quantile_panel = self._create_quantile_panel(segmented_data, x_model, y_model)
return segmented_data, quantile_panel
def _plot(self):
"""
Main plot call
:return:
"""
if len(self.seasons) > 0:
self._plot_seasons()
self._plot_all()
def _plot_seasons(self):
"""
Seasonal plot call
:return:
"""
for season in self.seasons:
self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._model_name,
y_model=self._obs_name, plot_name_affix="cali-ref", season=season)
self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._obs_name,
y_model=self._model_name, plot_name_affix="like-base", season=season)
def _plot_all(self):
"""
Full plot call
:return:
"""
self._plot_base(data=self._data, x_model=self._model_name, y_model=self._obs_name, plot_name_affix="cali-ref")
self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._model_name, plot_name_affix="like-base")
@TimeTrackingWrapper
def _plot_base(self, data, x_model, y_model, plot_name_affix, season=""):
"""
Base method to create cond. quantile plots. Is called from _plot_all and _plot_seasonal
:param data: data which is used to create cond. quantile plot
:param x_model: name of model on x axis (can also be obs)
:param y_model: name of model on y axis (can also be obs)
:param plot_name_affix: should be `cali-ref' or `like-base'
:param season: List of seasons to use
:return:
"""
segmented_data, quantile_panel = self._prepare_plots(data, x_model, y_model)
ylabel, xlabel = self._labels(x_model, self._opts["data_unit"])
plot_name = f"{self.plot_name}{self.add_affix(season)}{self.add_affix(plot_name_affix)}_plot.pdf"
#f"{base_name}{add_affix(season)}{add_affix(plot_name_affix)}_plot.pdf"
plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name)
pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
logging.debug(f"... plot path is {plot_path}")
# create plot for each time step ahead
y2_max = 0
for iteration, d in enumerate(segmented_data.ahead):
logging.debug(f"... plotting {d.values} time step(s) ahead")
# plot smoothed lines with rolling mean
smooth_data = quantile_panel.loc[d, ...].rolling(categories=self._rolling_window,
center=True).mean().to_pandas().T
ax = smooth_data.plot(style=self._opts["linetype"], color='black', legend=False)
ax2 = ax.twinx()
# add reference line
ax.plot([0, self._bins.max()], [0, self._bins.max()], color='k', label='reference 1:1', linewidth=.8)
# add histogram of the segmented data (pred_name)
handles, labels = ax.get_legend_handles_labels()
segmented_data.loc[x_model, d, :].to_pandas().hist(bins=self._bins, ax=ax2, color='k', alpha=.3, grid=False,
rwidth=1)
# add legend
plt.legend(handles[:3] + [handles[-1]], self._opts["legend"], loc='upper left', fontsize='large')
# adjust limits and set labels
ax.set(xlim=(0, self._bins.max()), ylim=(0, self._bins.max()))
ax.set_xlabel(xlabel, fontsize='x-large')
ax.tick_params(axis='x', which='major', labelsize=15)
ax.set_ylabel(ylabel, fontsize='x-large')
ax.tick_params(axis='y', which='major', labelsize=15)
ax2.yaxis.label.set_color('gray')
ax2.tick_params(axis='y', colors='gray')
ax2.yaxis.labelpad = -15
ax2.set_yscale('log')
if iteration == 0:
y2_max = ax2.get_ylim()[1] + 100
ax2.set(ylim=(0, y2_max * 10 ** 8), yticks=np.logspace(0, 4, 5))
ax2.set_ylabel(' sample size', fontsize='x-large')
ax2.tick_params(axis='y', which='major', labelsize=15)
# set title and save current figure
title = f"{d.values} time step(s) ahead{f' ({season})' if len(season) > 0 else ''}"
plt.title(title)
pdf_pages.savefig()
# close all open figures / plots
pdf_pages.close()
plt.close('all')
@TimeTrackingWrapper
@@ -697,7 +806,6 @@ class PlotAvailability(AbstractPlotClass):
plt_dict[summary_name].update({subset: t2})
return plt_dict
def _plot(self, plt_dict):
# colors = {"train": "orange", "val": "blueishgreen", "test": "skyblue"} # color names
colors = {"train": "#e69f00", "val": "#009e73", "test": "#56b4e9"} # hex code
@@ -722,3 +830,12 @@ class PlotAvailability(AbstractPlotClass):
handles = [mpatches.Patch(color=c, label=k) for k, c in colors.items()]
lgd = plt.legend(handles=handles, bbox_to_anchor=(0, 1, 1, 0.2), loc="lower center", ncol=len(handles))
return lgd
if __name__ == "__main__":
stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
path = "../../testrun_network/forecasts"
plt_path = "../../"
con_quan_cls = PlotConditionalQuantiles(stations, path, plt_path)
Loading