Skip to content
Snippets Groups Projects

Resolve "release v1.2.0"

Merged Ghost User requested to merge release_v1.2.0 into master
1 file
+ 321
14
Compare changes
  • Side-by-side
  • Inline
"""Collection of plots to evaluate a model, create overviews on data or forecasts."""
"""Collection of plots to evaluate a model, create overviews on data or forecasts."""
__author__ = "Lukas Leufen, Felix Kleinert"
__author__ = "Lukas Leufen, Felix Kleinert"
__date__ = '2019-12-17'
__date__ = '2020-11-23'
import logging
import logging
import math
import math
@@ -8,10 +8,10 @@ import os
@@ -8,10 +8,10 @@ import os
import warnings
import warnings
from typing import Dict, List, Tuple
from typing import Dict, List, Tuple
import matplotlib
import matplotlib
import matplotlib.patches as mpatches
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
 
import matplotlib.dates as mdates
import numpy as np
import numpy as np
import pandas as pd
import pandas as pd
import seaborn as sns
import seaborn as sns
@@ -75,7 +75,7 @@ class AbstractPlotClass:
@@ -75,7 +75,7 @@ class AbstractPlotClass:
"""
"""
def __init__(self, plot_folder, plot_name, resolution=500):
def __init__(self, plot_folder, plot_name, resolution=500, rc_params=None):
"""Set up plot folder and name, and plot resolution (default 500dpi)."""
"""Set up plot folder and name, and plot resolution (default 500dpi)."""
plot_folder = os.path.abspath(plot_folder)
plot_folder = os.path.abspath(plot_folder)
if not os.path.exists(plot_folder):
if not os.path.exists(plot_folder):
@@ -83,6 +83,15 @@ class AbstractPlotClass:
@@ -83,6 +83,15 @@ class AbstractPlotClass:
self.plot_folder = plot_folder
self.plot_folder = plot_folder
self.plot_name = plot_name
self.plot_name = plot_name
self.resolution = resolution
self.resolution = resolution
 
if rc_params is None:
 
rc_params = {'axes.labelsize': 'large',
 
'xtick.labelsize': 'large',
 
'ytick.labelsize': 'large',
 
'legend.fontsize': 'large',
 
'axes.titlesize': 'large',
 
}
 
self.rc_params = rc_params
 
self._update_rc_params()
def _plot(self, *args):
def _plot(self, *args):
"""Abstract plot class needs to be implemented in inheritance."""
"""Abstract plot class needs to be implemented in inheritance."""
@@ -95,6 +104,24 @@ class AbstractPlotClass:
@@ -95,6 +104,24 @@ class AbstractPlotClass:
plt.savefig(plot_name, dpi=self.resolution, **kwargs)
plt.savefig(plot_name, dpi=self.resolution, **kwargs)
plt.close('all')
plt.close('all')
 
def _update_rc_params(self):
 
plt.rcParams.update(self.rc_params)
 
 
@staticmethod
 
def _get_sampling(sampling):
 
if sampling == "daily":
 
return "D"
 
elif sampling == "hourly":
 
return "h"
 
 
@staticmethod
 
def get_dataset_colors():
 
"""
 
Standard colors used for train-, val-, and test-sets during postprocessing
 
"""
 
colors = {"train": "#e69f00", "val": "#009e73", "test": "#56b4e9"} # hex code
 
return colors
 
@TimeTrackingWrapper
@TimeTrackingWrapper
class PlotMonthlySummary(AbstractPlotClass):
class PlotMonthlySummary(AbstractPlotClass):
@@ -113,18 +140,19 @@ class PlotMonthlySummary(AbstractPlotClass):
@@ -113,18 +140,19 @@ class PlotMonthlySummary(AbstractPlotClass):
:param window_lead_time: lead time to plot, if window_lead_time is higher than the available lead time or not given
:param window_lead_time: lead time to plot, if window_lead_time is higher than the available lead time or not given
the maximum lead time from data is used. (default None -> use maximum lead time from data).
the maximum lead time from data is used. (default None -> use maximum lead time from data).
:param plot_folder: path to save the plot (default: current directory)
:param plot_folder: path to save the plot (default: current directory)
 
:param target_var_unit: unit of target var for plot legend (default= ppb)
"""
"""
def __init__(self, stations: List, data_path: str, name: str, target_var: str, window_lead_time: int = None,
def __init__(self, stations: List, data_path: str, name: str, target_var: str, window_lead_time: int = None,
plot_folder: str = "."):
plot_folder: str = ".", target_var_unit: str = 'ppb'):
"""Set attributes and create plot."""
"""Set attributes and create plot."""
super().__init__(plot_folder, "monthly_summary_box_plot")
super().__init__(plot_folder, "monthly_summary_box_plot")
self._data_path = data_path
self._data_path = data_path
self._data_name = name
self._data_name = name
self._data = self._prepare_data(stations)
self._data = self._prepare_data(stations)
self._window_lead_time = self._get_window_lead_time(window_lead_time)
self._window_lead_time = self._get_window_lead_time(window_lead_time)
self._plot(target_var)
self._plot(target_var, target_var_unit)
self._save()
self._save()
def _prepare_data(self, stations: List) -> xr.DataArray:
def _prepare_data(self, stations: List) -> xr.DataArray:
@@ -176,7 +204,12 @@ class PlotMonthlySummary(AbstractPlotClass):
@@ -176,7 +204,12 @@ class PlotMonthlySummary(AbstractPlotClass):
window_lead_time = ahead_steps
window_lead_time = ahead_steps
return min(ahead_steps, window_lead_time)
return min(ahead_steps, window_lead_time)
def _plot(self, target_var: str):
@staticmethod
 
def _spell_out_chemical_concentrations(short_name: str):
 
short2long = {'o3': 'ozone', 'no': 'nitrogen oxide', 'no2': 'nitrogen dioxide', 'nox': 'nitrogen dioxides'}
 
return f"{short2long[short_name]} concentration"
 
 
def _plot(self, target_var: str, target_var_unit: str):
"""
"""
Create a monthly grouped box plot over all stations but with separate boxes for each lead time step.
Create a monthly grouped box plot over all stations but with separate boxes for each lead time step.
@@ -189,7 +222,8 @@ class PlotMonthlySummary(AbstractPlotClass):
@@ -189,7 +222,8 @@ class PlotMonthlySummary(AbstractPlotClass):
ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1., palette=color_palette,
ax = sns.boxplot(x='index', y='values', hue='ahead', data=data.compute(), whis=1., palette=color_palette,
flierprops={'marker': '.', 'markersize': 1}, showmeans=True,
flierprops={'marker': '.', 'markersize': 1}, showmeans=True,
meanprops={'markersize': 1, 'markeredgecolor': 'k'})
meanprops={'markersize': 1, 'markeredgecolor': 'k'})
ax.set(xlabel='month', ylabel=f'{target_var}')
ylabel = self._spell_out_chemical_concentrations(target_var)
 
ax.set(xlabel='month', ylabel=f'{ylabel} (in {target_var_unit})')
plt.tight_layout()
plt.tight_layout()
@@ -453,7 +487,8 @@ class PlotConditionalQuantiles(AbstractPlotClass):
@@ -453,7 +487,8 @@ class PlotConditionalQuantiles(AbstractPlotClass):
def _plot(self):
def _plot(self):
"""Start plotting routines: overall plot and seasonal (if enabled)."""
"""Start plotting routines: overall plot and seasonal (if enabled)."""
logging.info(f"start plotting {self.__class__.__name__}, scheduled number of plots: {(len(self._seasons) + 1) * 2}")
logging.info(
 
f"start plotting {self.__class__.__name__}, scheduled number of plots: {(len(self._seasons) + 1) * 2}")
if len(self._seasons) > 0:
if len(self._seasons) > 0:
self._plot_seasons()
self._plot_seasons()
@@ -504,7 +539,7 @@ class PlotConditionalQuantiles(AbstractPlotClass):
@@ -504,7 +539,7 @@ class PlotConditionalQuantiles(AbstractPlotClass):
# add histogram of the segmented data (pred_name)
# add histogram of the segmented data (pred_name)
handles, labels = ax.get_legend_handles_labels()
handles, labels = ax.get_legend_handles_labels()
segmented_data.loc[x_model, d, :].to_pandas().hist(bins=self._bins, ax=ax2, color='k', alpha=.3, grid=False,
segmented_data.loc[x_model, d, :].to_pandas().hist(bins=self._bins, ax=ax2, color='k', alpha=.3, grid=False,
rwidth=1)
rwidth=1)
# add legend
# add legend
plt.legend(handles[:3] + [handles[-1]], self._opts["legend"], loc='upper left', fontsize='large')
plt.legend(handles[:3] + [handles[-1]], self._opts["legend"], loc='upper left', fontsize='large')
# adjust limits and set labels
# adjust limits and set labels
@@ -693,20 +728,26 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
@@ -693,20 +728,26 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
"""
"""
def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = ""):
def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None):
"""
"""
Set attributes and create plot.
Set attributes and create plot.
:param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms.
:param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms.
:param plot_folder: path to save the plot (default: current directory)
:param plot_folder: path to save the plot (default: current directory)
:param model_setup: architecture type to specify plot name (default "CNN")
:param model_setup: architecture type to specify plot name (default "CNN")
 
:param separate_vars: variables to plot separated (default: ['o3'])
"""
"""
super().__init__(plot_folder, f"skill_score_bootstrap_{model_setup}")
super().__init__(plot_folder, f"skill_score_bootstrap_{model_setup}")
 
if separate_vars is None:
 
separate_vars = ['o3']
self._labels = None
self._labels = None
self._x_name = "boot_var"
self._x_name = "boot_var"
self._data = self._prepare_data(data)
self._data = self._prepare_data(data)
self._plot()
self._plot()
self._save()
self._save()
 
self.plot_name += '_separated'
 
self._plot(separate_vars=separate_vars)
 
self._save(bbox_inches='tight')
def _prepare_data(self, data: Dict) -> pd.DataFrame:
def _prepare_data(self, data: Dict) -> pd.DataFrame:
"""
"""
@@ -719,11 +760,30 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
@@ -719,11 +760,30 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
:return: pre-processed data set
:return: pre-processed data set
"""
"""
data = helpers.dict_to_xarray(data, "station").sortby(self._x_name)
data = helpers.dict_to_xarray(data, "station").sortby(self._x_name)
 
new_boot_coords = self._return_vars_without_number_tag(data.coords['boot_var'].values, split_by='_', keep=1)
 
data = data.assign_coords({'boot_var': new_boot_coords})
self._labels = [str(i) + "d" for i in data.coords["ahead"].values]
self._labels = [str(i) + "d" for i in data.coords["ahead"].values]
if "station" not in data.dims:
if "station" not in data.dims:
data = data.expand_dims("station")
data = data.expand_dims("station")
return data.to_dataframe("data").reset_index(level=[0, 1, 2])
return data.to_dataframe("data").reset_index(level=[0, 1, 2])
 
def _return_vars_without_number_tag(self, values, split_by, keep):
 
arr = np.array([v.split(split_by) for v in values])
 
num = arr[:, 0]
 
new_val = arr[:, keep]
 
if self._all_values_are_equal(num, axis=0):
 
return new_val
 
else:
 
raise NotImplementedError
 
 
 
@staticmethod
 
def _all_values_are_equal(arr, axis=0):
 
if np.all(arr == arr[0], axis=axis):
 
return True
 
else:
 
return False
 
def _label_add(self, score_only: bool):
def _label_add(self, score_only: bool):
"""
"""
Add the phrase "terms and " if score_only is disabled or empty string (if score_only=True).
Add the phrase "terms and " if score_only is disabled or empty string (if score_only=True).
@@ -733,12 +793,111 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
@@ -733,12 +793,111 @@ class PlotBootstrapSkillScore(AbstractPlotClass):
"""
"""
return "" if score_only else "terms and "
return "" if score_only else "terms and "
def _plot(self):
def _plot(self, separate_vars=None):
"""Plot climatological skill score."""
"""Plot climatological skill score."""
 
if separate_vars is None:
 
self._plot_all_variables()
 
else:
 
self._plot_selected_variables(separate_vars)
 
 
def _plot_selected_variables(self, separate_vars: List):
 
# if separate_vars is None:
 
# separate_vars = ['o3']
 
data = self._data
 
self.raise_error_if_separate_vars_do_not_exist(data, separate_vars)
 
all_variables = self._get_unique_values_from_column_of_df(data, 'boot_var')
 
# remaining_vars = helpers.list_pop(all_variables, separate_vars) #remove_items
 
remaining_vars = helpers.remove_items(all_variables, separate_vars)
 
data_first = self._select_data(df=data, variables=separate_vars, column_name='boot_var')
 
data_second = self._select_data(df=data, variables=remaining_vars, column_name='boot_var')
 
 
fig, ax = plt.subplots(nrows=1, ncols=2,
 
gridspec_kw={'width_ratios': [len(separate_vars),
 
len(remaining_vars)
 
]
 
}
 
)
 
if len(separate_vars) > 1:
 
first_box_width = .8
 
else:
 
first_box_width = 2.
 
 
sns.boxplot(x=self._x_name, y="data", hue="ahead", data=data_first, ax=ax[0], whis=1., palette="Blues_d",
 
showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
 
flierprops={"marker": "."}, width=first_box_width
 
)
 
ax[0].set(ylabel=f"skill score", xlabel="")
 
 
sns.boxplot(x=self._x_name, y="data", hue="ahead", data=data_second, ax=ax[1], whis=1., palette="Blues_d",
 
showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"},
 
flierprops={"marker": "."},
 
)
 
ax[1].set(ylabel="", xlabel="")
 
ax[1].yaxis.tick_right()
 
handles, _ = ax[1].get_legend_handles_labels()
 
for sax in ax:
 
matplotlib.pyplot.sca(sax)
 
sax.axhline(y=0, color="grey", linewidth=.5)
 
plt.xticks(rotation=45, ha='right')
 
sax.legend_.remove()
 
 
fig.legend(handles, self._labels, loc='upper center', ncol=len(handles) + 1, )
 
 
def align_yaxis(ax1, ax2):
 
"""
 
Align zeros of the two axes, zooming them out by same ratio
 
 
This function is copy pasted from https://stackoverflow.com/a/41259922
 
"""
 
axes = (ax1, ax2)
 
extrema = [ax.get_ylim() for ax in axes]
 
tops = [extr[1] / (extr[1] - extr[0]) for extr in extrema]
 
# Ensure that plots (intervals) are ordered bottom to top:
 
if tops[0] > tops[1]:
 
axes, extrema, tops = [list(reversed(l)) for l in (axes, extrema, tops)]
 
 
# How much would the plot overflow if we kept current zoom levels?
 
tot_span = tops[1] + 1 - tops[0]
 
 
b_new_t = extrema[0][0] + tot_span * (extrema[0][1] - extrema[0][0])
 
t_new_b = extrema[1][1] - tot_span * (extrema[1][1] - extrema[1][0])
 
axes[0].set_ylim(extrema[0][0], b_new_t)
 
axes[1].set_ylim(t_new_b, extrema[1][1])
 
 
align_yaxis(ax[0], ax[1])
 
align_yaxis(ax[0], ax[1])
 
 
@staticmethod
 
def _select_data(df: pd.DataFrame, variables: List[str], column_name: str) -> pd.DataFrame:
 
for i, variable in enumerate(variables):
 
if i == 0:
 
selected_data = df.loc[df[column_name] == variable]
 
else:
 
tmp_var = df.loc[df[column_name] == variable]
 
selected_data = pd.concat([selected_data, tmp_var], axis=0)
 
return selected_data
 
 
def raise_error_if_separate_vars_do_not_exist(self, data, separate_vars):
 
if not self._variables_exist_in_df(df=data, variables=separate_vars):
 
raise ValueError(f"At least one entry of `separate_vars' does not exist in `self.data' ")
 
 
@staticmethod
 
def _get_unique_values_from_column_of_df(df: pd.DataFrame, column_name: str) -> List:
 
return list(df[column_name].unique())
 
 
def _variables_exist_in_df(self, df: pd.DataFrame, variables: List[str], column_name: str = 'boot_var'):
 
vars_in_df = set(self._get_unique_values_from_column_of_df(df, column_name))
 
return set(variables).issubset(vars_in_df)
 
 
def _plot_all_variables(self):
 
"""
 
 
"""
fig, ax = plt.subplots()
fig, ax = plt.subplots()
sns.boxplot(x=self._x_name, y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
sns.boxplot(x=self._x_name, y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d",
showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."})
ax.axhline(y=0, color="grey", linewidth=.5)
ax.axhline(y=0, color="grey", linewidth=.5)
 
plt.xticks(rotation=45)
ax.set(ylabel=f"skill score", xlabel="", title="summary of all stations")
ax.set(ylabel=f"skill score", xlabel="", title="summary of all stations")
handles, _ = ax.get_legend_handles_labels()
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles, self._labels)
ax.legend(handles, self._labels)
@@ -859,6 +1018,7 @@ class PlotTimeSeries:
@@ -859,6 +1018,7 @@ class PlotTimeSeries:
def _get_time_range(data):
def _get_time_range(data):
def f(x, f_x):
def f(x, f_x):
return pd.to_datetime(f_x(x.index.values)).year
return pd.to_datetime(f_x(x.index.values)).year
 
return f(data, min), f(data, max)
return f(data, min), f(data, max)
@staticmethod
@staticmethod
@@ -978,9 +1138,7 @@ class PlotAvailability(AbstractPlotClass):
@@ -978,9 +1138,7 @@ class PlotAvailability(AbstractPlotClass):
return plt_dict
return plt_dict
def _plot(self, plt_dict):
def _plot(self, plt_dict):
# colors = {"train": "orange", "val": "blueishgreen", "test": "skyblue"} # color names
colors = self.get_dataset_colors()
colors = {"train": "#e69f00", "val": "#009e73", "test": "#56b4e9"} # hex code
# colors = {"train": (230, 159, 0), "val": (0, 158, 115), "test": (86, 180, 233)} # in rgb but as abs values
pos = 0
pos = 0
height = 0.8 # should be <= 1
height = 0.8 # should be <= 1
yticklabels = []
yticklabels = []
@@ -1025,6 +1183,155 @@ class PlotSeparationOfScales(AbstractPlotClass):
@@ -1025,6 +1183,155 @@ class PlotSeparationOfScales(AbstractPlotClass):
self._save()
self._save()
 
@TimeTrackingWrapper
 
class PlotAvailabilityHistogram(AbstractPlotClass):
 
"""
 
Create data availability plots as histogram.
 
 
Each entry of each generator is checked for `notnull()` values along all the datetime axis (boolean).
 
Calling this class creates two different types of histograms where each generator
 
 
1) data_availability_histogram: datetime (xaxis) vs. number of stations with availabile data (yaxis)
 
2) data_availability_histogram_cumulative: number of samples (xaxis) vs. number of stations having at least number
 
of samples (yaxis)
 
 
"""
 
 
def __init__(self, generators: Dict[str, DataCollection], plot_folder: str = ".", sampling="daily",
 
subset_dim: str = 'DataSet', temporal_dim: str = 'datetime', history_dim: str = 'window',
 
station_dim: str = 'Stations', target_dim='variables'):
 
 
super().__init__(plot_folder, "data_availability_histogram")
 
self.freq = self._get_sampling(sampling)
 
self.subset_dim = subset_dim
 
self.temporal_dim = temporal_dim
 
self.history_dim = history_dim
 
self.station_dim = station_dim
 
self.target_dim = target_dim
 
self._prepare_data(generators)
 
 
for plt_type in self.allowed_plot_types:
 
plot_name_tmp = self.plot_name
 
self.plot_name += '_' + plt_type
 
self._plot(plt_type=plt_type)
 
self._save()
 
self.plot_name = plot_name_tmp
 
 
@property
 
def allowed_plot_types(self):
 
plot_types = ['hist', 'hist_cum']
 
return plot_types
 
 
def _prepare_data(self, generators: Dict[str, DataCollection]):
 
"""
 
Prepares data to be used by plot methods.
 
 
Creates xarrays which are sums of valid data (boolean sums) across i) station_dim and ii) temporal_dim
 
"""
 
avail_data_time_sum = {}
 
avail_data_station_sum = {}
 
dataset_time_interval = {}
 
for subset, generator in generators.items():
 
avail_list = []
 
for station in generator:
 
station_data_x = station.get_X(as_numpy=False)[0]
 
station_data_x = station_data_x.loc[{self.history_dim: 0, # select recent window frame
 
self.target_dim: station_data_x[self.target_dim].values[0]}]
 
avail_list.append(station_data_x.notnull())
 
avail_data = xr.concat(avail_list, dim=self.station_dim).notnull()
 
avail_data_time_sum[subset] = avail_data.sum(dim=self.station_dim)
 
avail_data_station_sum[subset] = avail_data.sum(dim=self.temporal_dim)
 
dataset_time_interval[subset] = self._get_first_and_last_indexelement_from_xarray(
 
avail_data_time_sum[subset], dim_name=self.temporal_dim, return_type='as_dict'
 
)
 
avail_data_amount = xr.concat(avail_data_time_sum.values(), pd.Index(avail_data_time_sum.keys(),
 
name=self.subset_dim)
 
)
 
full_time_index = self._make_full_time_index(avail_data_amount.coords[self.temporal_dim].values, freq=self.freq)
 
self.avail_data_cum_sum = xr.concat(avail_data_station_sum.values(), pd.Index(avail_data_station_sum.keys(),
 
name=self.subset_dim))
 
self.avail_data_amount = avail_data_amount.reindex({self.temporal_dim: full_time_index})
 
self.dataset_time_interval = dataset_time_interval
 
 
@staticmethod
 
def _get_first_and_last_indexelement_from_xarray(xarray, dim_name, return_type='as_tuple'):
 
if isinstance(xarray, xr.DataArray):
 
first = xarray.coords[dim_name].values[0]
 
last = xarray.coords[dim_name].values[-1]
 
if return_type == 'as_tuple':
 
return first, last
 
elif return_type == 'as_dict':
 
return {'first': first, 'last': last}
 
else:
 
raise TypeError(f"return_type must be 'as_tuple' or 'as_dict', but is '{return_type}'")
 
else:
 
raise TypeError(f"xarray must be of type xr.DataArray, but is of type {type(xarray)}")
 
 
@staticmethod
 
def _make_full_time_index(irregular_time_index, freq):
 
full_time_index = pd.date_range(start=irregular_time_index[0], end=irregular_time_index[-1], freq=freq)
 
return full_time_index
 
 
def _plot(self, plt_type='hist', *args):
 
if plt_type == 'hist':
 
self._plot_hist()
 
elif plt_type == 'hist_cum':
 
self._plot_hist_cum()
 
else:
 
raise ValueError(f"plt_type mus be 'hist' or 'hist_cum', but is {type}")
 
 
def _plot_hist(self, *args):
 
colors = self.get_dataset_colors()
 
fig, axes = plt.subplots(figsize=(10, 3))
 
for i, subset in enumerate(self.dataset_time_interval.keys()):
 
plot_dataset = self.avail_data_amount.sel({self.subset_dim: subset,
 
self.temporal_dim: slice(
 
self.dataset_time_interval[subset]['first'],
 
self.dataset_time_interval[subset]['last']
 
)
 
}
 
)
 
 
plot_dataset.plot.step(color=colors[subset], ax=axes, label=subset)
 
plt.fill_between(plot_dataset.coords[self.temporal_dim].values, plot_dataset.values, color=colors[subset])
 
 
lgd = fig.legend(loc="upper right", ncol=len(self.dataset_time_interval),
 
facecolor='white', framealpha=1, edgecolor='black')
 
for lgd_line in lgd.get_lines():
 
lgd_line.set_linewidth(4.0)
 
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
 
plt.title('')
 
plt.ylabel('Number of samples')
 
plt.tight_layout()
 
 
def _plot_hist_cum(self, *args):
 
colors = self.get_dataset_colors()
 
fig, axes = plt.subplots(figsize=(10, 3))
 
n_bins = int(self.avail_data_cum_sum.max().values)
 
bins = np.arange(0, n_bins+1)
 
descending_subsets = self.avail_data_cum_sum.max(dim=self.station_dim).sortby(
 
self.avail_data_cum_sum.max(dim=self.station_dim), ascending=False
 
).coords[self.subset_dim].values
 
 
for subset in descending_subsets:
 
self.avail_data_cum_sum.sel({self.subset_dim: subset}).plot.hist(ax=axes,
 
bins=bins,
 
label=subset,
 
cumulative=-1,
 
color=colors[subset],
 
# alpha=.5
 
)
 
 
lgd = fig.legend(loc="upper right", ncol=len(self.dataset_time_interval),
 
facecolor='white', framealpha=1, edgecolor='black')
 
plt.title('')
 
plt.ylabel('Number of stations')
 
plt.xlabel('Number of samples')
 
plt.xlim((bins[0], bins[-1]))
 
plt.tight_layout()
 
 
 
if __name__ == "__main__":
if __name__ == "__main__":
stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
path = "../../testrun_network/forecasts"
path = "../../testrun_network/forecasts"
Loading