Skip to content
Snippets Groups Projects

Resolve "histogram of inputs and targets"

Merged Ghost User requested to merge lukas_issue299_feat_histogram_plots into develop
1 file
+ 46
21
Compare changes
  • Side-by-side
  • Inline
@@ -454,6 +454,8 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover
self.window_dim = window_dim
self.inputs, self.targets = self._get_inputs_targets(generators, self.variables_dim)
self.bins = {}
self.interval_width = {}
self.bin_edges = {}
# input plots
self._calculate_hist(generators, self.inputs, input_data=True)
@@ -476,38 +478,58 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover
return inputs, targets
def _calculate_hist(self, generators, variables, input_data=True):
n_bins = 100
for set_type, generator in generators.items():
bins = {}
n_bins = 100
interval_width = None
bin_edges = None
tmp_bins = {}
tmp_edges = {}
end = {}
start = {}
f = lambda x: x.get_X(as_numpy=False)[0] if input_data is True else x.get_Y(as_numpy=False)
for gen in generator:
w = min(abs(f(gen).coords[self.window_dim].values))
data = f(gen).sel({self.window_dim: w})
res, interval_width, bin_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
res, _, g_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
for var in variables:
n_var = bins.get(var, np.zeros(n_bins))
n_var += res[var]
bins[var] = n_var
b = tmp_bins.get(var, [])
b.append(res[var])
tmp_bins[var] = b
e = tmp_edges.get(var, [])
e.append(g_edges[var])
tmp_edges[var] = e
end[var] = max([end.get(var, g_edges[var].max()), g_edges[var].max()])
start[var] = min([start.get(var, g_edges[var].min()), g_edges[var].min()])
# interpolate and aggregate
bins = {}
edges = {}
interval_width = {}
for var in variables:
bin_edges = np.linspace(start[var], end[var], n_bins + 1)
interval_width[var] = bin_edges[1] - bin_edges[0]
for i, e in enumerate(tmp_bins[var]):
bins_interp = np.interp(bin_edges[:-1], tmp_edges[var][i][:-1], e, left=0, right=0)
bins[var] = bins.get(var, np.zeros(n_bins)) + bins_interp
edges[var] = bin_edges
self.bins[set_type] = bins
self.interval_width = interval_width
self.bin_edges = bin_edges
self.interval_width[set_type] = interval_width
self.bin_edges[set_type] = edges
def _plot(self, add_name, subset):
plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}_{subset}_{add_name}.pdf")
pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
bins = self.bins[subset]
bin_edges = self.bin_edges[subset]
interval_width = self.interval_width[subset]
colors = self.get_dataset_colors()
for var in bins.keys():
fig, ax = plt.subplots()
hist_var = bins[var]
n_var = sum(hist_var)
weights = hist_var / (self.interval_width * n_var)
ax.hist(self.bin_edges[:-1], self.bin_edges, weights=weights, color=colors[subset])
weights = hist_var / (interval_width[var] * n_var)
ax.hist(bin_edges[var][:-1], bin_edges[var], weights=weights, color=colors[subset])
ax.set_ylabel("probability density")
ax.set_xlabel(f"values ({subset})")
ax.set_title(f"Histogram ({var}, n={int(n_var)})")
ax.set_xlabel(f"values")
ax.set_title(f"histogram {var} ({subset}, n={int(n_var)})")
pdf_pages.savefig()
# close all open figures / plots
pdf_pages.close()
@@ -522,14 +544,16 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover
fig, ax = plt.subplots()
for subset in self.bins.keys():
hist_var = self.bins[subset][var]
interval_width = self.interval_width[subset][var]
bin_edges = self.bin_edges[subset][var]
n_var = sum(hist_var)
weights = hist_var / (self.interval_width * n_var)
ax.plot(self.bin_edges[:-1] + 0.5 * self.interval_width, weights, label=f"{subset}",
weights = hist_var / (interval_width * n_var)
ax.plot(bin_edges[:-1] + 0.5 * interval_width, weights, label=f"{subset}",
c=colors[subset])
ax.set_ylabel("probability density")
ax.set_xlabel(f"{var}")
ax.set_xlabel("values")
ax.legend(loc="upper right")
ax.set_title(f"Histogram")
ax.set_title(f"histogram {var}")
pdf_pages.savefig()
# close all open figures / plots
pdf_pages.close()
@@ -815,9 +839,10 @@ def f_proc_2(g, m, pos, variables_dim, time_dim):
def f_proc_hist(data, variables, n_bins, variables_dim):
res = {}
bin_edges = {}
interval_width = {}
for var in variables:
d = data.sel({variables_dim: var}).squeeze() if len(data.shape) > 1 else data
hist, bin_edges = np.histogram(d.values, n_bins, range=(-4, 4))
interval_width = (bin_edges[1] - bin_edges[0])
res[var] = hist
res[var], bin_edges[var] = np.histogram(d.values, n_bins)
interval_width[var] = bin_edges[var][1] - bin_edges[var][0]
return res, interval_width, bin_edges
Loading