Skip to content
Snippets Groups Projects
Commit 03ef6301 authored by leufen1's avatar leufen1
Browse files

hist edges are now dynamically adjusted per variable

parent 6f02e87b
No related branches found
No related tags found
5 merge requests!319add all changes of dev into release v1.4.0 branch,!318Resolve "release v1.4.0",!299Draft: Merge default data handler and preprocessing support parameter use_multiprocessing....,!288Resolve "histogram of inputs and targets",!259Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline #65602 failed
......@@ -454,6 +454,8 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover
self.window_dim = window_dim
self.inputs, self.targets = self._get_inputs_targets(generators, self.variables_dim)
self.bins = {}
self.interval_width = {}
self.bin_edges = {}
# input plots
self._calculate_hist(generators, self.inputs, input_data=True)
......@@ -476,38 +478,58 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover
return inputs, targets
def _calculate_hist(self, generators, variables, input_data=True):
for set_type, generator in generators.items():
bins = {}
n_bins = 100
interval_width = None
bin_edges = None
for set_type, generator in generators.items():
tmp_bins = {}
tmp_edges = {}
end = {}
start = {}
f = lambda x: x.get_X(as_numpy=False)[0] if input_data is True else x.get_Y(as_numpy=False)
for gen in generator:
w = min(abs(f(gen).coords[self.window_dim].values))
data = f(gen).sel({self.window_dim: w})
res, interval_width, bin_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
res, _, g_edges = f_proc_hist(data, variables, n_bins, self.variables_dim)
for var in variables:
n_var = bins.get(var, np.zeros(n_bins))
n_var += res[var]
bins[var] = n_var
b = tmp_bins.get(var, [])
b.append(res[var])
tmp_bins[var] = b
e = tmp_edges.get(var, [])
e.append(g_edges[var])
tmp_edges[var] = e
end[var] = max([end.get(var, g_edges[var].max()), g_edges[var].max()])
start[var] = min([start.get(var, g_edges[var].min()), g_edges[var].min()])
# interpolate and aggregate
bins = {}
edges = {}
interval_width = {}
for var in variables:
bin_edges = np.linspace(start[var], end[var], n_bins + 1)
interval_width[var] = bin_edges[1] - bin_edges[0]
for i, e in enumerate(tmp_bins[var]):
bins_interp = np.interp(bin_edges[:-1], tmp_edges[var][i][:-1], e, left=0, right=0)
bins[var] = bins.get(var, np.zeros(n_bins)) + bins_interp
edges[var] = bin_edges
self.bins[set_type] = bins
self.interval_width = interval_width
self.bin_edges = bin_edges
self.interval_width[set_type] = interval_width
self.bin_edges[set_type] = edges
def _plot(self, add_name, subset):
plot_path = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}_{subset}_{add_name}.pdf")
pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
bins = self.bins[subset]
bin_edges = self.bin_edges[subset]
interval_width = self.interval_width[subset]
colors = self.get_dataset_colors()
for var in bins.keys():
fig, ax = plt.subplots()
hist_var = bins[var]
n_var = sum(hist_var)
weights = hist_var / (self.interval_width * n_var)
ax.hist(self.bin_edges[:-1], self.bin_edges, weights=weights, color=colors[subset])
weights = hist_var / (interval_width[var] * n_var)
ax.hist(bin_edges[var][:-1], bin_edges[var], weights=weights, color=colors[subset])
ax.set_ylabel("probability density")
ax.set_xlabel(f"values ({subset})")
ax.set_title(f"Histogram ({var}, n={int(n_var)})")
ax.set_xlabel(f"values")
ax.set_title(f"histogram {var} ({subset}, n={int(n_var)})")
pdf_pages.savefig()
# close all open figures / plots
pdf_pages.close()
......@@ -522,14 +544,16 @@ class PlotDataHistogram(AbstractPlotClass): # pragma: no cover
fig, ax = plt.subplots()
for subset in self.bins.keys():
hist_var = self.bins[subset][var]
interval_width = self.interval_width[subset][var]
bin_edges = self.bin_edges[subset][var]
n_var = sum(hist_var)
weights = hist_var / (self.interval_width * n_var)
ax.plot(self.bin_edges[:-1] + 0.5 * self.interval_width, weights, label=f"{subset}",
weights = hist_var / (interval_width * n_var)
ax.plot(bin_edges[:-1] + 0.5 * interval_width, weights, label=f"{subset}",
c=colors[subset])
ax.set_ylabel("probability density")
ax.set_xlabel(f"{var}")
ax.set_xlabel("values")
ax.legend(loc="upper right")
ax.set_title(f"Histogram")
ax.set_title(f"histogram {var}")
pdf_pages.savefig()
# close all open figures / plots
pdf_pages.close()
......@@ -815,9 +839,10 @@ def f_proc_2(g, m, pos, variables_dim, time_dim):
def f_proc_hist(data, variables, n_bins, variables_dim):
res = {}
bin_edges = {}
interval_width = {}
for var in variables:
d = data.sel({variables_dim: var}).squeeze() if len(data.shape) > 1 else data
hist, bin_edges = np.histogram(d.values, n_bins, range=(-4, 4))
interval_width = (bin_edges[1] - bin_edges[0])
res[var] = hist
res[var], bin_edges[var] = np.histogram(d.values, n_bins)
interval_width[var] = bin_edges[var][1] - bin_edges[var][0]
return res, interval_width, bin_edges
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment