diff --git a/BLcourse2.3/01_one_dim.py b/BLcourse2.3/01_one_dim.py
index 2d0a90394fe5aca797dae5843176f401c3b3418b..536ef47b43120e33e4064edc62fb5b0110dba868 100644
--- a/BLcourse2.3/01_one_dim.py
+++ b/BLcourse2.3/01_one_dim.py
@@ -33,11 +33,11 @@
 # # Imports, helpers, setup
 
-# +
-# %matplotlib widget
-# -
+# ##%matplotlib notebook
+# ##%matplotlib widget
+# %matplotlib inline
 
-# -
+# +
 import math
 from collections import defaultdict
 from pprint import pprint
 
@@ -47,7 +47,7 @@
 import gpytorch
 from matplotlib import pyplot as plt
 from matplotlib import is_interactive
-from utils import extract_model_params, plot_samples, ExactGPModel
+from utils import extract_model_params, plot_samples
 
 
 # Default float32 results in slightly noisy prior samples. Less so with
@@ -121,6 +121,30 @@
 plt.scatter(X_train, y_train, marker="o", color="tab:blue")
 
 # +
+class ExactGPModel(gpytorch.models.ExactGP):
+    """API:
+
+    model.forward()             prior                   f_pred
+    model()                     posterior               f_pred
+
+    likelihood(model.forward()) prior with noise        y_pred
+    likelihood(model())         posterior with noise    y_pred
+    """
+
+    def __init__(self, X_train, y_train, likelihood):
+        super().__init__(X_train, y_train, likelihood)
+        self.mean_module = gpytorch.means.ConstantMean()
+        self.covar_module = gpytorch.kernels.ScaleKernel(
+            gpytorch.kernels.RBFKernel()
+        )
+
+    def forward(self, x):
+        """The prior, defined in terms of the mean and covariance function."""
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
 likelihood = gpytorch.likelihoods.GaussianLikelihood()
 model = ExactGPModel(X_train, y_train, likelihood)
 # -
@@ -236,7 +260,6 @@ for ii in range(n_iter):
     history["loss"].append(loss.item())
 # -
 
-# +
 # Plot hyper params and loss (neg. log marginal likelihood) convergence
 ncols = len(history)
 fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))
@@ -244,12 +267,9 @@ for ax, (p_name, p_lst) in zip(axs, history.items()):
     ax.plot(p_lst)
     ax.set_title(p_name)
     ax.set_xlabel("iterations")
-# -
 
-# +
 # Values of optimized hyper params
 pprint(extract_model_params(model, raw=False))
-# -
 
 # # Run prediction
 #
@@ -337,3 +357,4 @@ with torch.no_grad():
 # When running as script
 if not is_interactive():
     plt.show()
+# -
diff --git a/BLcourse2.3/02_two_dim.py b/BLcourse2.3/02_two_dim.py
index 0e7333177178ed8e9380b0c0e1567f6965e1c343..fbe9a7e7d118a222f8abc63f5083426e5331eeb7 100644
--- a/BLcourse2.3/02_two_dim.py
+++ b/BLcourse2.3/02_two_dim.py
@@ -14,7 +14,9 @@
 # ---
 
 # +
-# %matplotlib widget
+# %matplotlib notebook
+# ##%matplotlib widget
+# ##%matplotlib inline
 # -
 
 # +
@@ -26,33 +28,32 @@
 import gpytorch
 from matplotlib import pyplot as plt
 from matplotlib import is_interactive
 from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
 from sklearn.preprocessing import StandardScaler
 
-from utils import extract_model_params, plot_samples, ExactGPModel
+from utils import extract_model_params, plot_samples, fig_ax_3d
+# -
 
-# +
 torch.set_default_dtype(torch.float64)
 torch.manual_seed(123)
-# -
 
 # # Generate toy 2D data
 
-# +
 class MexicanHat:
     def __init__(self, xlim, ylim, nx, ny, mode, **kwds):
         self.xlim = xlim
         self.ylim = ylim
         self.nx = nx
         self.ny = ny
-        self.xg, self.yg = self.get_xy_grid()
-        self.XG, self.YG = torch.meshgrid(self.xg, self.yg, indexing="ij")
-        self.X = self.make_X(mode)
+        self.xg, self.yg = self._get_xy_grid()
+        self.XG, self.YG = self._get_meshgrids(self.xg, self.yg)
+        self.X = self._make_X(mode)
         self.z = self.func(self.X)
 
-    def make_X(self, mode="grid"):
+    def _make_X(self, mode="grid"):
         if mode == "grid":
             X = torch.empty((self.nx * self.ny, 2))
             X[:, 0] = self.XG.flatten()
@@ -63,15 +64,19 @@ class MexicanHat:
             X[:, 1] = X[:, 1] * (self.ylim[1] - self.ylim[0]) + self.ylim[0]
         return X
 
-    def get_xy_grid(self):
+    def _get_xy_grid(self):
         x = torch.linspace(self.xlim[0], self.xlim[1], self.nx)
         y = torch.linspace(self.ylim[0], self.ylim[1], self.ny)
         return x, y
 
+    @staticmethod
+    def _get_meshgrids(xg, yg):
+        return torch.meshgrid(xg, yg, indexing="ij")
+
     @staticmethod
     def func(X):
         r = torch.sqrt((X**2).sum(axis=1))
-        return torch.sin(r) / r * 10
+        return torch.sin(r) / r
 
     @staticmethod
     def n2t(x):
@@ -79,25 +84,23 @@ class MexicanHat:
 
     def apply_scalers(self, x_scaler, y_scaler):
         self.X = self.n2t(x_scaler.transform(self.X))
-        Xg = x_scaler.transform(torch.stack((self.xg, self.yg), dim=1))
-        self.xg = self.n2t(Xg[:, 0])
-        self.yg = self.n2t(Xg[:, 1])
-        self.XG, self.YG = torch.meshgrid(self.xg, self.yg, indexing="ij")
+        Xtmp = x_scaler.transform(torch.stack((self.xg, self.yg), dim=1))
+        self.XG, self.YG = self._get_meshgrids(
+            self.n2t(Xtmp[:, 0]), self.n2t(Xtmp[:, 1])
+        )
         self.z = self.n2t(y_scaler.transform(self.z[:, None])[:, 0])
 
-# -
-
 # +
 data_train = MexicanHat(
-    xlim=[-15, 5], ylim=[-15, 25], nx=20, ny=20, mode="rand"
+    xlim=[-15, 25], ylim=[-15, 5], nx=20, ny=20, mode="rand"
 )
 x_scaler = StandardScaler().fit(data_train.X)
 y_scaler = StandardScaler().fit(data_train.z[:, None])
 data_train.apply_scalers(x_scaler, y_scaler)
 
 data_pred = MexicanHat(
-    xlim=[-15, 5], ylim=[-15, 25], nx=100, ny=100, mode="grid"
+    xlim=[-15, 25], ylim=[-15, 5], nx=100, ny=100, mode="grid"
 )
 data_pred.apply_scalers(x_scaler, y_scaler)
@@ -106,41 +109,99 @@ X_train = data_train.X
 
 # inputs for prediction and plotting
 X_pred = data_pred.X
+# -
 
-# noise-free train data
-y_train = data_train.z
+# # Exercise 1
 
-# noisy train data
-##y_train = data_train.z + torch.randn(size=data_train.z.shape) / 5
+# +
+use_noise = False
+use_gap = False
+# -
+
+# # Exercise 2
+
+# +
+##use_noise = True
+##use_gap = False
+# -
+
+# # Exercise 3
+
+# +
+##use_noise = False
+##use_gap = True
+# -
+
+# +
+if use_noise:
+    # noisy train data
+    noise_std = 0.2
+    noise_dist = torch.distributions.Normal(loc=0, scale=noise_std)
+    y_train = data_train.z + noise_dist.sample((len(data_train.z),))
+else:
+    # noise-free train data
+    noise_std = 0
+    y_train = data_train.z
 # -
 
 # +
 # Cut out part of the train data to create out-of-distribution predictions
-##mask = (X_train[:, 0] < -5) & (X_train[:, 1] < 0)
-##X_train = X_train[~mask, :]
-##y_train = y_train[~mask]
+
+if use_gap:
+    mask = (X_train[:, 0] > 0) & (X_train[:, 1] < 0)
+    X_train = X_train[~mask, :]
+    y_train = y_train[~mask]
 # -
 
-fig = plt.figure()
-ax = fig.add_subplot(projection="3d")
-ax.plot_surface(
+fig, ax = fig_ax_3d()
+s0 = ax.plot_surface(
     data_pred.XG,
     data_pred.YG,
     data_pred.z.reshape((data_pred.nx, data_pred.ny)),
     color="tab:grey",
     alpha=0.5,
 )
-ax.scatter(
-    xs=X_train[:, 0], ys=X_train[:, 1], zs=y_train, color="tab:blue", alpha=0.5
+s1 = ax.scatter(
+    xs=X_train[:, 0],
+    ys=X_train[:, 1],
+    zs=y_train,
+    color="tab:blue",
+    alpha=0.5,
 )
 ax.set_xlabel("X_0")
 ax.set_ylabel("X_1")
 
 # # Define GP model
+
+# +
+class ExactGPModel(gpytorch.models.ExactGP):
+    """API:
+
+    model.forward()             prior                   f_pred
+    model()                     posterior               f_pred
+
+    likelihood(model.forward()) prior with noise        y_pred
+    likelihood(model())         posterior with noise    y_pred
+    """
+
+    def __init__(self, X_train, y_train, likelihood):
+        super().__init__(X_train, y_train, likelihood)
+        self.mean_module = gpytorch.means.ConstantMean()
+        self.covar_module = gpytorch.kernels.ScaleKernel(
+            gpytorch.kernels.RBFKernel()
+        )
+
+    def forward(self, x):
+        """The prior, defined in terms of the mean and covariance function."""
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
 likelihood = gpytorch.likelihoods.GaussianLikelihood()
 model = ExactGPModel(X_train, y_train, likelihood)
-
+# -
 
 # Inspect the model
 print(model)
@@ -182,19 +243,15 @@ for ii in range(n_iter):
     history["loss"].append(loss.item())
 # -
 
-# +
 ncols = len(history)
 fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))
 for ax, (p_name, p_lst) in zip(axs, history.items()):
     ax.plot(p_lst)
     ax.set_title(p_name)
     ax.set_xlabel("iterations")
-# -
 
-# +
 # Values of optimized hyper params
 pprint(extract_model_params(model, raw=False))
-# -
 
 # # Run prediction
 
@@ -236,40 +293,84 @@ ncols = 3
 fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 7, 5))
 
 vmax = post_pred_y.stddev.max()
+cs = []
 
-c0 = axs[0].contourf(
-    data_pred.XG,
-    data_pred.YG,
-    torch.abs(post_pred_y.mean - data_pred.z).reshape(
-        (data_pred.nx, data_pred.ny)
-    ),
+cs.append(
+    axs[0].contourf(
+        data_pred.XG,
+        data_pred.YG,
+        torch.abs(post_pred_y.mean - data_pred.z).reshape(
+            (data_pred.nx, data_pred.ny)
+        ),
+    )
 )
 axs[0].set_title("|y_pred - y_true|")
 
-c1 = axs[1].contourf(
-    data_pred.XG,
-    data_pred.YG,
-    post_pred_f.stddev.reshape((data_pred.nx, data_pred.ny)),
-    vmax=vmax,
+cs.append(
+    axs[1].contourf(
+        data_pred.XG,
+        data_pred.YG,
+        post_pred_f.stddev.reshape((data_pred.nx, data_pred.ny)),
+        vmin=0,
+        vmax=vmax,
+    )
 )
 axs[1].set_title("f_std (epistemic)")
 
-c2 = axs[2].contourf(
-    data_pred.XG,
-    data_pred.YG,
-    post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny)),
-    vmax=vmax,
+cs.append(
+    axs[2].contourf(
+        data_pred.XG,
+        data_pred.YG,
+        post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny)),
+        vmin=0,
+        vmax=vmax,
+    )
 )
 axs[2].set_title("y_std (epistemic + aleatoric)")
 
-for ax, c in zip(axs, [c0, c1, c2]):
+for ax, c in zip(axs, cs):
     ax.set_xlabel("X_0")
     ax.set_ylabel("X_1")
     ax.scatter(x=X_train[:, 0], y=X_train[:, 1], color="white", alpha=0.2)
     fig.colorbar(c, ax=ax)
+# -
 
+# # Check learned noise
+
+# +
+print((post_pred_y.stddev**2 - post_pred_f.stddev**2).mean().sqrt())
+print(
+    np.sqrt(
+        extract_model_params(model, raw=False)["likelihood.noise_covar.noise"]
+    )
+)
+print(noise_std)
+# -
+
+# # Plot confidence bands
+
+# +
+y_mean = post_pred_y.mean.reshape((data_pred.nx, data_pred.ny))
+y_std = post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny))
+upper = y_mean + 2 * y_std
+lower = y_mean - 2 * y_std
+
+fig, ax = fig_ax_3d()
+for Z, color in [(upper, "tab:green"), (lower, "tab:red")]:
+    ax.plot_surface(
+        data_pred.XG,
+        data_pred.YG,
+        Z,
+        color=color,
+        alpha=0.5,
+    )
+
+contour_z = lower.min() - 1
+zlim = ax.get_zlim()
+ax.set_zlim((contour_z, zlim[1] + abs(contour_z)))
+ax.contourf(data_pred.XG, data_pred.YG, y_std, zdir="z", offset=contour_z)
+# -
 
 # When running as script
 if not is_interactive():
     plt.show()
-# -
diff --git a/BLcourse2.3/utils.py b/BLcourse2.3/utils.py
index e46ecafe5f3ff494ba332cb94e6216eefa2a48ff..fc512d45c86aeb380087bd297aed0b3b29d84d7b 100644
--- a/BLcourse2.3/utils.py
+++ b/BLcourse2.3/utils.py
@@ -1,4 +1,4 @@
-import gpytorch
+from matplotlib import pyplot as plt
 
 
 def extract_model_params(model, raw=False) -> dict:
@@ -28,6 +28,12 @@ def extract_model_params(model, raw=False) -> dict:
     return out
 
 
+def fig_ax_3d():
+    fig = plt.figure()
+    ax = fig.add_subplot(projection="3d")
+    return fig, ax
+
+
 def plot_samples(ax, X_pred, samples, label=None, **kwds):
     plot_kwds = dict(color="tab:green", alpha=0.3)
     plot_kwds.update(kwds)
@@ -37,26 +43,3 @@ def plot_samples(ax, X_pred, samples, label=None, **kwds):
     else:
         ax.plot(X_pred, samples[0, :], **plot_kwds, label=label)
         ax.plot(X_pred, samples[1:, :].T, **plot_kwds, label="_")
-
-
-class ExactGPModel(gpytorch.models.ExactGP):
-    """API:
-
-    model.forward()             prior                   f_pred
-    model()                     posterior               f_pred
-
-    likelihood(model.forward()) prior with noise        y_pred
-    likelihood(model())         posterior with noise    y_pred
-    """
-
-    def __init__(self, X_train, y_train, likelihood):
-        super().__init__(X_train, y_train, likelihood)
-        self.mean_module = gpytorch.means.ConstantMean()
-        self.covar_module = gpytorch.kernels.ScaleKernel(
-            gpytorch.kernels.RBFKernel()
-        )
-
-    def forward(self, x):
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
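
Usage note (not part of the patch): with `ExactGPModel` now defined inline in each notebook script rather than imported from `utils.py`, the API summarized in its docstring can be exercised standalone. The following is a minimal sketch assuming GPyTorch's standard exact-GP workflow; the 1D toy data, prediction grid, and untrained default hyperparameters are illustrative assumptions, not taken from the patch.

import math

import torch
import gpytorch


class ExactGPModel(gpytorch.models.ExactGP):
    # Same model as in the patch: constant mean, scaled RBF kernel.
    def __init__(self, X_train, y_train, likelihood):
        super().__init__(X_train, y_train, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )

    def forward(self, x):
        # The prior, defined in terms of the mean and covariance function.
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


# Illustrative 1D toy data (an assumption for this sketch).
X_train = torch.linspace(0, 1, 20)
y_train = torch.sin(2 * math.pi * X_train)
X_pred = torch.linspace(-0.5, 1.5, 100)

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(X_train, y_train, likelihood)

# In eval mode, model() returns the posterior over f (f_pred) and
# likelihood(model()) adds the observation noise on top (y_pred).
model.eval()
likelihood.eval()
with torch.no_grad():
    post_pred_f = model(X_pred)            # epistemic uncertainty only
    post_pred_y = likelihood(post_pred_f)  # epistemic + aleatoric
    print(post_pred_f.stddev.mean(), post_pred_y.stddev.mean())

The printed standard deviations relate exactly as in the "Check learned noise" cell added by the patch: post_pred_y.stddev**2 equals post_pred_f.stddev**2 plus the likelihood's noise variance.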