Commit eb91d868 authored by Steve Schmerler

Finalize notebooks, move ExactGPModel back

parent a20f7d92
Part of merge request !1 "Add GP part" (BLcourse2.3)
@@ -33,11 +33,11 @@
# # Imports, helpers, setup

# ##%matplotlib notebook
# ##%matplotlib widget
# %matplotlib inline

# +
import math
from collections import defaultdict
from pprint import pprint
@@ -47,7 +47,7 @@ import gpytorch
from matplotlib import pyplot as plt
from matplotlib import is_interactive
from utils import extract_model_params, plot_samples

# Default float32 results in slightly noisy prior samples. Less so with

@@ -121,6 +121,30 @@ plt.scatter(X_train, y_train, marker="o", color="tab:blue")

# +
class ExactGPModel(gpytorch.models.ExactGP):
    """API:

    model.forward()             prior                   f_pred
    model()                     posterior               f_pred

    likelihood(model.forward()) prior with noise        y_pred
    likelihood(model())         posterior with noise    y_pred
    """

    def __init__(self, X_train, y_train, likelihood):
        super().__init__(X_train, y_train, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )

    def forward(self, x):
        """The prior, defined in terms of the mean and covariance function."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(X_train, y_train, likelihood)
# -
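# As a minimal usage sketch (an illustration, not part of this commit), the
# API summarized in the class docstring can be exercised directly: calling
# `model.forward()` gives the GP prior over f at some inputs, while wrapping
# the result in `likelihood()` adds the observation noise. We use the train
# inputs `X_train` from above as example inputs.

# +
# Hypothetical example: draw a few samples from the prior, with and without
# observation noise.
with torch.no_grad():
    pri_f = model.forward(X_train)
    f_samples = pri_f.sample(sample_shape=torch.Size((5,)))
    y_samples = likelihood(pri_f).sample(sample_shape=torch.Size((5,)))
print(f_samples.shape, y_samples.shape)
# -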
@@ -236,7 +260,6 @@ for ii in range(n_iter):
    history["loss"].append(loss.item())
# -
# Plot hyper params and loss (neg. log marginal likelihood) convergence
ncols = len(history)
fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))

@@ -244,12 +267,9 @@ for ax, (p_name, p_lst) in zip(axs, history.items()):
    ax.plot(p_lst)
    ax.set_title(p_name)
    ax.set_xlabel("iterations")
# Values of optimized hyper params
pprint(extract_model_params(model, raw=False))
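# As a side note (a sketch, not part of this commit): besides the
# `extract_model_params` helper, the constrained values of the optimized hyper
# parameters can also be read directly from the model attributes.

# +
# Hypothetical example: access kernel and noise hyper parameters directly.
print("lengthscale:", model.covar_module.base_kernel.lengthscale.item())
print("outputscale:", model.covar_module.outputscale.item())
print("noise:", model.likelihood.noise.item())
# -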
# # Run prediction
#

@@ -337,3 +357,4 @@ with torch.no_grad():

# When running as script
if not is_interactive():
    plt.show()
# -
@@ -14,7 +14,9 @@
# ---

# +
# %matplotlib notebook
# ##%matplotlib widget
# ##%matplotlib inline
# -

# +
@@ -26,33 +28,32 @@ import gpytorch
from matplotlib import pyplot as plt
from matplotlib import is_interactive
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from sklearn.preprocessing import StandardScaler
from utils import extract_model_params, plot_samples, fig_ax_3d

torch.set_default_dtype(torch.float64)
torch.manual_seed(123)

# # Generate toy 2D data

# +
class MexicanHat:
    def __init__(self, xlim, ylim, nx, ny, mode, **kwds):
        self.xlim = xlim
        self.ylim = ylim
        self.nx = nx
        self.ny = ny
        self.xg, self.yg = self._get_xy_grid()
        self.XG, self.YG = self._get_meshgrids(self.xg, self.yg)
        self.X = self._make_X(mode)
        self.z = self.func(self.X)

    def _make_X(self, mode="grid"):
        if mode == "grid":
            X = torch.empty((self.nx * self.ny, 2))
            X[:, 0] = self.XG.flatten()
@@ -63,15 +64,19 @@ class MexicanHat:
            X[:, 1] = X[:, 1] * (self.ylim[1] - self.ylim[0]) + self.ylim[0]
        return X

    def _get_xy_grid(self):
        x = torch.linspace(self.xlim[0], self.xlim[1], self.nx)
        y = torch.linspace(self.ylim[0], self.ylim[1], self.ny)
        return x, y

    @staticmethod
    def _get_meshgrids(xg, yg):
        return torch.meshgrid(xg, yg, indexing="ij")

    @staticmethod
    def func(X):
        r = torch.sqrt((X**2).sum(axis=1))
        return torch.sin(r) / r

    @staticmethod
    def n2t(x):

@@ -79,25 +84,23 @@ class MexicanHat:
    def apply_scalers(self, x_scaler, y_scaler):
        self.X = self.n2t(x_scaler.transform(self.X))
        Xtmp = x_scaler.transform(torch.stack((self.xg, self.yg), dim=1))
        self.XG, self.YG = self._get_meshgrids(
            self.n2t(Xtmp[:, 0]), self.n2t(Xtmp[:, 1])
        )
        self.z = self.n2t(y_scaler.transform(self.z[:, None])[:, 0])

# +
data_train = MexicanHat(
    xlim=[-15, 25], ylim=[-15, 5], nx=20, ny=20, mode="rand"
)
x_scaler = StandardScaler().fit(data_train.X)
y_scaler = StandardScaler().fit(data_train.z[:, None])
data_train.apply_scalers(x_scaler, y_scaler)

data_pred = MexicanHat(
    xlim=[-15, 25], ylim=[-15, 5], nx=100, ny=100, mode="grid"
)
data_pred.apply_scalers(x_scaler, y_scaler)
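# As a quick sanity check (a sketch, not part of this commit): the scalers are
# fit on the train data only and merely re-applied to `data_pred`, so after
# `apply_scalers` the train inputs and targets should have roughly zero mean
# and unit standard deviation.

# +
# Hypothetical check of the standardized train data.
print(data_train.X.mean(dim=0), data_train.X.std(dim=0))
print(data_train.z.mean(), data_train.z.std())
# -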
@@ -106,41 +109,99 @@ X_train = data_train.X

# inputs for prediction and plotting
X_pred = data_pred.X
# -
# # Exercise 1

# +
use_noise = False
use_gap = False
# -

# # Exercise 2

# +
##use_noise = True
##use_gap = False
# -

# # Exercise 3

# +
##use_noise = False
##use_gap = True
# -

# +
if use_noise:
    # noisy train data
    noise_std = 0.2
    noise_dist = torch.distributions.Normal(loc=0, scale=noise_std)
    y_train = data_train.z + noise_dist.sample_n(len(data_train.z))
else:
    # noise-free train data
    noise_std = 0
    y_train = data_train.z
# -

# +
# Cut out part of the train data to create out-of-distribution predictions
if use_gap:
    mask = (X_train[:, 0] > 0) & (X_train[:, 1] < 0)
    X_train = X_train[~mask, :]
    y_train = y_train[~mask]
# -
fig, ax = fig_ax_3d()
s0 = ax.plot_surface(
    data_pred.XG,
    data_pred.YG,
    data_pred.z.reshape((data_pred.nx, data_pred.ny)),
    color="tab:grey",
    alpha=0.5,
)
s1 = ax.scatter(
    xs=X_train[:, 0],
    ys=X_train[:, 1],
    zs=y_train,
    color="tab:blue",
    alpha=0.5,
)
ax.set_xlabel("X_0")
ax.set_ylabel("X_1")
# # Define GP model

# +
class ExactGPModel(gpytorch.models.ExactGP):
    """API:

    model.forward()             prior                   f_pred
    model()                     posterior               f_pred

    likelihood(model.forward()) prior with noise        y_pred
    likelihood(model())         posterior with noise    y_pred
    """

    def __init__(self, X_train, y_train, likelihood):
        super().__init__(X_train, y_train, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )

    def forward(self, x):
        """The prior, defined in terms of the mean and covariance function."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(X_train, y_train, likelihood)
# -
# Inspect the model
print(model)

@@ -182,19 +243,15 @@ for ii in range(n_iter):
    history["loss"].append(loss.item())
# -

ncols = len(history)
fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))
for ax, (p_name, p_lst) in zip(axs, history.items()):
    ax.plot(p_lst)
    ax.set_title(p_name)
    ax.set_xlabel("iterations")

# Values of optimized hyper params
pprint(extract_model_params(model, raw=False))

# # Run prediction
@@ -236,40 +293,84 @@ ncols = 3
fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 7, 5))

vmax = post_pred_y.stddev.max()
cs = []

cs.append(
    axs[0].contourf(
        data_pred.XG,
        data_pred.YG,
        torch.abs(post_pred_y.mean - data_pred.z).reshape(
            (data_pred.nx, data_pred.ny)
        ),
    )
)
axs[0].set_title("|y_pred - y_true|")

cs.append(
    axs[1].contourf(
        data_pred.XG,
        data_pred.YG,
        post_pred_f.stddev.reshape((data_pred.nx, data_pred.ny)),
        vmax=vmax,
    )
)
axs[1].set_title("f_std (epistemic)")

cs.append(
    axs[2].contourf(
        data_pred.XG,
        data_pred.YG,
        post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny)),
        vmax=vmax,
    )
)
axs[2].set_title("y_std (epistemic + aleatoric)")

for ax, c in zip(axs, cs):
    ax.set_xlabel("X_0")
    ax.set_ylabel("X_1")
    ax.scatter(x=X_train[:, 0], y=X_train[:, 1], color="white", alpha=0.2)
    fig.colorbar(c, ax=ax)
# -
# # Check learned noise

# +
print((post_pred_y.stddev**2 - post_pred_f.stddev**2).mean().sqrt())
print(
    np.sqrt(
        extract_model_params(model, raw=False)["likelihood.noise_covar.noise"]
    )
)
print(noise_std)
# -
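# The three printed numbers should roughly agree: for a Gaussian likelihood
# the predictive variance of y is the latent variance of f plus the learned
# noise variance, y_std**2 = f_std**2 + sigma_n**2. The first line therefore
# recovers the learned noise std from the two predictive distributions, the
# second prints the learned value directly, and the third is the true
# `noise_std` used to generate the data (0 in the noise-free case).
#
# As a sketch (an assumption, not part of this commit), the identity can also
# be checked element-wise:

# +
# Hypothetical check: total predictive variance = latent variance + noise.
print(
    torch.allclose(
        post_pred_y.stddev**2,
        post_pred_f.stddev**2 + model.likelihood.noise,
    )
)
# -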
# # Plot confidence bands

# +
y_mean = post_pred_y.mean.reshape((data_pred.nx, data_pred.ny))
y_std = post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny))
upper = y_mean + 2 * y_std
lower = y_mean - 2 * y_std

fig, ax = fig_ax_3d()
for Z, color in [(upper, "tab:green"), (lower, "tab:red")]:
    ax.plot_surface(
        data_pred.XG,
        data_pred.YG,
        Z,
        color=color,
        alpha=0.5,
    )

contour_z = lower.min() - 1
zlim = ax.get_xlim()
ax.set_zlim((contour_z, zlim[1] + abs(contour_z)))
ax.contourf(data_pred.XG, data_pred.YG, y_std, zdir="z", offset=contour_z)
# -
# When running as script
if not is_interactive():
    plt.show()
# -
from matplotlib import pyplot as plt


def extract_model_params(model, raw=False) -> dict:

@@ -28,6 +28,12 @@ def extract_model_params(model, raw=False) -> dict:
    return out
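# Usage sketch (an assumption, not part of this module): the notebooks call
#
#   pprint(extract_model_params(model, raw=False))
#
# and index the returned dict by dotted parameter names, e.g.
# out["likelihood.noise_covar.noise"] for the learned noise variance.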
def fig_ax_3d():
    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")
    return fig, ax
def plot_samples(ax, X_pred, samples, label=None, **kwds):
    plot_kwds = dict(color="tab:green", alpha=0.3)
    plot_kwds.update(kwds)

@@ -37,26 +43,3 @@ def plot_samples(ax, X_pred, samples, label=None, **kwds):
    else:
        ax.plot(X_pred, samples[0, :], **plot_kwds, label=label)
        ax.plot(X_pred, samples[1:, :].T, **plot_kwds, label="_")
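# Usage sketch (an assumption, not part of this module): in the 1D notebook a
# batch of GP samples drawn at the prediction inputs can be overlaid on one
# axis, e.g. with the model in eval mode:
#
#   with torch.no_grad():
#       samples = model(X_pred).sample(sample_shape=torch.Size((10,)))
#   fig, ax = plt.subplots()
#   plot_samples(ax, X_pred, samples, label="posterior samples")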
class ExactGPModel(gpytorch.models.ExactGP):
    """API:

    model.forward()             prior                   f_pred
    model()                     posterior               f_pred

    likelihood(model.forward()) prior with noise        y_pred
    likelihood(model())         posterior with noise    y_pred
    """

    def __init__(self, X_train, y_train, likelihood):
        super().__init__(X_train, y_train, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)