diff --git a/BLcourse2.3/01_one_dim.py b/BLcourse2.3/01_one_dim.py
index 2d0a90394fe5aca797dae5843176f401c3b3418b..536ef47b43120e33e4064edc62fb5b0110dba868 100644
--- a/BLcourse2.3/01_one_dim.py
+++ b/BLcourse2.3/01_one_dim.py
@@ -33,11 +33,11 @@
 
 # # Imports, helpers, setup
 
-# +
-# %matplotlib widget
-# -
+# ##%matplotlib notebook
+# ##%matplotlib widget
+# %matplotlib inline
 
-# -
+# +
 import math
 from collections import defaultdict
 from pprint import pprint
@@ -47,7 +47,7 @@ import gpytorch
 from matplotlib import pyplot as plt
 from matplotlib import is_interactive
 
-from utils import extract_model_params, plot_samples, ExactGPModel
+from utils import extract_model_params, plot_samples
 
 
 # Default float32 results in slightly noisy prior samples. Less so with
@@ -121,6 +121,30 @@ plt.scatter(X_train, y_train, marker="o", color="tab:blue")
 
 
 # +
+class ExactGPModel(gpytorch.models.ExactGP):
+    """API:
+
+    model.forward()             prior                   f_pred
+    model()                     posterior               f_pred
+
+    likelihood(model.forward()) prior with noise        y_pred
+    likelihood(model())         posterior with noise    y_pred
+    """
+
+    def __init__(self, X_train, y_train, likelihood):
+        super().__init__(X_train, y_train, likelihood)
+        self.mean_module = gpytorch.means.ConstantMean()
+        self.covar_module = gpytorch.kernels.ScaleKernel(
+            gpytorch.kernels.RBFKernel()
+        )
+
+    def forward(self, x):
+        """The prior, defined in terms of the mean and covariance function."""
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
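+# Gaussian likelihood with learnable noise variance (sigma_n^2)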
 likelihood = gpytorch.likelihoods.GaussianLikelihood()
 model = ExactGPModel(X_train, y_train, likelihood)
 # -
@@ -236,7 +260,6 @@ for ii in range(n_iter):
     history["loss"].append(loss.item())
 # -
 
-# +
 # Plot hyper params and loss (neg. log marginal likelihood) convergence
 ncols = len(history)
 fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))
@@ -244,12 +267,9 @@ for ax, (p_name, p_lst) in zip(axs, history.items()):
     ax.plot(p_lst)
     ax.set_title(p_name)
     ax.set_xlabel("iterations")
-# -
 
-# +
 # Values of optimized hyper params
 pprint(extract_model_params(model, raw=False))
-# -
 
 # # Run prediction
 #
@@ -337,3 +357,4 @@ with torch.no_grad():
 # When running as script
 if not is_interactive():
     plt.show()
+# -
diff --git a/BLcourse2.3/02_two_dim.py b/BLcourse2.3/02_two_dim.py
index 0e7333177178ed8e9380b0c0e1567f6965e1c343..fbe9a7e7d118a222f8abc63f5083426e5331eeb7 100644
--- a/BLcourse2.3/02_two_dim.py
+++ b/BLcourse2.3/02_two_dim.py
@@ -14,7 +14,9 @@
 # ---
 
 # +
-# %matplotlib widget
+# %matplotlib notebook
+# ##%matplotlib widget
+# ##%matplotlib inline
 # -
 
 # +
@@ -26,33 +28,32 @@ import gpytorch
 from matplotlib import pyplot as plt
 from matplotlib import is_interactive
 from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
 
 from sklearn.preprocessing import StandardScaler
 
-from utils import extract_model_params, plot_samples, ExactGPModel
+from utils import extract_model_params, plot_samples, fig_ax_3d
+# -
 
 
-# +
 torch.set_default_dtype(torch.float64)
 torch.manual_seed(123)
-# -
 
 # # Generate toy 2D data
 
 
-# +
 class MexicanHat:
     def __init__(self, xlim, ylim, nx, ny, mode, **kwds):
         self.xlim = xlim
         self.ylim = ylim
         self.nx = nx
         self.ny = ny
-        self.xg, self.yg = self.get_xy_grid()
-        self.XG, self.YG = torch.meshgrid(self.xg, self.yg, indexing="ij")
-        self.X = self.make_X(mode)
+        self.xg, self.yg = self._get_xy_grid()
+        self.XG, self.YG = self._get_meshgrids(self.xg, self.yg)
+        self.X = self._make_X(mode)
         self.z = self.func(self.X)
 
-    def make_X(self, mode="grid"):
+    def _make_X(self, mode="grid"):
         if mode == "grid":
             X = torch.empty((self.nx * self.ny, 2))
             X[:, 0] = self.XG.flatten()
@@ -63,15 +64,19 @@ class MexicanHat:
             X[:, 1] = X[:, 1] * (self.ylim[1] - self.ylim[0]) + self.ylim[0]
         return X
 
-    def get_xy_grid(self):
+    def _get_xy_grid(self):
         x = torch.linspace(self.xlim[0], self.xlim[1], self.nx)
         y = torch.linspace(self.ylim[0], self.ylim[1], self.ny)
         return x, y
 
+    @staticmethod
+    def _get_meshgrids(xg, yg):
+        return torch.meshgrid(xg, yg, indexing="ij")
+
     @staticmethod
     def func(X):
         r = torch.sqrt((X**2).sum(axis=1))
-        return torch.sin(r) / r * 10
+        return torch.sin(r) / r
 
     @staticmethod
     def n2t(x):
@@ -79,25 +84,23 @@ class MexicanHat:
 
     def apply_scalers(self, x_scaler, y_scaler):
         self.X = self.n2t(x_scaler.transform(self.X))
-        Xg = x_scaler.transform(torch.stack((self.xg, self.yg), dim=1))
-        self.xg = self.n2t(Xg[:, 0])
-        self.yg = self.n2t(Xg[:, 1])
-        self.XG, self.YG = torch.meshgrid(self.xg, self.yg, indexing="ij")
+        Xtmp = x_scaler.transform(torch.stack((self.xg, self.yg), dim=1))
+        self.XG, self.YG = self._get_meshgrids(
+            self.n2t(Xtmp[:, 0]), self.n2t(Xtmp[:, 1])
+        )
         self.z = self.n2t(y_scaler.transform(self.z[:, None])[:, 0])
 
 
-# -
-
 # +
 data_train = MexicanHat(
-    xlim=[-15, 5], ylim=[-15, 25], nx=20, ny=20, mode="rand"
+    xlim=[-15, 25], ylim=[-15, 5], nx=20, ny=20, mode="rand"
 )
 x_scaler = StandardScaler().fit(data_train.X)
 y_scaler = StandardScaler().fit(data_train.z[:, None])
 data_train.apply_scalers(x_scaler, y_scaler)
 
 data_pred = MexicanHat(
-    xlim=[-15, 5], ylim=[-15, 25], nx=100, ny=100, mode="grid"
+    xlim=[-15, 25], ylim=[-15, 5], nx=100, ny=100, mode="grid"
 )
 data_pred.apply_scalers(x_scaler, y_scaler)
 
@@ -106,41 +109,99 @@ X_train = data_train.X
 
 # inputs for prediction and plotting
 X_pred = data_pred.X
+# -
 
-# noise-free train data
-y_train = data_train.z
+# # Exercise 1
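+#
+# Noise-free train data covering the full domain.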
 
-# noisy train data
-##y_train = data_train.z + torch.randn(size=data_train.z.shape) / 5
+# +
+use_noise = False
+use_gap = False
+# -
+
+# # Exercise 2
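+#
+# Same as exercise 1, but with Gaussian noise added to the train targets.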
+
+# +
+##use_noise = True
+##use_gap = False
+# -
+
+# # Exercise 3
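+#
+# Noise-free train data with a region cut out (out-of-distribution test).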
+
+# +
+##use_noise = False
+##use_gap = True
+# -
+
+# +
+if use_noise:
+    # noisy train data
+    noise_std = 0.2
+    noise_dist = torch.distributions.Normal(loc=0, scale=noise_std)
+    y_train = data_train.z + noise_dist.sample((len(data_train.z),))
+else:
+    # noise-free train data
+    noise_std = 0
+    y_train = data_train.z
 # -
 
 # +
 # Cut out part of the train data to create out-of-distribution predictions
-##mask = (X_train[:, 0] < -5) & (X_train[:, 1] < 0)
-##X_train = X_train[~mask, :]
-##y_train = y_train[~mask]
+
+if use_gap:
+    mask = (X_train[:, 0] > 0) & (X_train[:, 1] < 0)
+    X_train = X_train[~mask, :]
+    y_train = y_train[~mask]
 # -
 
-fig = plt.figure()
-ax = fig.add_subplot(projection="3d")
-ax.plot_surface(
+fig, ax = fig_ax_3d()
+s0 = ax.plot_surface(
     data_pred.XG,
     data_pred.YG,
     data_pred.z.reshape((data_pred.nx, data_pred.ny)),
     color="tab:grey",
     alpha=0.5,
 )
-ax.scatter(
-    xs=X_train[:, 0], ys=X_train[:, 1], zs=y_train, color="tab:blue", alpha=0.5
+s1 = ax.scatter(
+    xs=X_train[:, 0],
+    ys=X_train[:, 1],
+    zs=y_train,
+    color="tab:blue",
+    alpha=0.5,
 )
 ax.set_xlabel("X_0")
 ax.set_ylabel("X_1")
 
 # # Define GP model
 
+
+# +
+class ExactGPModel(gpytorch.models.ExactGP):
+    """API:
+
+    model.forward()             prior                   f_pred
+    model()                     posterior               f_pred
+
+    likelihood(model.forward()) prior with noise        y_pred
+    likelihood(model())         posterior with noise    y_pred
+    """
+
+    def __init__(self, X_train, y_train, likelihood):
+        super().__init__(X_train, y_train, likelihood)
+        self.mean_module = gpytorch.means.ConstantMean()
+        self.covar_module = gpytorch.kernels.ScaleKernel(
+            gpytorch.kernels.RBFKernel()
+        )
+
+    def forward(self, x):
+        """The prior, defined in terms of the mean and covariance function."""
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+
 likelihood = gpytorch.likelihoods.GaussianLikelihood()
 model = ExactGPModel(X_train, y_train, likelihood)
-
+# -
 
 # Inspect the model
 print(model)
@@ -182,19 +243,15 @@ for ii in range(n_iter):
     history["loss"].append(loss.item())
 # -
 
-# +
 ncols = len(history)
 fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))
 for ax, (p_name, p_lst) in zip(axs, history.items()):
     ax.plot(p_lst)
     ax.set_title(p_name)
     ax.set_xlabel("iterations")
-# -
 
-# +
 # Values of optimized hyper params
 pprint(extract_model_params(model, raw=False))
-# -
 
 # # Run prediction
 
@@ -236,40 +293,84 @@ ncols = 3
 fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 7, 5))
 
 vmax = post_pred_y.stddev.max()
+cs = []
 
-c0 = axs[0].contourf(
-    data_pred.XG,
-    data_pred.YG,
-    torch.abs(post_pred_y.mean - data_pred.z).reshape(
-        (data_pred.nx, data_pred.ny)
-    ),
+cs.append(
+    axs[0].contourf(
+        data_pred.XG,
+        data_pred.YG,
+        torch.abs(post_pred_y.mean - data_pred.z).reshape(
+            (data_pred.nx, data_pred.ny)
+        ),
+    )
 )
 axs[0].set_title("|y_pred - y_true|")
 
-c1 = axs[1].contourf(
-    data_pred.XG,
-    data_pred.YG,
-    post_pred_f.stddev.reshape((data_pred.nx, data_pred.ny)),
-    vmax=vmax,
+cs.append(
+    axs[1].contourf(
+        data_pred.XG,
+        data_pred.YG,
+        post_pred_f.stddev.reshape((data_pred.nx, data_pred.ny)),
+        vmin=0,
+        vmax=vmax,
+    )
 )
 axs[1].set_title("f_std (epistemic)")
 
-c2 = axs[2].contourf(
-    data_pred.XG,
-    data_pred.YG,
-    post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny)),
-    vmax=vmax,
+cs.append(
+    axs[2].contourf(
+        data_pred.XG,
+        data_pred.YG,
+        post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny)),
+        vmin=0,
+        vmax=vmax,
+    )
 )
 axs[2].set_title("y_std (epistemic + aleatoric)")
 
-for ax, c in zip(axs, [c0, c1, c2]):
+for ax, c in zip(axs, cs):
     ax.set_xlabel("X_0")
     ax.set_ylabel("X_1")
     ax.scatter(x=X_train[:, 0], y=X_train[:, 1], color="white", alpha=0.2)
     fig.colorbar(c, ax=ax)
+# -
 
+# # Check learned noise
+
+# +
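+# Three estimates of the noise level sigma_n: (1) from the difference of the
+# predictive variances of y and f, (2) the learned likelihood noise hyper
+# param, (3) the true noise_std used to generate the train data.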
+print((post_pred_y.stddev**2 - post_pred_f.stddev**2).mean().sqrt())
+print(
+    np.sqrt(
+        extract_model_params(model, raw=False)["likelihood.noise_covar.noise"]
+    )
+)
+print(noise_std)
+# -
+
+# # Plot confidence bands
+
+# +
+y_mean = post_pred_y.mean.reshape((data_pred.nx, data_pred.ny))
+y_std = post_pred_y.stddev.reshape((data_pred.nx, data_pred.ny))
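+# 2*sigma band around the predictive mean (approx. 95% credible interval)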
+upper = y_mean + 2 * y_std
+lower = y_mean - 2 * y_std
+
+fig, ax = fig_ax_3d()
+for Z, color in [(upper, "tab:green"), (lower, "tab:red")]:
+    ax.plot_surface(
+        data_pred.XG,
+        data_pred.YG,
+        Z,
+        color=color,
+        alpha=0.5,
+    )
+
+contour_z = lower.min() - 1
+zlim = ax.get_zlim()
+ax.set_zlim((contour_z, zlim[1] + abs(contour_z)))
+ax.contourf(data_pred.XG, data_pred.YG, y_std, zdir="z", offset=contour_z)
+# -
 
 # When running as script
 if not is_interactive():
     plt.show()
-# -
diff --git a/BLcourse2.3/utils.py b/BLcourse2.3/utils.py
index e46ecafe5f3ff494ba332cb94e6216eefa2a48ff..fc512d45c86aeb380087bd297aed0b3b29d84d7b 100644
--- a/BLcourse2.3/utils.py
+++ b/BLcourse2.3/utils.py
@@ -1,4 +1,4 @@
-import gpytorch
+from matplotlib import pyplot as plt
 
 
 def extract_model_params(model, raw=False) -> dict:
@@ -28,6 +28,12 @@ def extract_model_params(model, raw=False) -> dict:
         return out
 
 
+def fig_ax_3d():
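+    """Create a figure with a single 3D axes."""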
+    fig = plt.figure()
+    ax = fig.add_subplot(projection="3d")
+    return fig, ax
+
+
 def plot_samples(ax, X_pred, samples, label=None, **kwds):
     plot_kwds = dict(color="tab:green", alpha=0.3)
     plot_kwds.update(kwds)
@@ -37,26 +43,3 @@ def plot_samples(ax, X_pred, samples, label=None, **kwds):
     else:
         ax.plot(X_pred, samples[0, :], **plot_kwds, label=label)
         ax.plot(X_pred, samples[1:, :].T, **plot_kwds, label="_")
-
-
-class ExactGPModel(gpytorch.models.ExactGP):
-    """API:
-
-    model.forward()             prior                   f_pred
-    model()                     posterior               f_pred
-
-    likelihood(model.forward()) prior with noise        y_pred
-    likelihood(model())         posterior with noise    y_pred
-    """
-
-    def __init__(self, X_train, y_train, likelihood):
-        super().__init__(X_train, y_train, likelihood)
-        self.mean_module = gpytorch.means.ConstantMean()
-        self.covar_module = gpytorch.kernels.ScaleKernel(
-            gpytorch.kernels.RBFKernel()
-        )
-
-    def forward(self, x):
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)