diff --git a/BLcourse2.3/01_one_dim.py b/BLcourse2.3/01_one_dim.py
index 4566f23b857aecd3fcb5707e104f7d3cf379c40b..39150d5179d539280a5dddaa5cb1731a3860c376 100644
--- a/BLcourse2.3/01_one_dim.py
+++ b/BLcourse2.3/01_one_dim.py
@@ -46,6 +46,7 @@ import torch
 import gpytorch
 from matplotlib import pyplot as plt
 from matplotlib import is_interactive
+import numpy as np
 
 from utils import extract_model_params, plot_samples
 
@@ -81,8 +82,11 @@ def ground_truth(x, const):
     return torch.sin(x) * torch.exp(-0.2 * x) + const
 
 
-def generate_data(x, gaps=[[1, 3]], const=5):
-    y = ground_truth(x, const=const) + torch.randn(x.shape) * 0.1
+def generate_data(x, gaps=[[1, 3]], const=None, noise_std=None):
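+    # Sample i.i.d. Gaussian noise ~ N(0, noise_std^2); statistically the same
+    # as torch.randn(x.shape) * noise_std, but with an explicit noise model.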
+    noise_dist = torch.distributions.Normal(loc=0, scale=noise_std)
+    y = ground_truth(x, const=const) + noise_dist.sample(
+        sample_shape=(len(x),)
+    )
     msk = torch.tensor([True] * len(x))
     if gaps is not None:
         for g in gaps:
@@ -91,8 +95,11 @@ def generate_data(x, gaps=[[1, 3]], const=5):
 
 
 const = 5.0
+noise_std = 0.1
 x = torch.linspace(0, 4 * math.pi, 100)
-X_train, y_train, y_gt_train = generate_data(x, gaps=[[6, 10]], const=const)
+X_train, y_train, y_gt_train = generate_data(
+    x, gaps=[[6, 10]], const=const, noise_std=noise_std
+)
 X_pred = torch.linspace(
     X_train[0] - 2, X_train[-1] + 2, 200, requires_grad=False
 )
@@ -121,7 +128,7 @@ ax.legend()
 # `ScaleKernel`.
 #
 # In addition, we define a constant mean via `ConstantMean`. Finally we have
-# the likelihood noise $\sigma_n^2$. So in total we have 4 hyper params.
+# the likelihood variance $\sigma_n^2$. So in total we have 4 hyper params.
 #
 # * $\ell$ = `model.covar_module.base_kernel.lengthscale`
 # * $\sigma_n^2$ = `model.likelihood.noise_covar.noise`
@@ -359,12 +366,6 @@ pprint(extract_model_params(model, raw=False))
 # I_N)}$) predictions with
 #
 # $$\ma\Sigma = \testtest{\ma K} - \test{\ma K}\,(\ma K+\sigma_n^2\,\ma I)^{-1}\,\test{\ma K}^\top$$
-#
-# We find that $\ma\Sigma$ reflects behavior we would like to see from
-# epistemic uncertainty -- it is high when we have no data
-# (out-of-distribution). But this alone isn't the whole story. We need to add
-# the estimated noise level $\sigma_n^2$ in order for the confidence band to
-# cover the data.
 
 # +
 # Evaluation (predictive posterior) mode
@@ -444,6 +445,38 @@ with torch.no_grad():
     ax_sigmas.legend()
 # -
 
+# We find that $\ma\Sigma$ reflects behavior we would like to see from
+# epistemic uncertainty -- it is high when we have no data
+# (out-of-distribution). But this alone isn't the whole story. We need to add
+# the estimated likelihood variance $\sigma_n^2$ to obtain the total
+# uncertainty; only then is the uncertainty *calibrated*, i.e. the confidence
+# band actually covers the data.
+
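+# As a sanity check, we can reconstruct $\ma\Sigma$ from the learned kernel and
+# compare it against the covariance gpytorch reports. This is a minimal
+# sketch, assuming `post_pred_f` above was computed on `X_pred` and that
+# `.to_dense()` is available (older gpytorch versions use `.evaluate()`);
+# expect small deviations due to numerical jitter.
+
+# +
+with torch.no_grad():
+    # K = k(X_train, X_train), K_* = k(X_pred, X_train), K_** = k(X_pred, X_pred)
+    K = model.covar_module(X_train, X_train).to_dense()
+    K_star = model.covar_module(X_pred, X_train).to_dense()
+    K_starstar = model.covar_module(X_pred, X_pred).to_dense()
+    sigma_n2 = model.likelihood.noise_covar.noise
+    # Sigma = K_** - K_* (K + sigma_n^2 I)^{-1} K_*^T
+    Sigma = K_starstar - K_star @ torch.linalg.solve(
+        K + sigma_n2 * torch.eye(len(X_train)), K_star.T
+    )
+    print(
+        "max |diag(Sigma) - post_pred_f.variance|:",
+        (Sigma.diag() - post_pred_f.variance).abs().max().item(),
+    )
+# -
+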
+# # Let's check the learned noise
+#
+# We compare the target data noise (`noise_std`) to the learned GP noise, in
+# the form of the likelihood *standard deviation* $\sigma_n$. The latter is
+# the square root of `likelihood.noise_covar.noise` and can also be
+# calculated via $\sqrt{\mathrm{diag}(\ma\Sigma + \sigma_n^2\,\ma I_N) -
+# \mathrm{diag}(\ma\Sigma)}$.
+
+# +
+# Target noise to learn
+print("data noise:", noise_std)
+
+# The two values below must be the same (up to numerical error)
+print(
+    "learned noise:",
+    (post_pred_y.stddev**2 - post_pred_f.stddev**2).mean().sqrt().item(),
+)
+print(
+    "learned noise:",
+    np.sqrt(
+        extract_model_params(model, raw=False)["likelihood.noise_covar.noise"]
+    ),
+)
+# -
+
 # When running as script
 if not is_interactive():
     plt.show()