diff --git a/BLcourse2.3/02_two_dim.py b/BLcourse2.3/02_two_dim.py
index c5ab47b03f62733c7fee699f1bc352c5b73f7aa4..f808f8698ae338995aa2cc1a982a816202ebac9a 100644
--- a/BLcourse2.3/02_two_dim.py
+++ b/BLcourse2.3/02_two_dim.py
@@ -275,11 +275,16 @@ for ii in range(n_iter):
 # -
 
 ncols = len(history)
-fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))
-for ax, (p_name, p_lst) in zip(axs, history.items()):
-    ax.plot(p_lst)
-    ax.set_title(p_name)
-    ax.set_xlabel("iterations")
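+# layout="compressed" requires Matplotlib >= 3.6; it packs panels more tightly.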
+fig, axs = plt.subplots(
+    ncols=ncols, nrows=1, figsize=(ncols * 3, 3), layout="compressed"
+)
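+# Plot the recorded hyper param histories without gradient tracking.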
+with torch.no_grad():
+    for ax, (p_name, p_lst) in zip(axs, history.items()):
+        ax.plot(p_lst)
+        ax.set_title(p_name)
+        ax.set_xlabel("iterations")
 
 # Values of optimized hyper params
 pprint(extract_model_params(model))
@@ -328,7 +333,9 @@ assert (post_pred_f.mean == post_pred_y.mean).all()
 
 # +
 ncols = 4
-fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 7, 5))
+fig, axs = plt.subplots(
+    ncols=ncols, nrows=1, figsize=(ncols * 5, 4), layout="compressed"
+)
 
 vmax = post_pred_y.stddev.max()
 cs = []
diff --git a/BLcourse2.3/03_one_dim_SVI.py b/BLcourse2.3/03_one_dim_SVI.py
index 8efb75d312396a0776e79ff4d35b07f8a81865c1..e03f19cc224281bf130171dc4912c009b2f1b59f 100644
--- a/BLcourse2.3/03_one_dim_SVI.py
+++ b/BLcourse2.3/03_one_dim_SVI.py
@@ -94,10 +94,11 @@ print(f"{X_train.shape=}")
 print(f"{y_train.shape=}")
 print(f"{X_pred.shape=}")
 
-##fig, ax = plt.subplots()
-##ax.scatter(X_train, y_train, marker="o", color="tab:blue", label="noisy data")
-##ax.plot(X_pred, y_gt_pred, ls="--", color="k", label="ground truth")
-##ax.legend()
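+# Sanity-check plot: noisy training data vs. noise-free ground truth.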
+fig, ax = plt.subplots()
+ax.scatter(X_train, y_train, marker="o", color="tab:blue", label="noisy data")
+ax.plot(X_pred, y_gt_pred, ls="--", color="k", label="ground truth")
+ax.legend()
 # -
 
 # # Define GP model
@@ -247,9 +248,11 @@ for i_iter in range(n_iter):
         print(f"iter {i_iter + 1}/{n_iter}, {loss=:.3f}")
 # -
 
-# Plot hyper params and loss (negative log marginal likelihood) convergence
+# Plot scalar hyper params and loss (negative ELBO) convergence
 ncols = len(history)
-fig, axs = plt.subplots(ncols=ncols, nrows=1, figsize=(ncols * 5, 5))
+fig, axs = plt.subplots(
+    ncols=ncols, nrows=1, figsize=(ncols * 3, 3), layout="compressed"
+)
 with torch.no_grad():
     for ax, (p_name, p_lst) in zip(axs, history.items()):
         ax.plot(p_lst)
@@ -275,7 +278,7 @@ with torch.no_grad():
     post_pred_f = model(X_pred)
     post_pred_y = likelihood(model(X_pred))
 
-    fig, axs = plt.subplots(ncols=2, figsize=(12, 5), sharex=True, sharey=True)
+    fig, axs = plt.subplots(ncols=2, figsize=(14, 5), sharex=True, sharey=True)
     fig_sigmas, ax_sigmas = plt.subplots()
     for ii, (ax, post_pred, name, title) in enumerate(
         zip(