diff --git a/BLcourse2.3/01_simple_gp_regression.ipynb b/BLcourse2.3/01_simple_gp_regression.ipynb index b6581706745e41359849d48b271ce9febbd37d58..826c08fab29a8ac94ae682dccf9749d9dc38c42c 100644 --- a/BLcourse2.3/01_simple_gp_regression.ipynb +++ b/BLcourse2.3/01_simple_gp_regression.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "8bc26c14", + "id": "16bd8a69", "metadata": {}, "source": [ "# Notation\n", @@ -23,7 +23,7 @@ }, { "cell_type": "markdown", - "id": "3ef36e32", + "id": "87093c43", "metadata": {}, "source": [ "# Imports, helpers, setup" @@ -32,7 +32,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c86d69b2", + "id": "69a0af7f", "metadata": {}, "outputs": [], "source": [ @@ -105,7 +105,7 @@ }, { "cell_type": "markdown", - "id": "9c543940", + "id": "eba6d895", "metadata": { "lines_to_next_cell": 2 }, @@ -122,7 +122,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0674cc5a", + "id": "24e8765e", "metadata": {}, "outputs": [], "source": [ @@ -150,7 +150,7 @@ }, { "cell_type": "markdown", - "id": "e4ddc32f", + "id": "9b980b1c", "metadata": { "lines_to_next_cell": 2 }, @@ -179,7 +179,7 @@ { "cell_type": "code", "execution_count": null, - "id": "93822ff7", + "id": "a0a0c12c", "metadata": { "lines_to_next_cell": 2 }, @@ -215,7 +215,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3288a7ca", + "id": "e2778668", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9348a1db", + "id": "eacc9f2b", "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b2b5d326", + "id": "f90ce51c", "metadata": { "lines_to_next_cell": 2 }, @@ -254,7 +254,7 @@ }, { "cell_type": "markdown", - "id": "c7309ee3", + "id": "eb9ed01f", "metadata": {}, "source": [ "# Sample from the GP prior\n", @@ -269,7 +269,7 @@ { "cell_type": "code", "execution_count": null, - "id": "56b26cd6", + "id": "894b7e36", "metadata": { "lines_to_next_cell": 2 }, @@ -305,7 +305,7 @@ }, { "cell_type": "markdown", - "id": "6758e47f", + "id": "33bf94f7", "metadata": {}, "source": [ "Let's investigate the samples more closely. A constant mean $\\ve m(\\ma X) =\n", @@ -318,7 +318,7 @@ { "cell_type": "code", "execution_count": null, - "id": "69ae529a", + "id": "ff0dcbaa", "metadata": {}, "outputs": [], "source": [ @@ -332,7 +332,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df46aa3f", + "id": "7018cb26", "metadata": {}, "outputs": [], "source": [ @@ -346,7 +346,7 @@ }, { "cell_type": "markdown", - "id": "5a9350d8", + "id": "ea005dc2", "metadata": {}, "source": [ "# Fit GP to data: optimize hyper params\n", @@ -366,7 +366,7 @@ { "cell_type": "code", "execution_count": null, - "id": "638d1244", + "id": "67b5961b", "metadata": { "lines_to_next_cell": 2 }, @@ -403,7 +403,7 @@ }, { "cell_type": "markdown", - "id": "777b67d9", + "id": "5faec1a4", "metadata": {}, "source": [ "# Run prediction\n", @@ -424,7 +424,7 @@ { "cell_type": "code", "execution_count": null, - "id": "740fb21a", + "id": "e56fd041", "metadata": {}, "outputs": [], "source": [ @@ -483,7 +483,7 @@ ], "metadata": { "jupytext": { - "formats": "ipynb,py:percent" + "formats": "ipynb,py:light" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", diff --git a/BLcourse2.3/01_simple_gp_regression.py b/BLcourse2.3/01_simple_gp_regression.py index fcfcda544ec40c9be131cde8bb0937b0980f886c..a48f086fccda0a7586a468ac21598987296be2e8 100644 --- a/BLcourse2.3/01_simple_gp_regression.py +++ b/BLcourse2.3/01_simple_gp_regression.py @@ -1,19 +1,18 @@ # --- # jupyter: # jupytext: -# formats: ipynb,py:percent +# formats: ipynb,py:light # text_representation: # extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.14.4 +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.16.2 # kernelspec: # display_name: Python 3 (ipykernel) # language: python # name: python3 # --- -# %% [markdown] # # Notation # $\newcommand{\ve}[1]{\mathit{\boldsymbol{#1}}}$ # $\newcommand{\ma}[1]{\mathbf{#1}}$ @@ -29,10 +28,9 @@ # we still denote the collection of all $\ve x_i = x_i\in\mathbb R$ points with # $\ma X$ to keep the notation consistent with the slides. -# %% [markdown] # # Imports, helpers, setup -# %% +# + # %matplotlib inline import math @@ -98,8 +96,8 @@ def plot_samples(ax, X_pred, samples, label=None, **kwds): torch.set_default_dtype(torch.float64) torch.manual_seed(123) +# - -# %% [markdown] # # Generate toy 1D data # # Here we generate noisy 1D data `X_train`, `y_train` as well as an extended @@ -109,7 +107,7 @@ torch.manual_seed(123) # show how the model uncertainty will behave there. -# %% +# + def generate_data(x, gaps=[[1, 3]], const=5): y = torch.sin(x) * torch.exp(-0.2 * x) + torch.randn(x.shape) * 0.1 + const msk = torch.tensor([True] * len(x)) @@ -130,8 +128,8 @@ print(f"{y_train.shape=}") print(f"{X_pred.shape=}") plt.scatter(X_train, y_train, marker="o", color="tab:blue") +# - -# %% [markdown] # # Define GP model # # We define the simplest possible textbook GP model using a Gaussian @@ -153,7 +151,7 @@ plt.scatter(X_train, y_train, marker="o", color="tab:blue") # * $m(\ve x) = c$ = `model.mean_module.constant` -# %% +# + class ExactGPModel(gpytorch.models.ExactGP): """API: @@ -179,17 +177,16 @@ class ExactGPModel(gpytorch.models.ExactGP): likelihood = gpytorch.likelihoods.GaussianLikelihood() model = ExactGPModel(X_train, y_train, likelihood) +# - -# %% # Inspect the model print(model) -# %% # Default start hyper params pprint(extract_model_params(model, raw=False)) -# %% +# + # Set new start hyper params model.mean_module.constant = 3.0 model.covar_module.base_kernel.lengthscale = 1.0 @@ -197,9 +194,9 @@ model.covar_module.outputscale = 1.0 model.likelihood.noise_covar.noise = 0.1 pprint(extract_model_params(model, raw=False)) +# - -# %% [markdown] # # Sample from the GP prior # # We sample a number of functions $f_j, j=1,\ldots,M$ from the GP prior and @@ -208,7 +205,7 @@ pprint(extract_model_params(model, raw=False)) # c, \ma K)$. Each sampled vector $\pred{\ve y}\in\mathbb R^{N'}$ and the # covariance (kernel) matrix is $\ma K\in\mathbb R^{N'\times N'}$. -# %% +# + model.eval() likelihood.eval() @@ -235,9 +232,9 @@ with torch.no_grad(): label="confidence", ) ax.legend() +# - -# %% [markdown] # Let's investigate the samples more closely. A constant mean $\ve m(\ma X) = # \ve c$ does *not* mean that each sampled vector $\pred{\ve y}$'s mean is # equal to $c$. Instead, we have that at each $\ve x_i$, the mean of @@ -245,14 +242,12 @@ with torch.no_grad(): # \approx c$ and for $M\rightarrow\infty$ it will be exactly $c$. # -# %% # Look at the first 20 x points from M=10 samples print(f"{f_samples.shape=}") print(f"{f_samples.mean(axis=0)[:20]=}") print(f"{f_samples.mean(axis=0).mean()=}") print(f"{f_samples.mean(axis=0).std()=}") -# %% # Take more samples, the means should get closer to c f_samples = pri_f.sample(sample_shape=torch.Size((M * 200,))) print(f"{f_samples.shape=}") @@ -260,7 +255,6 @@ print(f"{f_samples.mean(axis=0)[:20]=}") print(f"{f_samples.mean(axis=0).mean()=}") print(f"{f_samples.mean(axis=0).std()=}") -# %% [markdown] # # Fit GP to data: optimize hyper params # # In each step of the optimizer, we condition on the training data (e.g. do @@ -274,7 +268,7 @@ print(f"{f_samples.mean(axis=0).std()=}") # Observe how all hyper params converge. In particular, note that the constant # mean $m(\ve x)=c$ converges to the `const` value in `generate_data()`. -# %% +# + # Train mode model.train() likelihood.train() @@ -302,11 +296,11 @@ for ax, (p_name, p_lst) in zip(axs, history.items()): ax.plot(p_lst) ax.set_title(p_name) ax.set_xlabel("iterations") +# - -# %% [markdown] # # Run prediction - +# # We show "noiseless" (left: $\sigma = \sqrt{\mathrm{diag}(\ma\Sigma)}$) vs. # "noisy" (right: $\sigma = \sqrt{\mathrm{diag}(\ma\Sigma + \sigma_n^2\,\ma # I_N)}$) predictions, where $\ma\Sigma\equiv\cov(\ve f_*)$ is the posterior @@ -319,7 +313,7 @@ for ax, (p_name, p_lst) in zip(axs, history.items()): # https://elcorto.github.io/gp_playground/content/gp_pred_comp/notebook_plot.html # for details. -# %% +# + # Evaluation (predictive posterior) mode model.eval() likelihood.eval()