Bayesian Linear Regression


In [1]:
import os
import sys

# Make the folder that contains the symgp package importable.
# The location can be overridden by setting the SYMGP_PATH environment
# variable, so the notebook is not tied to one machine; the original
# developer path is kept as the fallback so existing setups keep working.
module_path = os.environ.get(
    'SYMGP_PATH',
    r'/Users/jaduol/Documents/Uni (original)/Part II/IIB/MEng Project/',
)
# Guard against duplicate entries when the cell is re-run.
if module_path not in sys.path:
    sys.path.append(module_path)

from symgp import SuperMatSymbol, utils, MVG, Variable
from sympy import symbols, ZeroMatrix, Identity
from IPython.display import display, Math, Latex

In [2]:
# Scalar symbols shared by the rest of the notebook.
# sig_y is the symbol whose name is sigma_y (Greek sigma via a unicode escape);
# it appears squared in the likelihood covariances.
sig_y = symbols('\u03c3_y')
# D, N and Ns are used as the matrix/vector dimensions of the variables.
D, N, Ns = symbols('D N Ns')

1. Prior


In [3]:
# Prior over the weights: p(w) = N(w; 0, I), with a D x 1 zero mean
# and a D x D identity covariance.
w = Variable('w', D, 1)
p_w = MVG(
    [w],
    mean=ZeroMatrix(D, 1),
    cov=Identity(D),
)

# Show the distribution as rendered LaTeX under a plain-text label.
print("p_w:")
display(Latex(utils.matLatex(p_w)))


p_w:
\begin{align*} p\left(\mathbf{w}\right)&= \mathcal{N}\left(\mathbf{w};\mathbf{m}_{\mathbf{w}},\mathbf{\Sigma}_{\mathbf{w}}\right)\\ \mathbf{m}_{\mathbf{w}} &= \mathbf{0}\\ \mathbf{\Sigma}_{\mathbf{w}} &= \mathbf{I}\\ \end{align*}

2. Likelihood


In [4]:
# Likelihood of the observations: p(y | w, X) = N(y; X^T w, sig_y^2 I).
# X and y are created with the shape specs (D, N) and N respectively.
X, y = utils.variables('X y', [(D, N), N])
p_y = MVG(
    [y],
    mean=X.T * w,
    cov=sig_y ** 2 * Identity(N),
    cond_vars=[w, X],
)

# Show the distribution as rendered LaTeX under a plain-text label.
print("p_y:")
display(Latex(utils.matLatex(p_y)))


p_y:
\begin{align*} p\left(\mathbf{y}|\mathbf{w},\mathbf{X}\right)&= \mathcal{N}\left(\mathbf{y};\mathbf{m}_{\mathbf{y}|\mathbf{w},\mathbf{X}},\mathbf{\Sigma}_{\mathbf{y}|\mathbf{w},\mathbf{X}}\right)\\ \mathbf{m}_{\mathbf{y}|\mathbf{w},\mathbf{X}} &= \mathbf{X}^T \mathbf{w}\\ \mathbf{\Sigma}_{\mathbf{y}|\mathbf{w},\mathbf{X}} &= \sigma_y^{2} \mathbf{I}\\ \end{align*}

3. Posterior


In [5]:
# Joint distribution p(y, w | X): the product of the prior p(w)
# and the likelihood p(y | w, X).
p_w_y = p_w * p_y

# Show the joint as rendered LaTeX under a plain-text label.
print("p_w_y:")
display(Latex(utils.matLatex(p_w_y)))


cond_vars:  {w}
conditional_cond_vars:  {w, X}
new_conditioned_vars:  [X]
p_w_y:
\begin{align*} p\left(\mathbf{y},\mathbf{w}|\mathbf{X}\right)&= \mathcal{N}\left(\left[\begin{smallmatrix}\mathbf{y}\\\mathbf{w}\end{smallmatrix}\right];\mathbf{m}_{\mathbf{y},\mathbf{w}|\mathbf{X}},\mathbf{\Sigma}_{\mathbf{y},\mathbf{w}|\mathbf{X}}\right)\\ \mathbf{m}_{\mathbf{y},\mathbf{w}|\mathbf{X}} &= \left[\begin{smallmatrix}\mathbf{0}\\\mathbf{0}\end{smallmatrix}\right]\\ \mathbf{\Sigma}_{\mathbf{y},\mathbf{w}|\mathbf{X}} &= \left[\begin{smallmatrix}\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}&\mathbf{X}^T\\\mathbf{X}&\mathbf{I}\end{smallmatrix}\right]\\ \end{align*}

In [6]:
# Inference: condition the joint on the observed y to obtain the
# posterior over the weights, p(w | X, y).
p_w_post = p_w_y.condition([y])

# Show the posterior as rendered LaTeX under a plain-text label.
print("p_w_post:")
display(Latex(utils.matLatex(p_w_post)))


p_w_post:
\begin{align*} p\left(\mathbf{w}|\mathbf{X},\mathbf{y}\right)&= \mathcal{N}\left(\mathbf{w};\mathbf{m}_{\mathbf{w}|\mathbf{X},\mathbf{y}},\mathbf{\Sigma}_{\mathbf{w}|\mathbf{X},\mathbf{y}}\right)\\ \mathbf{m}_{\mathbf{w}|\mathbf{X},\mathbf{y}} &= \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\\ \mathbf{\Sigma}_{\mathbf{w}|\mathbf{X},\mathbf{y}} &= \mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\\ \end{align*}

4. Prediction


In [7]:
# Prediction

# Likelihood at the new inputs X_*:
# p(y_* | w, X_*) = N(y_*; X_*^T w, sig_y^2 I).
# X_* and y_* are created with the shape specs (D, Ns) and Ns respectively.
Xs, ys = utils.variables('X_{*} y_{*}', [(D, Ns), Ns])
p_ys = MVG(
    [ys],
    mean=Xs.T * w,
    cov=sig_y ** 2 * Identity(Ns),
    cond_vars=[w, Xs],
)

# Show the distribution as rendered LaTeX under a plain-text label.
print("p_ys:")
display(Latex(utils.matLatex(p_ys)))


p_ys:
\begin{align*} p\left(\mathbf{y_{*}}|\mathbf{w},\mathbf{X_{*}}\right)&= \mathcal{N}\left(\mathbf{y_{*}};\mathbf{m}_{\mathbf{y_{*}}|\mathbf{w},\mathbf{X_{*}}},\mathbf{\Sigma}_{\mathbf{y_{*}}|\mathbf{w},\mathbf{X_{*}}}\right)\\ \mathbf{m}_{\mathbf{y_{*}}|\mathbf{w},\mathbf{X_{*}}} &= \mathbf{X_{*}}^T \mathbf{w}\\ \mathbf{\Sigma}_{\mathbf{y_{*}}|\mathbf{w},\mathbf{X_{*}}} &= \sigma_y^{2} \mathbf{I}\\ \end{align*}

In [8]:
# Joint distribution p(y_*, w | X_*, X, y): the product of the weight
# posterior p(w | X, y) and the likelihood p(y_* | w, X_*).
p_w_ys = p_w_post * p_ys

# Show the joint as rendered LaTeX under a plain-text label.
print("p_w_ys:")
display(Latex(utils.matLatex(p_w_ys)))


cond_vars:  {w}
conditional_cond_vars:  {X_{*}, w}
new_conditioned_vars:  [X_{*}, X, y]
p_w_ys:
\begin{align*} p\left(\mathbf{y_{*}},\mathbf{w}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}\right)&= \mathcal{N}\left(\left[\begin{smallmatrix}\mathbf{y_{*}}\\\mathbf{w}\end{smallmatrix}\right];\mathbf{m}_{\mathbf{y_{*}},\mathbf{w}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}},\mathbf{\Sigma}_{\mathbf{y_{*}},\mathbf{w}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}}\right)\\ \mathbf{m}_{\mathbf{y_{*}},\mathbf{w}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}} &= \left[\begin{smallmatrix}\mathbf{X_{*}}^T \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\\\mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\end{smallmatrix}\right]\\ \mathbf{\Sigma}_{\mathbf{y_{*}},\mathbf{w}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}} &= \left[\begin{smallmatrix}\sigma_y^{2} \mathbf{I} + \mathbf{X_{*}}^T \left(\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right) \mathbf{X_{*}}&\mathbf{X_{*}}^T \left(\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right)\\\left(\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right) \mathbf{X_{*}}&\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\end{smallmatrix}\right]\\ \end{align*}

In [9]:
# Predictive distribution p(y_* | X_*, X, y): marginalise the weights w
# out of the joint over (y_*, w).
p_ys_post = p_w_ys.marginalise([w])

# Show the predictive distribution as rendered LaTeX under a plain-text label.
print("p_ys_post:")
display(Latex(utils.matLatex(p_ys_post)))


self.name:  S_{y_{*},y_{*}|X_{*},X,y}
name:  
p_ys_post:
\begin{align*} p\left(\mathbf{y_{*}}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}\right)&= \mathcal{N}\left(\mathbf{y_{*}};\mathbf{m}_{\mathbf{y_{*}}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}},\mathbf{\Sigma}_{\mathbf{y_{*}}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}}\right)\\ \mathbf{m}_{\mathbf{y_{*}}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}} &= \mathbf{X_{*}}^T \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\\ \mathbf{\Sigma}_{\mathbf{y_{*}}|\mathbf{X_{*}},\mathbf{X},\mathbf{y}} &= \sigma_y^{2} \mathbf{I} + \mathbf{X_{*}}^T \left(\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right) \mathbf{X_{*}}\\ \end{align*}

In [ ]: