Bayesian Linear Regression


In [1]:
import MVG as MVG
import SuperMatExpr as SME
from sympy import *
import utils as utils
from IPython.display import display, Math, Latex

In [2]:
# Define some symbols
D, N, Ns = symbols('D N Ns')
sig_y = symbols('\u03c3_y')

1. Prior


In [12]:
# Prior
w = SME.SuperMatSymbol(D,1,name='w',mat_type='var')
mu_w = SME.SuperMatSymbol(D,1,mat_type='mean',dep_vars=[w],expanded=ZeroMatrix(D,1))
S_ww = SME.SuperMatSymbol(D,D,mat_type='covar',dep_vars=[w],expanded=Identity(D))
p_w = MVG.MVG([w],moments=[mu_w,S_ww,utils.getZ(S_ww)])

print("p_w:")
display(Latex(utils.matLatex(p_w)))


p_w:
\begin{align*} p\left(\mathbf{w}\right)&= \mathcal{N}\left(\mathbf{m}_{\mathbf{w}},\mathbf{\Sigma}_{\mathbf{w}}\right)\\ \mathbf{m}_{\mathbf{w}} &= \mathbf{0}\\ \mathbf{\Sigma}_{\mathbf{w}} &= \mathbf{I}\\ \end{align*}

2. Likelihood


In [13]:
# Likelihood of w given X
X = SME.SuperMatSymbol(D,N,name='X',mat_type='var')
y = SME.SuperMatSymbol(N,1,name='y',mat_type='var')
mu_y = SME.SuperMatSymbol(N,1,mat_type='mean',dep_vars=[y],cond_vars=[w,X],expanded=X.T*w)
S_yy = SME.SuperMatSymbol(N,N,mat_type='covar',dep_vars=[y],cond_vars=[w,X],expanded=sig_y**2*Identity(N))
p_y = MVG.MVG([y],moments=[mu_y,S_yy,utils.getZ(S_yy)],conditioned_vars=[w,X])

print("p_y:")
display(Latex(utils.matLatex(p_y)))


p_y:
\begin{align*} p\left(\mathbf{y}|\mathbf{w},\mathbf{X}\right)&= \mathcal{N}\left(\mathbf{m}_{\mathbf{y}|\mathbf{w},\mathbf{X}},\mathbf{\Sigma}_{\mathbf{y}|\mathbf{w},\mathbf{X}}\right)\\ \mathbf{m}_{\mathbf{y}|\mathbf{w},\mathbf{X}} &= \mathbf{X}^T \mathbf{w}\\ \mathbf{\Sigma}_{\mathbf{y}|\mathbf{w},\mathbf{X}} &= \sigma_y^{2} \mathbf{I}\\ \end{align*}

3. Posterior


In [14]:
# Joint of w and y
p_w_y = p_w*p_y

print("p_w_y:")
display(Latex(utils.matLatex(p_w_y)))


cond_vars:  [w]
X'*w
coeffs:  [X']
rem:  0
Lambda:  [0]
Omega:  [[X']]
marginal_mean:  [0]
utils.matmul(Omega,marginal_mean):  [0]
p_w_y:
\begin{align*} p\left(\mathbf{y},\mathbf{w}|\mathbf{X}\right)&= \mathcal{N}\left(\mathbf{m}_{\mathbf{y},\mathbf{w}|\mathbf{X}},\mathbf{\Sigma}_{\mathbf{y},\mathbf{w}|\mathbf{X}}\right)\\ \mathbf{m}_{\mathbf{y},\mathbf{w}|\mathbf{X}} &= \left[\begin{smallmatrix}\mathbf{0}\\\mathbf{0}\end{smallmatrix}\right]\\ \mathbf{\Sigma}_{\mathbf{y},\mathbf{w}|\mathbf{X}} &= \left[\begin{smallmatrix}\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}&\mathbf{X}^T\\\mathbf{X}&\mathbf{I}\end{smallmatrix}\right]\\ \end{align*}

In [15]:
# Inference: posterior over w
p_w_post = p_w_y.condition([y])

print("p_w_post:")
display(Latex(utils.matLatex(p_w_post)))


S.blockform:  [[S_(y,y)]]
S.variables_dim1:  {y: 0}
S.variables_dim2:  {y: 0}
old_mean:  [0]
P: [[S_(w,w)]], Q: [[S_(w,y)]], R: [[S_(y,w)]], S: [[S_(y,y)]]
v_marg_vars:  [y]
marg_mean:  [0]
p_w_post:
\begin{align*} p\left(\mathbf{w}|\mathbf{X},\mathbf{y}\right)&= \mathcal{N}\left(\mathbf{m}_{\mathbf{w}|\mathbf{X},\mathbf{y}},\mathbf{\Sigma}_{\mathbf{w}|\mathbf{X},\mathbf{y}}\right)\\ \mathbf{m}_{\mathbf{w}|\mathbf{X},\mathbf{y}} &= \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\\ \mathbf{\Sigma}_{\mathbf{w}|\mathbf{X},\mathbf{y}} &= \mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\\ \end{align*}

4. Prediction


In [16]:
#Prediction

# Likelihood of w given Xs
Xs = SME.SuperMatSymbol(D,Ns,name='X_s',mat_type='var')
ys = SME.SuperMatSymbol(Ns,1,name='y_s',mat_type='var')
mu_ys = SME.SuperMatSymbol(Ns,1,mat_type='mean',dep_vars=[ys],cond_vars=[w,Xs],expanded=Xs.T*w)
S_ysys = SME.SuperMatSymbol(Ns,Ns,mat_type='covar',dep_vars=[ys],cond_vars=[w,Xs],expanded=sig_y**2*Identity(Ns))
p_ys = MVG.MVG([ys],moments=[mu_ys,S_ysys,utils.getZ(S_ysys)],conditioned_vars=[w,Xs])

print("p_ys:")
display(Latex(utils.matLatex(p_ys)))


p_ys:
\begin{align*} p\left(\mathbf{y}_{*}|\mathbf{w},\mathbf{X}_{*}\right)&= \mathcal{N}\left(\mathbf{m}_{\mathbf{y}_{*}|\mathbf{w},\mathbf{X}_{*}},\mathbf{\Sigma}_{\mathbf{y}_{*}|\mathbf{w},\mathbf{X}_{*}}\right)\\ \mathbf{m}_{\mathbf{y}_{*}|\mathbf{w},\mathbf{X}_{*}} &= \mathbf{X}_{*}^T \mathbf{w}\\ \mathbf{\Sigma}_{\mathbf{y}_{*}|\mathbf{w},\mathbf{X}_{*}} &= \sigma_y^{2} \mathbf{I}\\ \end{align*}

In [17]:
# Joint of w and ys
p_w_ys = p_w_post*p_ys

print("p_w_ys:")
display(Latex(utils.matLatex(p_w_ys)))


cond_vars:  [w]
X_s'*w
coeffs:  [X_s']
rem:  0
Lambda:  [0]
Omega:  [[X_s']]
marginal_mean:  [S_(w,y)*S_(y,y)^-1*y]
utils.matmul(Omega,marginal_mean):  [X_s'*S_(w,y)*S_(y,y)^-1*y]
p_w_ys:
\begin{align*} p\left(\mathbf{y}_{*},\mathbf{w}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}\right)&= \mathcal{N}\left(\mathbf{m}_{\mathbf{y}_{*},\mathbf{w}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}},\mathbf{\Sigma}_{\mathbf{y}_{*},\mathbf{w}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}}\right)\\ \mathbf{m}_{\mathbf{y}_{*},\mathbf{w}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}} &= \left[\begin{smallmatrix}\mathbf{X}_{*}^T \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\\\mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\end{smallmatrix}\right]\\ \mathbf{\Sigma}_{\mathbf{y}_{*},\mathbf{w}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}} &= \left[\begin{smallmatrix}\sigma_y^{2} \mathbf{I} + \mathbf{X}_{*}^T \left(\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right) \mathbf{X}_{*}&\mathbf{X}_{*}^T \left(\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right)\\\left(\mathbf{I} + \left(-1 \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right)^T\right) \mathbf{X}_{*}&\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\end{smallmatrix}\right]\\ \end{align*}

In [18]:
# Predictive distribution of ys
p_ys_post = p_w_ys.marginalize([w])

print("p_ys_post:")
display(Latex(utils.matLatex(p_ys_post)))


new_blockfom:  [[S_(y_s,y_s), S_(y_s,w)], [S_(w,y_s), S_(w,w)]]
self.covar.blockform:  [[S_(y_s,y_s), S_(y_s,w)], [S_(w,y_s), S_(w,w)]]
new_blockfom:  [[S_(y_s,y_s)]]
self.covar.blockform:  [[S_(y_s,y_s), S_(y_s,w)], [S_(w,y_s), S_(w,w)]]
p_ys_post:
\begin{align*} p\left(\mathbf{y}_{*}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}\right)&= \mathcal{N}\left(\mathbf{m}_{\mathbf{y}_{*}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}},\mathbf{\Sigma}_{\mathbf{y}_{*}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}}\right)\\ \mathbf{m}_{\mathbf{y}_{*}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}} &= \mathbf{X}_{*}^T \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{y}\\ \mathbf{\Sigma}_{\mathbf{y}_{*}|\mathbf{y},\mathbf{X},\mathbf{X}_{*}} &= \sigma_y^{2} \mathbf{I} + \mathbf{X}_{*}^T \left(\mathbf{I} - \mathbf{X} \left(\sigma_y^{2} \mathbf{I} + \mathbf{X}^T \mathbf{X}\right)^{-1} \mathbf{X}^T\right) \mathbf{X}_{*}\\ \end{align*}

In [ ]: