In [1]:
from numpy import random
from scipy import stats

### I launch the notebook with 'ipython notebook --pylab inline', so numpy and pyplot
### are already imported into the namespace.  If not, uncomment these:
#from matplotlib.pyplot import *
#from numpy import *

Section 1.2.6: Bayesian Curve Fitting
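
The cells below implement the predictive distribution for Bayesian polynomial curve fitting, eqs. (1.69)-(1.72) in Bishop's PRML. The predictive density is Gaussian,

$$p(t \mid x, \mathbf{x}, \mathbf{t}) = \mathcal{N}\!\left(t \mid m(x),\, s^2(x)\right)$$

with mean, variance, and posterior covariance

$$m(x) = \beta\,\phi(x)^{\mathsf T}\,\mathbf{S} \sum_{n=1}^{N} \phi(x_n)\, t_n$$

$$s^2(x) = \beta^{-1} + \phi(x)^{\mathsf T}\,\mathbf{S}\,\phi(x)$$

$$\mathbf{S}^{-1} = \alpha\,\mathbf{I} + \beta \sum_{n=1}^{N} \phi(x_n)\,\phi(x_n)^{\mathsf T}$$

where $\phi(x) = (x^0, x^1, \ldots, x^M)^{\mathsf T}$ is the polynomial basis vector, $\alpha$ is the precision of the prior over the weights, and $\beta$ is the noise precision.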


In [2]:
def phi(x):
    #polynomial basis: column vector (1, x, x**2, ..., x**M), shape (M+1, 1)
    return array([x**i for i in range(M+1)]).reshape(M+1,1)

In [3]:
def m(x,x_tr,t_tr,alpha,beta):
    #posterior predictive mean, Bishop eq. (1.70)
    S = linalg.inv(alpha*identity(M+1) + beta*array([dot(phi(xn),phi(xn).T) for xn in x_tr]).sum(axis=0))
    return (beta*dot(phi(x).T,dot(S,array([t_tr[n]*phi(x_tr[n]) for n in range(len(x_tr))]).sum(axis=0))))[0][0]

In [4]:
def s(x,x_tr,t_tr,alpha,beta):
    #posterior predictive standard deviation, sqrt of the variance in Bishop eq. (1.71)
    S = linalg.inv(alpha*identity(M+1) + beta*array([dot(phi(xn),phi(xn).T) for xn in x_tr]).sum(axis=0))
    return sqrt((1.0/beta) + dot(phi(x).T,dot(S,phi(x))))[0][0]
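
Both m() and s() rebuild and invert S at every evaluation point, so plotting over a 1000-point grid does 1000 matrix inversions per curve. A minimal refactor sketch of one way to hoist that work out; make_predictor, m_fast, and s_fast are names I'm introducing here, not part of the original notebook:

import numpy as np

def make_predictor(x_tr, t_tr, alpha, beta, M):
    #rows of Phi are phi(x_n)^T, so dot(Phi.T, Phi) = sum_n phi(x_n) phi(x_n)^T
    Phi = np.array([[xn**i for i in range(M+1)] for xn in x_tr])
    #S depends only on the training data, so invert it once
    S = np.linalg.inv(alpha*np.identity(M+1) + beta*np.dot(Phi.T, Phi))
    b = beta*np.dot(S, np.dot(Phi.T, t_tr))     #so that m(x) = phi(x)^T b
    def m_fast(x):
        p = np.array([x**i for i in range(M+1)])
        return np.dot(p, b)
    def s_fast(x):
        p = np.array([x**i for i in range(M+1)])
        return np.sqrt(1.0/beta + np.dot(p, np.dot(S, p)))
    return m_fast, s_fast

#usage: m_hat, s_hat = make_predictor(x_tr, t_tr, alpha, beta, M)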

In [35]:
figure(figsize=(12,7))
N=10
M=10
noise_sigma=0.25
#Produce training set from sine curve with random Gaussian noise
x_tr = array(random.uniform(0,1,N))
t_tr = array([sin(2*pi*xi) + random.normal(0,noise_sigma) for xi in x_tr])
beta = 11
alpha = 5e-3
#determine the best value for beta by iteratively maximising the likelihood
#for i in range(5):
#    beta = float(N)/sum([(m(x_tr[n],x_tr,t_tr,alpha,beta)-t_tr[n])**2 for n in range(len(x_tr))])
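#(the commented-out update is the maximum-likelihood estimate of the noise precision,
# 1/beta = (1/N)*sum_n (m(x_n) - t_n)**2, i.e. Bishop eq. (1.63) with the posterior
# mean standing in for the fitted polynomial)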

plot(x_tr,t_tr,'bo')
x_arr = linspace(0,1,1000)
plot(x_arr,sin(2*pi*x_arr),'b',label='sin(2 pi x)')
plot(x_arr,[m(x,x_tr,t_tr,alpha,beta) for x in x_arr],'r',label='posterior mean')
#one sigma limits
s1a = [m(x,x_tr,t_tr,alpha,beta)+s(x,x_tr,t_tr,alpha,beta) for x in x_arr]
s1b = [m(x,x_tr,t_tr,alpha,beta)-s(x,x_tr,t_tr,alpha,beta) for x in x_arr]
#two sigma limits
s2a = [m(x,x_tr,t_tr,alpha,beta)+2*s(x,x_tr,t_tr,alpha,beta) for x in x_arr]
s2b = [m(x,x_tr,t_tr,alpha,beta)-2*s(x,x_tr,t_tr,alpha,beta) for x in x_arr]
plot(x_arr,s1a,'r--')
plot(x_arr,s1b,'r--')
plot(x_arr,s2a,'m--',alpha=0.5)
plot(x_arr,s2b,'m--',alpha=0.5)
fill_between(x_arr,s1a,s1b, facecolor='red', interpolate=True,alpha=0.3)
legend()
ylim(-2,2)
show()
#draw 1000 fresh test points from the same noisy sine and check the 2-sigma coverage
x_test = array(random.uniform(0,1,1000))
t_test = array([sin(2*pi*xi) + random.normal(0,noise_sigma) for xi in x_test])
print 'percent of test data falling in 2-sigma confidence range =',100.0*logical_and(t_test < [m(x,x_tr,t_tr,alpha,beta)+2.0*s(x,x_tr,t_tr,alpha,beta) for x in x_test],\
                        t_test > [m(x,x_tr,t_tr,alpha,beta)-2.0*s(x,x_tr,t_tr,alpha,beta) for x in x_test]).sum()/1000.0


percent of test data falling in 2-sigma confidence range = 96.1
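
For a well-calibrated Gaussian predictive distribution, roughly 95.4% of fresh points should land inside the 2-sigma band, so 96.1% over 1000 draws is about right.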

In [37]:
#This is just me being curious about what happens to beta as you iteratively re-estimate it, and how that affects the model
beta = 11
alpha = 5e-3
beta_arr = []
res_arr = []
for i in range(10):
    res_arr.append(100.0*logical_and(t_test < [m(x,x_tr,t_tr,alpha,beta)+2.0*s(x,x_tr,t_tr,alpha,beta) for x in x_test],\
                        t_test > [m(x,x_tr,t_tr,alpha,beta)-2.0*s(x,x_tr,t_tr,alpha,beta) for x in x_test]).sum()/1000.0)
    beta_arr.append(beta)
    beta = float(N)/sum([(m(x_tr[n],x_tr,t_tr,alpha,beta)-t_tr[n])**2 for n in range(len(x_tr))])
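    #at a fixed point of this update, 1/beta equals the mean squared training residual;
    #with noise_sigma = 0.25 the true noise precision is 1/0.25**2 = 16, though with
    #only N = 10 training points the iteration need not land there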

In [38]:
plot(range(10),beta_arr)
xlabel('iteration')
ylabel('beta')


Out[38]:
<matplotlib.text.Text at 0x109681c10>

In [39]:
plot(range(10),res_arr)
xlabel('iteration')
ylabel('percent test data within 2-sigma confidence')


Out[39]:
<matplotlib.text.Text at 0x1094e8390>

In [ ]: