In [354]:
import pandas as pd
import numpy as np

Some test data


In [355]:
# sample dataset: one target (Y), three features (X1-X3), eight observations
data = pd.DataFrame()
data['Y']  = [30,31,32,33,31,29,28,21]
data['X1'] = [1,2,3,4,5,6,7,8]
data['X2'] = [4,3,2,1,2,3,4,5]
data['X3'] = [9,7,5,3,1,-1,-3,-4]
data


Out[355]:
    Y  X1  X2  X3
0  30   1   4   9
1  31   2   3   7
2  32   3   2   5
3  33   4   1   3
4  31   5   2   1
5  29   6   3  -1
6  28   7   4  -3
7  21   8   5  -4

Analytical Solution
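
The analytical (closed-form) least-squares solution comes from the normal equation

$$\hat{\theta} = (X^\top X)^{-1} X^\top y$$

The cell below adds a column of ones for the intercept and evaluates this expression directly.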


In [356]:
# Find the analytical solution via the normal equation
data['X0'] = [1,1,1,1,1,1,1,1]   # column of ones for the intercept
X = np.array(data[['X0','X1','X2','X3']])
y = np.array(data['Y'])

# theta = (X^T X)^{-1} X^T y
theta_true = np.dot(np.dot(np.linalg.inv(np.dot(X.T,X)),X.T),y)
theta_true


Out[356]:
array([ 90.38186813, -10.32692308,  -1.32692308,  -4.96703297])
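
As a cross-check (a sketch, not part of the original analysis), the same coefficients can be obtained with np.linalg.lstsq, which avoids forming the matrix inverse explicitly and is numerically more stable:


In [ ]:
# cross-check: solve the least-squares problem without an explicit inverse
theta_lstsq, res, rank, sv = np.linalg.lstsq(X, y, rcond=None)
theta_lstsq   # should agree with theta_true up to floating-point noise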

In [357]:
# predicted y (analytical solution, note there is some irreducible error)
np.dot(X,theta_true)


Out[357]:
array([ 30.04395604,  30.97802198,  31.91208791,  32.84615385,
        31.12637363,  29.40659341,  27.68681319,  21.        ])
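
To make that irreducible error concrete, a small sketch (not in the original cells) that inspects the residuals of the analytical fit:


In [ ]:
# residuals of the analytical fit: nonzero because 8 observations cannot be matched exactly by 4 parameters
resid = y - np.dot(X, theta_true)
resid, np.mean(resid**2)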

Gradient Descent
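
Gradient descent minimizes the mean squared error cost $J(\theta) = \frac{1}{2m}\lVert X\theta - y\rVert^2$ by repeating the update

$$\theta \leftarrow \theta - \frac{\alpha}{m} X^\top (X\theta - y)$$

where $m$ is the number of observations and $\alpha$ is the learning rate. The function below applies this update for a fixed number of iterations.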


In [402]:
def GradDesc_PT(y,X,alpha,theta,n):
    # batch gradient descent on the MSE cost:
    # theta <- theta - (alpha/m) * X^T (X theta - y)
    m = len(y)
    for k in range(n):
        theta = theta - (alpha/m) * np.dot(X.T, np.dot(X,theta) - y)
    return theta

In [419]:
alpha = 0.01                        # learning rate
theta_start = np.array([1,1,1,1])   # initial guess for theta

In [420]:
theta_gd = GradDesc_PT(y,X,alpha,theta_start,1000000)
theta_gd


Out[420]:
array([ 89.39902765, -10.14528428,  -1.33810929,  -4.87358234])

In [421]:
# predicted y (gradient descent solution, compare to analytical above)
np.dot(X,theta_gd)


Out[421]:
array([ 30.03906513,  30.97905482,  31.91904452,  32.85903421,
        31.12280532,  29.38657643,  27.65034753,  21.0405363 ])
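
Running a fixed million iterations works here, but it gives no visibility into convergence. A variant that also records the cost along the way is sketched below (GradDesc_with_cost and log_every are illustrative names, not part of the original code):


In [ ]:
# sketch: same update rule, but record the MSE cost periodically to monitor convergence
def GradDesc_with_cost(y, X, alpha, theta, n, log_every=100000):
    m = len(y)
    history = []
    for k in range(n):
        residual = np.dot(X, theta) - y                    # current prediction error
        theta = theta - (alpha/m) * np.dot(X.T, residual)  # gradient step on the MSE cost
        if k % log_every == 0:
            history.append((k, np.mean(residual**2)))
    return theta, history

theta_chk, cost_history = GradDesc_with_cost(y, X, alpha, theta_start, 1000000)
cost_history[-3:]   # the cost should flatten out once the iterates have converged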
