In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
data = pd.read_csv('ex1data2.txt', header=None, names=['size', 'bedrooms', 'price'])
data.head()
Out[2]:
Reformat the data into a matrix for the 'size' and 'bedrooms' columns and into a numpy array for the 'price' column
In [3]:
X = data[['size', 'bedrooms']].values
y = np.array(data.price)
In [4]:
X.shape
Out[4]:
As the dimensions of X and the future theta vector do not fit for matrix multiplication, we add a column of ones to X to handle the intercept term theta0
In [5]:
vector = np.ones(X.shape[0], dtype=float)
X = data[['size', 'bedrooms']].values
X = np.c_[vector, X]
X
Out[5]:
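As a quick sanity check on the shapes (a minimal illustration; m is the number of training examples):
X.shape  # (m, 3) after adding the ones column, so np.dot(X, theta) works with a theta of shape (3,)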
we want to normalize our features, so we subtract the mean and divide by the standard deviation
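In formula form (this mirrors the featureNormalize implementation below), each feature column $x_j$ becomes

$$x_j^{norm} = \frac{x_j - \mu_j}{\sigma_j}$$

where $\mu_j$ is the column mean and $\sigma_j$ its standard deviation.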
In [6]:
def featureNormalize(X):
    mean = X.mean(axis=0)
    stdev = X.std(axis=0)
    X = (X - mean) / stdev
    return X, mean, stdev
the feature normalisation is applied to every column except the first one, since the column of ones has a standard deviation of 0 and would cause a division by zero
In [7]:
X[:,1:], mean, stdev = featureNormalize(X[:,1:])
X
Out[7]:
In [8]:
def predict(X, theta):
    return np.dot(X, theta)

def cost(X, y, theta):
    return (1 / (2 * X.shape[0])) * np.sum((predict(X, theta) - y) ** 2)

def gradient_descent(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    J_history = []
    for i in range(num_iters):
        theta = theta - (alpha / m) * np.dot(predict(X, theta) - y, X)
        J_history.append(cost(X, y, theta))
    return theta, J_history
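For reference, cost implements the usual least-squares cost over the m training examples, and gradient_descent performs the batch update; both formulas read directly off the code above:

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$$

$$\theta := \theta - \frac{\alpha}{m} X^T (X\theta - y)$$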
In [9]:
theta = np.zeros(3, dtype=float)
theta, J_history = gradient_descent(X, y, theta, 0.001, 5000)
theta
Out[9]:
we can visualize the evolution of the cost as the number of iterations increases
In [10]:
fig = plt.figure()
ax = plt.axes()
ax.plot(J_history)
Out[10]:
we are trying to predict the price of a house with 3 bedrooms and 1650 square feet; the test features must be normalized with the same mean and standard deviation as the training data
In [11]:
X_test = (np.array([1650, 3]) - mean) / stdev
X_test = np.hstack([1, X_test])
predict(X_test, theta)
Out[11]:
Expected output: 292220.53
instead of running gradient descent, theta can also be computed directly with the normal equation (a vectorized, closed-form solution)
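Concretely, the closed-form solution implemented below is

$$\theta = (X^T X)^{-1} X^T y$$

with the pseudo-inverse (np.linalg.pinv) standing in for the inverse, so that a singular $X^T X$ does not break the computation.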
In [12]:
def normal_gradient_descent(X, y):
    # closed-form normal equation; pinv handles a singular X.T @ X
    theta = np.dot(np.dot(np.linalg.pinv(np.dot(X.T, X)), X.T), y)
    return theta
In [13]:
t = normal_gradient_descent(X, y)
t
Out[13]:
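As a quick check (a minimal sketch, assuming the X_test vector from In [11] is still in scope), the normal-equation theta should produce a prediction close to the gradient descent one:

predict(X_test, t)  # should be close to the gradient descent prediction above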