In [1]:
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
    
In [2]:
    
data = pd.read_csv('ex1data2.txt', header=None, names=['size', 'bedrooms', 'price'])
data.head()
    
    Out[2]:
Reformat the data into a matrix for the 'size' and 'bedrooms' columns and into a NumPy array for the 'price'.
In [3]:
    
X = data[['size', 'bedrooms']].to_numpy()   # feature matrix: 'size' and 'bedrooms' columns
y = np.array(data.price)
    
In [4]:
    
X.shape
    
    Out[4]:
As the dimensions of X and the future theta vector do not match for matrix multiplication, we add a column of ones to handle theta0 (the intercept term).
In [5]:
    
vector = np.ones(X.shape[0], dtype=float)   # intercept column of ones
X = data[['size', 'bedrooms']].to_numpy()
X = np.c_[vector, X]                        # prepend the ones column to the features
X
    
    Out[5]:
We want to normalize our features, so we subtract the mean and divide by the standard deviation of each column.
In [6]:
    
def featureNormalize(X):
    mean = X.mean(axis=0)
    stdev = X.std(axis=0)
    X = (X - mean)/stdev
    return X, mean, stdev
    
The feature normalisation is applied to every column except the first one (the column of ones), whose standard deviation is zero and would otherwise cause a division by zero.
In [7]:
    
X[:,1:], mean, stdev = featureNormalize(X[:,1:])
X
    
    Out[7]:
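As a quick sanity check (a minimal sketch, assuming the cell above has been run), the normalized feature columns should now have a mean close to 0 and a standard deviation close to 1:

X[:,1:].mean(axis=0)   # should be approximately [0, 0]
X[:,1:].std(axis=0)    # should be approximately [1, 1]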
In [8]:
    
def predict(X, theta):
    # hypothesis: h(theta) = X . theta
    return np.dot(X, theta)

def cost(X, y, theta):
    # mean squared error cost: J(theta) = 1/(2m) * sum((h - y)^2)
    return (1 / (2 * X.shape[0])) * np.sum((predict(X, theta) - y) ** 2)

def gradient_descent(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    J_history = []
    for i in range(num_iters):
        # vectorized update: theta := theta - (alpha/m) * X^T (X.theta - y)
        theta = theta - (alpha / m) * np.dot(predict(X, theta) - y, X)
        J_history.append(cost(X, y, theta))
    return theta, J_history
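For reference, the cost tracked in J_history is $J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\big(x^{(i)}\theta - y^{(i)}\big)^2$ and the vectorized update applied at each iteration is $\theta := \theta - \frac{\alpha}{m} X^T (X\theta - y)$, which is exactly what the np.dot call above computes.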
    
In [9]:
    
theta = np.zeros(3, dtype=float)
theta, J_history = gradient_descent(X, y, theta, 0.001, 5000)
theta
    
    Out[9]:
We can visualize the evolution of the cost as the number of iterations increases.
In [10]:
    
fig = plt.figure()
ax = plt.axes()
ax.plot(J_history)   # cost J(theta) per iteration
    
    Out[10]:
    
We now predict the price of a house with 3 bedrooms and 1650 square feet. The test input must be normalized with the same mean and standard deviation computed from the training data.
In [11]:
    
X_test = (np.array([1650, 3]) - mean) / stdev   # normalize with the training mean/stdev
X_test = np.hstack([1, X_test])                 # prepend the intercept term
predict(X_test, theta)
    
    Out[11]:
Expected output: 292220.53
Instead of iterating with gradient descent, theta can also be computed in closed form with the normal equation (fully vectorized, no iterations and no learning rate).
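In matrix form the normal equation reads $\theta = (X^T X)^{-1} X^T y$; the implementation below uses np.linalg.pinv (the pseudo-inverse) so it still works when $X^T X$ is not invertible.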
In [12]:
    
def normal_gradient_descent(X, y):
    # closed-form normal equation: theta = pinv(X^T X) X^T y
    theta = np.dot(np.dot(np.linalg.pinv(np.dot(X.T, X)), X.T), y)
    return theta
    
In [13]:
    
t = normal_gradient_descent(X, y)
t
    
    Out[13]:
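As a final check (a small sketch assuming the X_test vector from In [11] is still defined), the closed-form theta should give a prediction close to the gradient descent result above; small differences are expected since gradient descent may not have fully converged after 5000 iterations with alpha = 0.001.

predict(X_test, t)   # compare with the gradient descent prediction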