Import librairies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

reading file and describing it

In [2]:
data = pd.read_csv('ex1data2.txt', header=None, names=['size', 'bedrooms', 'price'])

size bedrooms price
0 2104 3 399900
1 1600 3 329900
2 2400 3 369000
3 1416 2 232000
4 3000 4 539900

Reformated data into a matrix for the 'size', 'bedrooms' columns and into a numpy array for the 'price'

In [3]:
X = data.as_matrix(('size', 'bedrooms'))
y = np.array(data.price)

In [4]:

(47, 2)

As the dimension of X and the future theta vector does not fit for matrix multiplication we add a 'ones' column column to handle theta0

In [5]:
vector = np.ones(X.shape[0], dtype=float)
X = data.as_matrix(('size', 'bedrooms'))
X = np.c_[vector, X]

array([[1.000e+00, 2.104e+03, 3.000e+00],
       [1.000e+00, 1.600e+03, 3.000e+00],
       [1.000e+00, 2.400e+03, 3.000e+00],
       [1.000e+00, 1.416e+03, 2.000e+00],
       [1.000e+00, 3.000e+03, 4.000e+00],
       [1.000e+00, 1.985e+03, 4.000e+00],
       [1.000e+00, 1.534e+03, 3.000e+00],
       [1.000e+00, 1.427e+03, 3.000e+00],
       [1.000e+00, 1.380e+03, 3.000e+00],
       [1.000e+00, 1.494e+03, 3.000e+00],
       [1.000e+00, 1.940e+03, 4.000e+00],
       [1.000e+00, 2.000e+03, 3.000e+00],
       [1.000e+00, 1.890e+03, 3.000e+00],
       [1.000e+00, 4.478e+03, 5.000e+00],
       [1.000e+00, 1.268e+03, 3.000e+00],
       [1.000e+00, 2.300e+03, 4.000e+00],
       [1.000e+00, 1.320e+03, 2.000e+00],
       [1.000e+00, 1.236e+03, 3.000e+00],
       [1.000e+00, 2.609e+03, 4.000e+00],
       [1.000e+00, 3.031e+03, 4.000e+00],
       [1.000e+00, 1.767e+03, 3.000e+00],
       [1.000e+00, 1.888e+03, 2.000e+00],
       [1.000e+00, 1.604e+03, 3.000e+00],
       [1.000e+00, 1.962e+03, 4.000e+00],
       [1.000e+00, 3.890e+03, 3.000e+00],
       [1.000e+00, 1.100e+03, 3.000e+00],
       [1.000e+00, 1.458e+03, 3.000e+00],
       [1.000e+00, 2.526e+03, 3.000e+00],
       [1.000e+00, 2.200e+03, 3.000e+00],
       [1.000e+00, 2.637e+03, 3.000e+00],
       [1.000e+00, 1.839e+03, 2.000e+00],
       [1.000e+00, 1.000e+03, 1.000e+00],
       [1.000e+00, 2.040e+03, 4.000e+00],
       [1.000e+00, 3.137e+03, 3.000e+00],
       [1.000e+00, 1.811e+03, 4.000e+00],
       [1.000e+00, 1.437e+03, 3.000e+00],
       [1.000e+00, 1.239e+03, 3.000e+00],
       [1.000e+00, 2.132e+03, 4.000e+00],
       [1.000e+00, 4.215e+03, 4.000e+00],
       [1.000e+00, 2.162e+03, 4.000e+00],
       [1.000e+00, 1.664e+03, 2.000e+00],
       [1.000e+00, 2.238e+03, 3.000e+00],
       [1.000e+00, 2.567e+03, 4.000e+00],
       [1.000e+00, 1.200e+03, 3.000e+00],
       [1.000e+00, 8.520e+02, 2.000e+00],
       [1.000e+00, 1.852e+03, 4.000e+00],
       [1.000e+00, 1.203e+03, 3.000e+00]])

we want to normalize our feature so we substract the mean and divide by the standard deviation

In [6]:
def featureNormalize(X):
    mean = X.mean(axis=0)
    stdev = X.std(axis=0)
    X = (X - mean)/stdev
    return X, mean, stdev

the feature normalisation is applied to each column except the first one to avoid the 0

In [7]:
X[:,1:], mean, stdev = featureNormalize(X[:,1:])

array([[ 1.00000000e+00,  1.31415422e-01, -2.26093368e-01],
       [ 1.00000000e+00, -5.09640698e-01, -2.26093368e-01],
       [ 1.00000000e+00,  5.07908699e-01, -2.26093368e-01],
       [ 1.00000000e+00, -7.43677059e-01, -1.55439190e+00],
       [ 1.00000000e+00,  1.27107075e+00,  1.10220517e+00],
       [ 1.00000000e+00, -1.99450507e-02,  1.10220517e+00],
       [ 1.00000000e+00, -5.93588523e-01, -2.26093368e-01],
       [ 1.00000000e+00, -7.29685755e-01, -2.26093368e-01],
       [ 1.00000000e+00, -7.89466782e-01, -2.26093368e-01],
       [ 1.00000000e+00, -6.44465993e-01, -2.26093368e-01],
       [ 1.00000000e+00, -7.71822042e-02,  1.10220517e+00],
       [ 1.00000000e+00, -8.65999486e-04, -2.26093368e-01],
       [ 1.00000000e+00, -1.40779041e-01, -2.26093368e-01],
       [ 1.00000000e+00,  3.15099326e+00,  2.43050370e+00],
       [ 1.00000000e+00, -9.31923697e-01, -2.26093368e-01],
       [ 1.00000000e+00,  3.80715024e-01,  1.10220517e+00],
       [ 1.00000000e+00, -8.65782986e-01, -1.55439190e+00],
       [ 1.00000000e+00, -9.72625673e-01, -2.26093368e-01],
       [ 1.00000000e+00,  7.73743478e-01,  1.10220517e+00],
       [ 1.00000000e+00,  1.31050078e+00,  1.10220517e+00],
       [ 1.00000000e+00, -2.97227261e-01, -2.26093368e-01],
       [ 1.00000000e+00, -1.43322915e-01, -1.55439190e+00],
       [ 1.00000000e+00, -5.04552951e-01, -2.26093368e-01],
       [ 1.00000000e+00, -4.91995958e-02,  1.10220517e+00],
       [ 1.00000000e+00,  2.40309445e+00, -2.26093368e-01],
       [ 1.00000000e+00, -1.14560907e+00, -2.26093368e-01],
       [ 1.00000000e+00, -6.90255715e-01, -2.26093368e-01],
       [ 1.00000000e+00,  6.68172729e-01, -2.26093368e-01],
       [ 1.00000000e+00,  2.53521350e-01, -2.26093368e-01],
       [ 1.00000000e+00,  8.09357707e-01, -2.26093368e-01],
       [ 1.00000000e+00, -2.05647815e-01, -1.55439190e+00],
       [ 1.00000000e+00, -1.27280274e+00, -2.88269044e+00],
       [ 1.00000000e+00,  5.00114703e-02,  1.10220517e+00],
       [ 1.00000000e+00,  1.44532608e+00, -2.26093368e-01],
       [ 1.00000000e+00, -2.41262044e-01,  1.10220517e+00],
       [ 1.00000000e+00, -7.16966387e-01, -2.26093368e-01],
       [ 1.00000000e+00, -9.68809863e-01, -2.26093368e-01],
       [ 1.00000000e+00,  1.67029651e-01,  1.10220517e+00],
       [ 1.00000000e+00,  2.81647389e+00,  1.10220517e+00],
       [ 1.00000000e+00,  2.05187753e-01,  1.10220517e+00],
       [ 1.00000000e+00, -4.28236746e-01, -1.55439190e+00],
       [ 1.00000000e+00,  3.01854946e-01, -2.26093368e-01],
       [ 1.00000000e+00,  7.20322135e-01,  1.10220517e+00],
       [ 1.00000000e+00, -1.01841540e+00, -2.26093368e-01],
       [ 1.00000000e+00, -1.46104938e+00, -1.55439190e+00],
       [ 1.00000000e+00, -1.89112638e-01,  1.10220517e+00],
       [ 1.00000000e+00, -1.01459959e+00, -2.26093368e-01]])

In [8]:
def predict(X, theta):
    return(, theta))

def cost(X, y, theta):
    return ((1/(2 * X.shape[0])) * (np.sum((predict(X, theta) - y)**2)))

def gradient_descent(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    J_history = []
    for i in range(0, num_iters):
        theta = theta - (alpha/m) *, theta) - y), X)
        J_history.append(cost(X, y, theta))
    return theta, J_history

In [9]:
theta = np.zeros(3, dtype=float)
theta, J_history = gradient_descent(X, y, theta, 0.001, 5000)

array([ 3.38124708e+05,  1.03002980e+05, -1.75438180e+02])

we can visualize the evolution of the cost as the iterations increases

In [10]:
fit = plt.figure()
ax = plt.axes()

[<matplotlib.lines.Line2D at 0x7f536203beb8>]

we are trying to predict the price of a house with 3 bedrooms and 1650 feet-square

In [11]:
X_test = (np.array([1650,3]) - mean)/stdev
X_test = np.hstack([1, X_test])


Expected output: 292220.53

gradient descent can also be implemented as a normal equation (vectorized version)

In [12]:
def normal_gradient_descent(X, y):
    theta = np.zeros(3, dtype=float)
    theta =, X)), np.transpose(X)), y)
    return (theta)

In [13]:
t = normal_gradient_descent(X, y)

array([340412.65957447, 109447.79646964,  -6578.35485416])