In [3]:
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
    
In [4]:
    
data = pd.read_csv('ex1data1.txt', header=None, names=['population', 'profit'])
data.head()
    
    Out[4]:
In [5]:
    
data.plot.scatter('population', 'profit')
    
    Out[5]:
    
The points seem to follow a line; a correlation test would confirm that the two variables are linearly related (a quick sketch of such a check follows below). We then convert the two columns into NumPy arrays.
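A minimal sketch of that check, using pandas' built-in Pearson correlation (this cell is not part of the original exercise):

# Hedged sketch: quick linear-association check before fitting anything.
# data is the dataframe loaded above; .corr() defaults to Pearson correlation.
print(data['population'].corr(data['profit']))
# A value close to 1 (or -1) suggests a strong linear relationship.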
In [6]:
    
X = np.array(data["population"])
y = np.array(data["profit"])
    
Now we implement two functions: predict (apply theta to the X array) and gradient_descent1 (iteratively update theta).
In [14]:
    
def predict(X, theta):
    # hypothesis h(x) = theta0 + theta1 * x, applied element-wise to X
    return X * theta[1] + theta[0]

def gradient_descent1(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    for i in range(num_iters):
        # simultaneous update of both parameters from the current theta
        theta0 = theta[0] - (alpha / m) * np.sum(predict(X, theta) - y)
        theta1 = theta[1] - (alpha / m) * np.dot(predict(X, theta) - y, X)  # X is 1-D, so a plain dot product suffices
        theta = [theta0, theta1]
    return theta
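The same update can also be written in fully vectorized form by prepending a bias column to X. This is only a sketch to illustrate the equivalence (the name gradient_descent_vec and the bias-column construction are not part of the original notebook); the loop version above is what the rest of the notebook uses.

# Hedged sketch: vectorized form of the same batch gradient descent update.
def gradient_descent_vec(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    Xb = np.column_stack([np.ones(m), X])       # design matrix with an intercept column
    theta = np.asarray(theta, dtype=float)
    for _ in range(num_iters):
        grad = Xb.T.dot(Xb.dot(theta) - y) / m  # gradient of the squared-error cost
        theta = theta - alpha * grad
    return theta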
    
In [15]:
    
theta = np.zeros(2, dtype=float)
theta = gradient_descent1(X, y, theta, 0.01, 1500)
theta
    
    Out[15]:
Expected output (for alpha = 0.01 and 1500 iterations): [-3.6302914394043597, 1.166362350335582]
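With theta learned, predict can be reused directly to estimate profit for new population values. A quick sanity check (the populations 3.5 and 7.0 are illustrative; the assumption that both columns are expressed in units of 10,000 comes from the original Coursera exercise, not from this notebook):

# Hedged sketch: predictions for two illustrative population values.
for pop in (3.5, 7.0):
    print('population %.1f -> predicted profit %.4f' % (pop, predict(pop, theta)))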
The visualize function plots our dataset together with the regression line corresponding to theta.
In [9]:
    
def visualize(theta):
    # scatter the training data and overlay the fitted regression line
    fig = plt.figure()
    ax = plt.axes()
    ax.set_xlim([4.5, 22.5])
    ax.set_ylim([-5, 25])
    ax.scatter(X, y)
    line_x = np.linspace(0, 22.5, 20)
    line_y = theta[0] + line_x * theta[1]  # same hypothesis as predict()
    ax.plot(line_x, line_y)
    plt.show()
    
In [10]:
    
visualize(theta)
    
    
The cost function lets us record how the cost evolves during gradient descent.
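For reference, this is the usual halved mean-squared-error cost that the code below computes:

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( \theta_0 + \theta_1 x^{(i)} - y^{(i)} \right)^2$$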
In [16]:
    
def cost(X, y, theta):
    # halved mean squared error: J(theta) = (1 / 2m) * sum((h(x) - y)^2)
    loss = predict(X, theta) - y
    return (1 / (2 * X.shape[0])) * np.dot(loss, loss)
    
In [17]:
    
cost(X, y, [0, 0])
    
    Out[17]:
Expected output for theta = [0, 0]: 32.072733877455676
The full version of gradient descent now also records the cost history.
In [21]:
    
def gradient_descent(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    J_history = []
    for i in range(num_iters):
        # simultaneous update, then record the cost for this iteration
        theta0 = theta[0] - (alpha / m) * np.sum(predict(X, theta) - y)
        theta1 = theta[1] - (alpha / m) * np.dot(predict(X, theta) - y, X)
        theta = [theta0, theta1]
        J_history.append(cost(X, y, theta))
    return theta, J_history
    
In [22]:
    
theta = np.zeros(2, dtype=float)
theta, J_history = gradient_descent(X, y, theta, 0.01, 1500)
theta
    
    Out[22]:
Expected output for alpha = 0.01 and 1500 iterations: [-3.6302914394043597, 1.166362350335582]
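With a learning rate this small, batch gradient descent should never increase the cost from one iteration to the next. A minimal check (not in the original notebook):

# Hedged sketch: the recorded cost should be monotonically non-increasing for alpha = 0.01.
assert all(c1 >= c2 for c1, c2 in zip(J_history, J_history[1:]))
print('initial cost: %.4f, final cost: %.4f' % (J_history[0], J_history[-1]))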
In [23]:
    
fig = plt.figure()
ax = plt.axes()
ax.plot(J_history)  # the cost decreases steadily and flattens out as theta converges
    
    Out[23]:
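The same cost curves can also be used to compare learning rates. A hedged sketch (the alpha values and the 200-iteration horizon are illustrative choices, not from the original notebook):

# Hedged sketch: overlay cost histories for a few illustrative learning rates.
fig = plt.figure()
ax = plt.axes()
for alpha in (0.001, 0.005, 0.01):
    _, hist = gradient_descent(X, y, np.zeros(2, dtype=float), alpha, 200)
    ax.plot(hist, label='alpha = %g' % alpha)
ax.legend()
plt.show()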