In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
data = pd.read_csv('ex1data1.txt', header=None, names=['population', 'profit'])
data.head()
Out[4]:
In [5]:
data.plot.scatter('population', 'profit')
Out[5]:
The dots seem to follow a line; we could have run a correlation test to check whether the two variables are linked. Now we transform the data columns into two numpy arrays.
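A minimal sketch of such a correlation test, assuming the data DataFrame loaded above:
# Pearson correlation between population and profit; a value close to 1
# indicates a strong positive linear relationship
data['population'].corr(data['profit'])
# equivalent check with numpy
np.corrcoef(data['population'], data['profit'])[0, 1]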
In [6]:
X = np.array(data["population"])
y = np.array(data["profit"])
Now we will develop two functions: predict (applies theta to the X array, i.e. h(x) = theta[0] + theta[1] * x) and gradient_descent1 (updates theta for a given number of iterations).
In [14]:
def predict(X, theta):
    # hypothesis: h(x) = theta0 + theta1 * x, applied element-wise
    return X * theta[1] + theta[0]

def gradient_descent1(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    for i in range(num_iters):
        # update both parameters simultaneously using the current theta
        theta0 = theta[0] - (alpha / m) * np.sum(predict(X, theta) - y)
        theta1 = theta[1] - (alpha / m) * np.dot(predict(X, theta) - y, np.transpose(X))
        theta = [theta0, theta1]
    return theta
In [15]:
theta = np.zeros(2, dtype=float)
theta = gradient_descent1(X, y, theta, 0.01, 1500)
theta
Out[15]:
Expected output (for alpha 0.01 and 1500 iterations): [-3.6302914394043597, 1.166362350335582]
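As a sanity check, the closed-form least-squares solution should land close to these values; a minimal sketch using numpy's solver (the design-matrix construction is an illustrative addition):
# stack a column of ones so the intercept theta0 is fitted too
A = np.column_stack([np.ones_like(X), X])
theta_exact, *_ = np.linalg.lstsq(A, y, rcond=None)
theta_exact  # should be close to the theta found by gradient descent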
The visualize function plots our dataset with the regression line corresponding to theta.
In [9]:
def visualize(theta):
    fig = plt.figure()
    ax = plt.axes()
    ax.set_xlim([4.5, 22.5])
    ax.set_ylim([-5, 25])
    ax.scatter(X, y)
    line_x = np.linspace(0, 22.5, 20)
    line_y = theta[0] + line_x * theta[1]
    ax.plot(line_x, line_y)
    plt.show()
In [10]:
visualize(theta)
The cost function will allow us to record the evolution of the cost during gradient descent.
In [16]:
def cost(X, y, theta):
    # half the mean squared error of the predictions
    loss = predict(X, theta) - y
    return (1 / (2 * X.shape[0])) * np.dot(loss, np.transpose(loss))
In [17]:
cost(X, y, [0, 0])
Out[17]:
Expected output for theta = [0, 0]: 32.072733877455676
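The same quantity can be written more compactly with np.mean, which gives a quick way to cross-check the implementation (an equivalent sketch; cost_mean is a hypothetical helper name):
def cost_mean(X, y, theta):
    # half the mean squared error, identical to cost() above
    return np.mean((predict(X, theta) - y) ** 2) / 2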
The full version of gradient descent now also records the cost history.
In [21]:
def gradient_descent(X, y, theta, alpha, num_iters):
    m = X.shape[0]
    J_history = []
    for i in range(num_iters):
        theta0 = theta[0] - (alpha / m) * np.sum(predict(X, theta) - y)
        theta1 = theta[1] - (alpha / m) * np.dot(predict(X, theta) - y, np.transpose(X))
        theta = [theta0, theta1]
        # record the cost after each update to monitor convergence
        J_history.append(cost(X, y, theta))
    return theta, J_history
In [22]:
theta = np.zeros(2, dtype=float)
theta, J_history = gradient_descent(X, y, theta, 0.01, 1500)
theta
Out[22]:
Expected output (for alpha 0.01 and 1500 iterations): [-3.6302914394043597, 1.166362350335582]
In [23]:
fig = plt.figure()
ax = plt.axes()
ax.plot(J_history)  # the cost should decrease and flatten out as theta converges
Out[23]:
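With the fitted theta, the model can be used to predict the profit for new city populations; a minimal sketch (the example values, and the assumption that populations are expressed in units of 10,000 in this dataset, are illustrative):
# hypothetical populations of 35,000 and 70,000 people
populations = np.array([3.5, 7.0])
predict(populations, theta)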