Suppose you are the CEO of a restaurant franchise and are considering different cities for opening a new outlet. The chain already has trucks in various cities, and you have data on each city's population and the profit of the truck operating there. You would like to use these data to help select which city to expand to next.
The file ex1data1.txt contains the dataset. The first column is the population of a city and the second column is the profit of a food truck in that city. A negative value for profit indicates a loss.
We fit a linear hypothesis $h_{\theta}(x) = \theta^{T}x = \theta_{0} + \theta_{1}x_{1}$ to the data by minimizing the cost function

$J(\theta_{0},\theta_{1},...,\theta_{n}) = J(\theta) = \frac{1}{2m}\sum\limits_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})^2$

where $m$ is the number of training examples and $n$ the number of features. Gradient descent minimizes $J(\theta)$ by repeatedly stepping in the direction of steepest descent:
repeat until convergence {

$\theta_{j} := \theta_{j} - \alpha \frac{\partial}{\partial\theta_{j}} J(\theta) = \theta_{j} - \alpha \frac{1}{m}\sum\limits_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) \cdot x_{j}^{(i)} = \theta_{j} - \alpha \frac{1}{m}\sum\limits_{i=1}^{m}(\theta^{T}x^{(i)} - y^{(i)}) \cdot x_{j}^{(i)}$

} (simultaneously updating $\theta_{j}$ for $j=0,1,...,n$)
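In vectorized form (a restatement of the same update, assuming $X$ is the $m \times (n+1)$ design matrix whose first column is all ones and $y$ is the vector of targets), all components of $\theta$ are updated at once:

$\theta := \theta - \frac{\alpha}{m} X^{T}(X\theta - y)$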
In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# import data
data = pd.read_csv('mlclass-ex1/ex1data1.txt', header=None)
# initial data vectors
x = data[0].values
y = data[1].values
# training sample size
m = data[0].count()
# number of features
n = 1
# proper vector shapes
x.shape = (m,1)
y.shape = (m,1)
# visualize data set
plt.scatter(x,y, marker='x', color='r')
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
plt.show()
# design matrix: prepend a column of ones so theta_0 acts as the intercept
X = np.hstack((np.ones((m,1)), x))
# parameter vector, initialized to zeros
theta = np.zeros((n+1,1))
# gradient descent, number of iterations
iterations = 1500
# sanity-check array shapes before running gradient descent
print(theta.T, theta.T.shape)
print(theta, theta.shape)
print(X[0], X[0].shape)
# learning rate
alpha = 0.01
# cost function J(theta): average squared error over the training set
def J():
    J_sum = 0
    for i in range(m):
        J_sum += (theta.T.dot(X[i]) - y[i])**2
    return (1.0/(2.0*m)) * J_sum
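# equivalent vectorized cost (a sketch for cross-checking the loop above;
# assumes the same globals X, y, m, theta)
def J_vectorized():
    residuals = X.dot(theta) - y  # (m,1) vector of prediction errors
    return (1.0/(2.0*m)) * np.sum(residuals**2)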
# gradient term for theta_j: sum over examples of the error weighted by x_j
def delta(j):
    delta_sum = 0
    for i in range(m):
        delta_sum += (theta.T.dot(X[i]) - y[i]) * X[i][j]
    return delta_sum
# gradient descent algorithm
def gradient_descent(iterations):
    J_values = np.zeros((iterations,1))
    for iteration in range(iterations):
        # compute every update from the current theta before applying any,
        # so all theta_j are updated simultaneously
        temp_theta = np.zeros((n+1,1))
        for j in range(n+1):
            temp_theta[j] = theta[j] - alpha * (1.0/m) * delta(j)
        for j in range(n+1):
            theta[j] = temp_theta[j]
        # record the cost after this iteration
        J_values[iteration] = J()
    return J_values
# run gradient descent; collect the cost at each iteration
# (named J_history so it does not shadow the cost function J above)
J_history = gradient_descent(iterations)
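# optional cross-check (a sketch, not part of the original exercise code):
# the normal equation gives the least-squares optimum in closed form, which
# the gradient descent result should approximate after enough iterations
theta_closed = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
print(theta, theta_closed)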
# visualize the cost function (2-D)
cost_x = np.arange(iterations)
cost_x.shape = (iterations,1)
plt.plot(cost_x, J_history)
plt.xlabel('iterations')
plt.ylabel(r"$J(\theta)$")
plt.show()
# visualize the training data with the linear regression fit
plt.scatter(x,y, marker='x', color='r', label="training data")
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
# evaluate the fitted line over the population range (a new name, x_fit,
# so the training data x is not overwritten)
x_fit = np.linspace(np.min(x), np.max(x))
hypothesis = theta[0] + theta[1] * x_fit
plt.plot(x_fit, hypothesis, label="linear regression")
plt.legend(loc=4)
plt.show()
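# use the fitted parameters to predict profit for new city sizes (a usage
# sketch; the populations 35,000 and 70,000 are illustrative values, given
# here in units of 10,000 to match the training data)
for population in [3.5, 7.0]:
    profit = (theta[0] + theta[1] * population).item()
    print('population %i: predicted profit $%.2f' % (population * 10000, profit * 10000))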