In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Load the comma-separated training data; later cells index it as a 2-D array
# (column 0 and column 1).
data = np.loadtxt(
    '/home/alien/Desktop/ML/ex1data1.txt',
    delimiter=',',
)
In [3]:
# Split the loaded columns: column 0 is the input feature, column 1 the target.
x, y = data[:, 0], data[:, 1]
m = y.shape[0]  # number of training examples

# Design matrix: a leading column of ones (intercept term) followed by x.
X = np.ones((m, 2))
X[:, 1] = x

# Parameters start at zero, stored as a (2, 1) column vector.
theta = np.zeros((2, 1))
In [4]:
def computeCost(x, y, theta):
    """Return the squared-error cost J(theta) for linear regression.

    J = 1 / (2 * m) * sum((x . theta - y) ** 2), where m is the number of
    training examples.

    Parameters
    ----------
    x : array-like, shape (m, n)
        Design matrix, one row per example.
    y : array-like, shape (m,) or (m, 1)
        Target values.
    theta : array-like, shape (n,) or (n, 1)
        Model parameters.

    Returns
    -------
    float
        The scalar cost.
    """
    y = np.asarray(y).ravel()
    m = len(y)
    # Flatten the predictions: with a column-vector theta, x.dot(theta) has
    # shape (m, 1), and subtracting a 1-D y of shape (m,) would broadcast to an
    # (m, m) matrix and inflate the cost instead of giving m residuals.
    h = np.asarray(x).dot(theta).ravel()
    square_errors = (h - y) ** 2
    J = (1.0 / (2 * m)) * square_errors.sum()
    return J
In [5]:
def gradientDescent(X, y, theta, alpha, iterations):
    """Fit linear-regression parameters with batch gradient descent.

    Each iteration applies the simultaneous update
        theta := theta - (alpha / m) * X^T (X . theta - y)
    and records the squared-error cost J = 1 / (2 * m) * sum(residuals ** 2)
    obtained with the updated parameters.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix, one row per example (any number of columns).
    y : array-like, shape (m,) or (m, 1)
        Target values.
    theta : array-like, shape (n,) or (n, 1)
        Initial parameters. The argument is NOT modified; a float copy is
        updated so that calling the function again with the same initial
        array gives the same result.
    alpha : float
        Learning rate.
    iterations : int
        Number of gradient steps.

    Returns
    -------
    theta : ndarray, shape (n, 1)
        Parameters after the final step.
    J_history : ndarray, shape (iterations, 1)
        Cost after each step.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    m = len(y)
    # Private float copy: avoids mutating the caller's array and avoids
    # truncation if an integer-typed theta is passed in.
    theta = np.array(theta, dtype=float).reshape(-1, 1)
    J_history = np.zeros(shape=(iterations, 1))

    # Residuals are kept 1-D so they never broadcast against y into (m, m).
    residuals = X.dot(theta).ravel() - y
    for i in range(iterations):
        # Gradient for every parameter at once, from the pre-update residuals.
        gradient = X.T.dot(residuals) / m
        theta -= alpha * gradient.reshape(-1, 1)
        # The post-update residuals give this step's cost and are reused as
        # the input to the next step's gradient.
        residuals = X.dot(theta).ravel() - y
        J_history[i, 0] = residuals.dot(residuals) / (2.0 * m)
    return theta, J_history
In [6]:
# Cost with the initial parameters, before any training.
# print(...) with a single argument runs on both Python 2 and Python 3; the
# bare `print x` statement is a SyntaxError on Python 3.
print(computeCost(X, y, theta))

# Hyperparameters for batch gradient descent.
ALPHA = 0.01        # learning rate
ITERATIONS = 1500   # number of gradient steps

theta, J_history = gradientDescent(X, y, theta, ALPHA, ITERATIONS)
In [7]:
# Populations are expressed in units of 10,000, so 35,000 people is x = 3.5;
# the leading 1 multiplies the intercept term. The dot product with the (2, 1)
# theta is a one-element array, so .item() extracts the plain float for %f
# instead of relying on implicit array-to-scalar conversion (deprecated in
# recent NumPy). print(...) works on both Python 2 and Python 3.
print('For population = 35,000, we predict a profit of %f' % (np.array([1, 3.5]).dot(theta).item() * 10000))
In [8]:
# Populations are expressed in units of 10,000, so 70,000 people is x = 7;
# the leading 1 multiplies the intercept term. The dot product with the (2, 1)
# theta is a one-element array, so .item() extracts the plain float for %f
# instead of relying on implicit array-to-scalar conversion (deprecated in
# recent NumPy). print(...) works on both Python 2 and Python 3.
print('For population = 70,000, we predict a profit of %f' % (np.array([1, 7]).dot(theta).item() * 10000))
In [10]:
# Scatter plot of the raw training data: first column on the x-axis,
# second column on the y-axis, drawn as blue circles.
plt.scatter(
    x=data[:, 0],
    y=data[:, 1],
    c='b',
    marker='o',
)
plt.xlabel('Population of City in 10,000s')
plt.title('Profits distribution')
# Keep ylabel as the final expression so the cell's displayed output is unchanged.
plt.ylabel('Profit in $10,000s')
Out[10]:
In [ ]:
# Reference: https://gist.github.com/marcelcaraciolo/1321575