In [1]:
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
    
In [2]:
    
# Read the comma-separated training set into a NumPy array.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
data_path = '/home/alien/Desktop/ML/ex1data1.txt'
data = np.loadtxt(data_path, delimiter=',')
    
In [3]:
    
# Column 0 of the data is the input feature, column 1 is the target value.
x, y = data[:, 0], data[:, 1]
m = y.shape[0]  # number of training examples

# Design matrix: a leading column of ones (intercept term) next to the feature.
X = np.column_stack((np.ones(m), x))

# Gradient descent starts from an all-zero (2, 1) parameter column.
theta = np.zeros((2, 1))
    
In [4]:
    
def computeCost(x,y,theta):
    """Return the mean-squared-error cost J = 1/(2m) * sum((x.theta - y)^2).

    Parameters
    ----------
    x : array-like, shape (m, n)
        Design matrix, one row per training example.
    y : array-like, shape (m,) or (m, 1)
        Target values.
    theta : array-like, shape (n,) or (n, 1)
        Model parameters.

    Returns
    -------
    float
        The cost for the given parameters.
    """
    # Flatten both the targets and the predictions before subtracting.
    # Without this, a (m, 1) prediction column minus a (m,) target vector
    # broadcasts to an (m, m) matrix and the summed cost is badly inflated.
    targets = np.asarray(y).ravel()
    m = len(targets)
    h = np.asarray(x).dot(theta).ravel()
    square_errors = (h - targets)**2
    J = (1.0/(2*m)) * square_errors.sum()
    return J
    
In [5]:
    
def gradientDescent(X, y, theta, alpha, iterations):
    """Fit linear-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix, one row per training example.
    y : array-like, shape (m,) or (m, 1)
        Target values.
    theta : array-like, shape (n,) or (n, 1)
        Starting parameters. The caller's array is NOT modified; a float
        copy is updated and returned instead.
    alpha : float
        Learning rate.
    iterations : int
        Number of gradient steps to take.

    Returns
    -------
    theta : ndarray
        Fitted parameters, with the same shape as the `theta` argument.
    J_history : ndarray, shape (iterations, 1)
        Cost after each step.
    """
    X = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float).ravel()
    m = len(targets)

    # Work on a flat float copy: this leaves the caller's array untouched and
    # avoids silent truncation if an integer-typed theta is passed in.
    theta_shape = np.shape(theta)
    params = np.array(theta, dtype=float).ravel()

    J_history = np.zeros(shape = (iterations,1))
    for i in range(iterations):
        # All parameters are updated simultaneously from the same residuals;
        # X.T.dot(residuals) is the per-column sum of residual * feature.
        residuals = X.dot(params) - targets
        params = params - alpha * (1.0 / m) * X.T.dot(residuals)
        # Pass column-shaped targets and parameters so the prediction and the
        # targets have matching (m, 1) shapes inside computeCost.
        J_history[i, 0] = computeCost(X, targets.reshape(-1, 1), params.reshape(-1, 1))
    return params.reshape(theta_shape), J_history
    
In [6]:
    
# Start from an all-zero parameter vector every time this cell runs, so that
# re-running it gives the same result even though `theta` is rebound to the
# fitted values at the end of the cell.
initial_theta = np.zeros_like(theta)
alpha = 0.01        # learning rate
iterations = 1500   # number of gradient-descent steps

# Cost before any training step. The parenthesised form of print works on
# both Python 2 and Python 3.
print(computeCost(X, y, initial_theta))
theta, J_history = gradientDescent(X, y, initial_theta, alpha, iterations)
    
    
In [7]:
    
# Population is expressed in units of 10,000, so 35,000 people is x = 3.5;
# the leading 1 multiplies the intercept parameter. The prediction is scaled
# by 10000 to convert it from units of $10,000 to dollars.
# .item() unwraps the single-element result so that %f receives a plain float
# instead of relying on implicit array-to-scalar conversion.
profit_35k = (np.array([1, 3.5]).dot(theta) * 10000).item()
print('For population = 35,000, we predict a profit of %f' % profit_35k)
    
    
In [8]:
    
# Population is expressed in units of 10,000, so 70,000 people is x = 7;
# the leading 1 multiplies the intercept parameter. The prediction is scaled
# by 10000 to convert it from units of $10,000 to dollars.
# .item() unwraps the single-element result so that %f receives a plain float
# instead of relying on implicit array-to-scalar conversion.
profit_70k = (np.array([1, 7]).dot(theta) * 10000).item()
print('For population = 70,000, we predict a profit of %f' % profit_70k)
    
    
In [10]:
    
# Scatter plot of the raw training data, drawn on an explicit Axes object.
fig, ax = plt.subplots()
ax.scatter(data[:, 0], data[:, 1], marker='o', c='b')
ax.set_title('Profits distribution')
ax.set_xlabel('Population of City in 10,000s')
ax.set_ylabel('Profit in $10,000s')
    
    Out[10]:
    
In [ ]:
    
#References: https://gist.github.com/marcelcaraciolo/1321575