Linear Regression with one variable. Course: Machine Learning Prof.: Andrew Ng



In [1]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline



In [2]:

    
data = np.loadtxt('/home/alien/Desktop/ML/ex1data1.txt', delimiter=',')



In [3]:

    
y = data[:,1]
m = len(y)
x = data[:,0]
X = np.ones(shape = (m,2))
X[:,1] = x
theta = np.zeros(shape = (2,1))



In [4]:

    
def computeCost(x,y,theta):
    m = len(y)
    h = x.dot(theta)
    square_errors = (h - y)**2
    J = (1.0/(2*m)) * square_errors.sum()
    return J



In [5]:

    
def gradientDescent(X, y, theta, alpha, iterations):
    m = len(y)
    J_history = np.zeros(shape = (iterations,1))
    for i in range(iterations):
        
        predictions = X.dot(theta).flatten()

        errors_x1 = (predictions - y) * X[:, 0]
        errors_x2 = (predictions - y) * X[:, 1]

        theta[0][0] = theta[0][0] - alpha * (1.0 / m) * errors_x1.sum()
        theta[1][0] = theta[1][0] - alpha * (1.0 / m) * errors_x2.sum()

        J_history[i, 0] = computeCost(X, y, theta)

    return theta, J_history



In [6]:

    
print computeCost(X, y, theta)
theta, J_history = gradientDescent(X,y,theta,0.01,1500)









    



3111.05518611



In [7]:

    
print 'For population = 35,000, we predict a profit of %f' % (np.array([1, 3.5]).dot(theta) * 10000)









    



For population = 35,000, we predict a profit of 4519.767868



In [8]:

    
print 'For population = 70,000, we predict a profit of %f' % (np.array([1, 7]).dot(theta) * 10000)









    



For population = 70,000, we predict a profit of 45342.450129



In [10]:

    
plt.scatter(data[:, 0], data[:, 1], marker='o', c='b')
plt.title('Profits distribution')
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')









    Out[10]:





<matplotlib.text.Text at 0x7f08e6972690>



In [ ]:

    
#References: https://gist.github.com/marcelcaraciolo/1321575