Assignment 4a

Logistic Regression

A classification model that estimates an applicant’s probability of admission based on scores from two exams.
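The hypothesis is the standard logistic regression model: a linear combination of the exam scores passed through the sigmoid function, so the output can be read as a probability:

$$h_\theta(x) = \sigma(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}$$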


In [122]:
import numpy as np
data = np.genfromtxt('ex2data1.txt', delimiter=",")   # columns: exam 1 score, exam 2 score, label
m = len(data[:, 0])      # number of training examples
y = np.zeros((m, 1))
y[:, 0] = data[:, 2]     # admission labels (0/1) as a column vector
X = np.ones((m, 3))      # design matrix with a leading column of ones for the intercept
X[:, 1:] = data[:, 0:2]  # exam scores as features
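A quick sanity check of the shapes (a minimal sketch; the column layout of exam 1 score, exam 2 score, admission label is inferred from the slicing above):

print(X.shape, y.shape)   # expect (m, 3) and (m, 1)
print(np.unique(y))       # labels should be exactly [0. 1.]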
Visualizing the data

In [123]:
# Split example indices by label so admitted and not-admitted points can be styled separately
pos = []
neg = []
for i in range(0, m):
    if y[i] == 1:
        pos.append(i)
    else:
        neg.append(i)
import matplotlib.pyplot as plt
plt.plot(X[pos, 1], X[pos, 2], '+', label = "Admitted")
plt.plot(X[neg, 1], X[neg, 2], 'yo', label = "Not Admitted")
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()


Sigmoid function

In [124]:
def sigmoid(z):
    # Elementwise logistic function 1 / (1 + e^(-z))
    return 1.0 / (1.0 + np.exp(np.negative(z)))
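A couple of quick checks (sigmoid should return 0.5 at zero and apply elementwise to arrays):

print(sigmoid(0))                        # 0.5
print(sigmoid(np.array([-10.0, 10.0])))  # approx [0., 1.]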
Cost function

In [125]:
def cost():
    # Cross-entropy cost over the training set (uses globals X, y, theta, m)
    z = np.dot(X, theta)
    h = sigmoid(z)
    # If sigmoid overflows, use the SciPy equivalent instead:
    #   from scipy.special import expit; h = expit(z)
    t1 = -1 * y * np.log(h)       # -y * log(h)
    t2 = (y - 1) * np.log(1 - h)  # -(1 - y) * log(1 - h)
    J = np.sum(t1 + t2) / m
    return J
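For reference, this is the usual cross-entropy cost; the (y - 1) factor in t2 is just -(1 - y) with the minus sign folded in:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right)\log\left(1 - h_\theta(x^{(i)})\right)\right]$$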
Gradient function

In [126]:
# Gradient of the cost with respect to theta (uses globals X, y, theta, m)
def dif_cost():
    n = len(theta)
    grad = np.zeros((n, 1))
    z = np.dot(X, theta)
    h = sigmoid(z)
    # If sigmoid overflows, use the SciPy equivalent instead:
    #   from scipy.special import expit; h = expit(z)
    for j in range(n):
        x_j = X[:, j].reshape(m, 1)             # j-th feature column
        grad[j, 0] = np.sum((h - y) * x_j) / m  # partial derivative w.r.t. theta_j
    return grad
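Each pass of the loop evaluates one partial derivative of the cost:

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$

The same gradient can be computed without the loop. A vectorized sketch, using the same globals (the name dif_cost_vectorized is just illustrative):

def dif_cost_vectorized():
    h = sigmoid(np.dot(X, theta))    # (m, 1) predictions
    return np.dot(X.T, h - y) / m    # (n, 1) gradient, identical to dif_cost()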
Initialize theta values

In [127]:
theta = np.zeros((3, 1))
Train

In [128]:
no_iter = 300000
alpha = 0.00425
for i in range(no_iter):
    grad = dif_cost()
    theta = theta - alpha * grad
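To confirm that alpha and no_iter are reasonable, the cost can be logged during training. A minimal sketch that re-runs training from scratch (the logging interval is arbitrary):

theta = np.zeros((3, 1))
for i in range(no_iter):
    theta = theta - alpha * dif_cost()
    if i % 50000 == 0:
        print(i, cost())   # should decrease steadily for a well-chosen alpha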
Theta values:

In [129]:
theta


Out[129]:
array([[-25.56299656],
       [  0.20626618],
       [  0.20150353]])
Cost:

In [130]:
cost()


Out[130]:
0.20843031693614494
Test

For a student with an Exam 1 score of 45 and an Exam 2 score of 85, the predicted probability of admission is


In [131]:
A = [1, 45, 85]
z = np.dot(A, theta)      # theta^T x is a raw score, not yet a probability
probability = sigmoid(z)  # map through the sigmoid to get a probability
print(probability)


[ 0.69989165]
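To turn probabilities into hard predictions, threshold at 0.5. A short sketch for measuring accuracy on the training set:

predictions = (sigmoid(np.dot(X, theta)) >= 0.5).astype(int)   # (m, 1) of 0/1
print('Training accuracy: %.1f%%' % (np.mean(predictions == y) * 100))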
Visualizing the decision boundary
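The boundary is the set of points where the model is exactly undecided, i.e. where h(x) = 0.5, which happens when theta^T x = 0. Solving for the exam 2 score gives the straight line plotted below:

$$x_2 = -\frac{\theta_0 + \theta_1 x_1}{\theta_2}$$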

In [132]:
plot_x = [min(X[:, 1]) - 2, max(X[:, 1]) + 2]  # span of exam 1 scores (the x-axis)
plot_y = (-1.0 / theta[2, 0]) * (np.multiply(theta[1, 0], plot_x) + theta[0, 0])
plt.plot(X[pos, 1], X[pos, 2], '+', label = "Admitted")
plt.plot(X[neg, 1], X[neg, 2], 'yo', label = "Not Admitted")
plt.plot(plot_x, plot_y)
plt.xlabel('Exam 1 score')
plt.ylabel('Exam 2 score')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()