Assignment 4b

Regularised logistic regression

Predict whether microchips from a fabrication plant pass quality assurance


In [2]:
import numpy as np

# load the two QA test scores and the pass/fail label
data = np.genfromtxt('ex2data2.txt', delimiter = ",")
m = len(data[:, 0])               # number of training examples
y = data[:, 2].reshape((m, 1))    # labels (1 = accepted, 0 = rejected)
X = np.ones((m, 3))               # column of ones for the intercept term
X[:, 1:] = data[:, 0:2]
Visualizing the data

In [3]:
# collect indices of accepted (y = 1) and rejected (y = 0) chips
pos = []
neg = []
for i in range(0, m):
    if y[i] == 1:
        pos.append(i)
    else:
        neg.append(i)
import matplotlib.pyplot as plt
plt.plot(X[pos, 1], X[pos, 2], '+', label = "y=1")
plt.plot(X[neg, 1], X[neg, 2], 'yo', label = "y=0")
plt.axis([-1, 1.5, -1, 1.25])

plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()


Feature Mapping

One way to fit the data better is to create more features from each data point. The function mapFeature maps the features into all polynomial terms of x1 and x2 up to the sixth power. As a result of this mapping, our vector of two features (the scores on two QA tests) is transformed into a 28-dimensional vector.
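
Written out, the degree-6 mapping is

$$\mathrm{mapFeature}(x) = \left[\, 1,\; x_1,\; x_2,\; x_1^2,\; x_1 x_2,\; x_2^2,\; x_1^3,\; \ldots,\; x_1 x_2^5,\; x_2^6 \,\right]^T,$$

which for degree $d = 6$ contains $(d+1)(d+2)/2 = 28$ terms, including the constant term.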


In [4]:
def mapFeature(X1, X2):
    # map two features to all polynomial terms of x1 and x2 up to degree 6
    X1 = np.atleast_1d(X1)    # also accepts scalar inputs
    X2 = np.atleast_1d(X2)
    l = len(X1)
    degree = 6
    out = np.ones((l, (degree + 1) * (degree + 2) // 2))    # 28 columns for degree 6
    k = 1
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            out[:, k] = np.power(X1, i - j) * np.power(X2, j)
            k = k + 1
    return out
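
As a quick sanity check (illustrative, not part of the assignment), mapping a single point should yield one row of 28 features:

# spot check: one (x1, x2) pair -> a 1 x 28 feature row
row = mapFeature(np.array([0.5]), np.array([-0.5]))
print(row.shape)     # (1, 28)
print(row[0, :3])    # [ 1.   0.5 -0.5] -> bias, x1, x2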

In [5]:
X = mapFeature(X[:, 1], X[:, 2])    # X is now m x 28
Sigmoid function
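
The hypothesis is built from the logistic function

$$g(z) = \frac{1}{1 + e^{-z}}, \qquad h_\theta(x) = g(\theta^T x),$$

implemented below from NumPy primitives.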

In [6]:
def sigmoid(z):
    neg_z = np.negative(z)
    exp_z = np.exp(neg_z)
    den = np.add(1, exp_z)
    return np.reciprocal(den)
Cost function
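
The regularized cost implemented below is

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log h_\theta(x^{(i)}) - (1-y^{(i)})\log\left(1-h_\theta(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2,$$

where the intercept parameter $\theta_0$ is excluded from the regularization sum.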

In [7]:
def cost():
    # regularized logistic-regression cost; reads the globals X, y, theta, lambda_ and m
    z = np.dot(X, theta)
    h = sigmoid(z)
    # if the sigmoid above overflows, comment it out and use scipy's expit instead
    #from scipy.special import expit
    #h = expit(z)
    t1 = -1 * y * np.log(h)
    t2 = (y - 1) * np.log(1 - h)    # equals -(1 - y) * log(1 - h)
    sum_ = np.sum(t1 + t2)
    sq_theta = theta[1:, 0] * theta[1:, 0]    # theta[0] is not regularized
    sum_sq_theta = np.sum(sq_theta)
    J = (sum_ + (lambda_ * sum_sq_theta) / 2) / m
    return J
Gradient function
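
Its partial derivatives, computed below, are

$$\frac{\partial J}{\partial \theta_0} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_0^{(i)}, \qquad \frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)} + \frac{\lambda}{m}\theta_j \quad (j \ge 1).$$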

In [8]:
def dif_cost():
    # gradient of the regularized cost; reads the globals X, y, theta, lambda_ and m
    n = len(theta)
    grad = np.zeros((n, 1))
    new_grad = np.zeros((n, 1))
    z = np.dot(X, theta)
    h = sigmoid(z)
    # if the sigmoid above overflows, comment it out and use scipy's expit instead
    #from scipy.special import expit
    #h = expit(z)
    for j in range(0, n):
        x = np.zeros((m, 1))
        x[:, 0] = X[:, j]
        t = (h - y) * x
        sum_ = np.sum(t)
        grad[j, 0] = sum_ / m
    new_grad[0, 0] = grad[0, 0]    # the intercept term is not regularized
    new_grad[1:, 0] = grad[1:, 0] + lambda_ * theta[1:, 0] / m
    return new_grad
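
The per-feature loop above can be written as a single matrix product; a minimal vectorized sketch (hypothetical helper, same globals, same result):

def dif_cost_vec():
    # vectorized alternative to dif_cost(): one matrix product instead of a loop
    h = sigmoid(np.dot(X, theta))
    grad = np.dot(X.T, h - y) / m                # (n, 1) unregularized gradient
    grad[1:, 0] += lambda_ * theta[1:, 0] / m    # regularize everything but theta_0
    return grad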
Initialize theta values and lambda

In [9]:
theta = np.zeros((28, 1))    # one parameter per mapped feature
lambda_ = 1                  # regularization strength
Training

In [10]:
no_iter = 1000
alpha = 1
for i in range(no_iter):    # note: range(1, no_iter) would run only 999 iterations
    grad = dif_cost()
    theta = theta - alpha * grad
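
Because cost() reads the same global theta, it can be called after (or during) training to confirm that J actually decreased; an illustrative check:

# illustrative convergence check using the cost() defined above
print("cost after training : {0:.4f}".format(cost()))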
Accuracy

In [13]:
z_pred = np.dot(X, theta)
pred = sigmoid(z_pred)
# if the sigmoid above overflows, comment it out and use scipy's expit instead
#from scipy.special import expit
#pred = expit(z_pred)
# threshold the predicted probabilities at 0.5
for i in range(0, m):
    if pred[i, 0] >= 0.5:
        pred[i, 0] = 1
    else:
        pred[i, 0] = 0
# count the predictions that match the labels
count = 0
for i in range(0, m):
    if pred[i, 0] == y[i, 0]:
        count = count + 1
accuracy = 100 * count / m
print("Accuracy : {0:.2f} %".format(accuracy))


Accuracy : 83.05 %
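
The thresholding and counting above can also be collapsed into a single vectorized expression; a sketch that should agree with the loop version:

# vectorized equivalent of the accuracy loops above
pred_vec = (sigmoid(np.dot(X, theta)) >= 0.5).astype(float)
print("Accuracy : {0:.2f} %".format(100 * np.mean(pred_vec == y)))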
Plotting decision boundary
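
The boundary is the level set where theta^T mapFeature(x1, x2) = 0, i.e. where h_theta(x) = 0.5, so it is enough to evaluate z = theta^T mapFeature(u, v) on a grid and draw its zero contour.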

In [14]:
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)
z = np.zeros((len(u), len(v)))
for i in range(0, len(u)):    # starting at 1 would leave row/column 0 stuck at zero
    for j in range(0, len(v)):
        z[i, j] = np.dot(mapFeature(u[i], v[j]), theta)[0, 0]
z = np.transpose(z)    # contour expects z indexed as z[v_index, u_index]
# pos, neg and pyplot were already set up in the visualization cell above
plt.plot(X[pos, 1], X[pos, 2], '+', label = "y=1")
plt.plot(X[neg, 1], X[neg, 2], 'yo', label = "y=0")
plt.contour(u, v, z, levels = [0], colors = 'r')    # draw only the h = 0.5 level set
plt.axis([-1, 1.5, -1, 1.25])

plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.legend()
plt.show()


