In [2]:
# Load the microchip QA dataset: two test scores per sample plus a 0/1 label.
import numpy as np
data = np.genfromtxt('ex2data2.txt', delimiter = ",")
m = len(data[:,0])  # number of training examples
y = data[:, 2].reshape((m, 1))  # labels as an (m, 1) column vector
X = np.ones((m,3))  # design matrix with a leading column of ones (intercept term)
X[:, 1:] = data[:, 0:2]  # the two raw test scores
In [3]:
# Scatter the training data: '+' markers for accepted chips (y = 1),
# yellow dots for rejected ones (y = 0).
pos = [i for i in range(m) if y[i] == 1]
neg = [i for i in range(m) if y[i] != 1]

import matplotlib.pyplot as plt
plt.plot(X[pos, 1], X[pos, 2], '+', label = "y=1")
plt.plot(X[neg, 1], X[neg, 2], 'yo', label = "y=0")
plt.axis([-1, 1.5, -1, 1.25])
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
# Legend placed just outside the top-right corner of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()
One way to fit the data better is to create more features from each data point. The function mapFeature maps the features into all polynomial terms of x1 and x2 up to the sixth power. As a result of this mapping, our vector of two features (the scores on two QA tests) has been transformed into a 28-dimensional vector.
In [4]:
def mapFeature(X1, X2, degree = 6):
    """Map two feature vectors into all polynomial terms up to `degree`.

    Columns are ordered 1, x1, x2, x1^2, x1*x2, x2^2, ..., x2^degree.
    For the default degree of 6 this yields 28 columns.

    Parameters
    ----------
    X1, X2 : scalar or 1-D array-like
        The two raw feature columns (same length).
    degree : int, optional
        Highest total polynomial degree to generate (default 6, matching
        the original hard-coded behavior).

    Returns
    -------
    ndarray of shape (len(X1), (degree+1)*(degree+2)//2)
    """
    # Accept plain Python scalars as well as np.float64 / arrays; the
    # original only special-cased np.float64 and its reshape() calls were
    # no-ops (the returned arrays were discarded).
    X1 = np.atleast_1d(np.asarray(X1, dtype = np.float64)).ravel()
    X2 = np.atleast_1d(np.asarray(X2, dtype = np.float64)).ravel()
    l = len(X1)
    n_terms = (degree + 1) * (degree + 2) // 2  # 28 when degree == 6
    out = np.ones((l, n_terms))  # column 0 stays all-ones (bias term)
    k = 1
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            out[:, k] = np.power(X1, i - j) * np.power(X2, j)
            k = k + 1
    return out
In [5]:
X = mapFeature(X[:, 1], X[:, 2])
In [6]:
def sigmoid(z):
    """Element-wise logistic function: 1 / (1 + exp(-z)).

    Works on scalars and numpy arrays alike.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
In [7]:
def cost(X_in = None, y_in = None, theta_in = None, lambda_in = None):
    """Regularized logistic-regression cost J(theta).

    J = (1/m) * [ sum(-y*log(h) - (1-y)*log(1-h)) + (lambda/2) * ||theta[1:]||^2 ]

    Parameters (all optional; when omitted the notebook-level globals
    X, y, theta and lambda_ are used, preserving the original no-argument
    call signature).

    Returns
    -------
    float : the scalar cost.
    """
    X_mat = X if X_in is None else X_in
    y_vec = y if y_in is None else y_in
    th = theta if theta_in is None else theta_in
    lam = lambda_ if lambda_in is None else lambda_in
    n_samples = X_mat.shape[0]
    z = np.dot(X_mat, th)
    # Logistic hypothesis, inlined (equivalent to sigmoid(z) / expit(z)).
    h = 1.0 / (1.0 + np.exp(-z))
    t1 = -1 * y_vec * np.log(h)
    t2 = (y_vec - 1) * np.log(1 - h)
    sum_ = np.sum(t1 + t2)
    # The intercept weight theta[0] is excluded from regularization.
    sum_sq_theta = np.sum(th[1:, 0] * th[1:, 0])
    J = (sum_ + (lam * sum_sq_theta) / 2) / n_samples
    return J
In [8]:
def dif_cost(X_in = None, y_in = None, theta_in = None, lambda_in = None):
    """Gradient of the regularized logistic-regression cost w.r.t. theta.

    grad = X.T @ (h - y) / m, with lambda * theta[1:] / m added to every
    component except the intercept.

    Parameters (all optional; when omitted the notebook-level globals
    X, y, theta and lambda_ are used, preserving the original no-argument
    call signature).

    Returns
    -------
    ndarray of shape (n, 1) : the gradient vector.
    """
    X_mat = X if X_in is None else X_in
    y_vec = y if y_in is None else y_in
    th = theta if theta_in is None else theta_in
    lam = lambda_ if lambda_in is None else lambda_in
    n_samples = X_mat.shape[0]
    z = np.dot(X_mat, th)
    # Logistic hypothesis, inlined (equivalent to sigmoid(z) / expit(z)).
    h = 1.0 / (1.0 + np.exp(-z))
    # Vectorized: replaces the original per-column Python loop
    # grad[j] = sum((h - y) * X[:, j]) / m.
    grad = np.dot(X_mat.T, h - y_vec) / n_samples
    # Regularize every weight except the intercept theta[0].
    grad[1:, 0] += lam * th[1:, 0] / n_samples
    return grad
In [9]:
theta = np.zeros((28, 1))  # one weight per mapped polynomial feature (28 for degree 6)
lambda_ = 1  # regularization strength
In [10]:
# Batch gradient descent on the regularized logistic-regression cost.
no_iter = 1000  # number of descent steps
alpha = 1       # learning rate
# BUG FIX: the original loop used range(1, no_iter) and therefore ran only
# 999 of the intended 1000 iterations.
for i in range(no_iter):
    grad = dif_cost()
    theta = theta - alpha * grad
In [13]:
# Training-set accuracy: threshold the hypothesis at 0.5 and compare to labels.
z_pred = np.dot(X, theta)
pred = sigmoid(z_pred)
# if sigmoid function gives error uncomment the 2 statements below and comment the statement above
#from scipy.special import expit
#pred = expit(z_pred)
# Vectorized thresholding and counting replace the original element-wise
# Python loops (same result, idiomatic numpy).
pred = np.where(pred >= 0.5, 1.0, 0.0)
count = np.sum(pred[:, 0] == y[:, 0])
accuracy = 100 * count / m
print("Accuracy : {0:.2f} %".format(accuracy))
In [14]:
# Plot the non-linear decision boundary: evaluate theta over a grid in the
# original two-feature space and draw the z == 0 contour on top of the data.
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)
z = np.zeros((len(u), len(v)))
# BUG FIX: the original loops started at index 1, leaving the first grid row
# and column at 0 and distorting the contour along the lower/left edges.
for i in range(len(u)):
    for j in range(len(v)):
        z[i, j] = np.dot(mapFeature(u[i], v[j]), theta)
z = np.transpose(z)  # contour expects z indexed as z[y_index, x_index]
pos = [i for i in range(m) if y[i] == 1]
neg = [i for i in range(m) if y[i] != 1]
import matplotlib.pyplot as plt
plt.plot(X[pos, 1], X[pos, 2], '+', label = "y=1")
plt.plot(X[neg, 1], X[neg, 2], 'yo', label = "y=0")
plt.contour(u, v, z, 0, colors = 'r')
plt.axis([-1, 1.5, -1, 1.25])
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.legend()
plt.show()
In [ ]: