In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
In [3]:
datafile = 'ex2/ex2data1.txt'
data = np.loadtxt(datafile, delimiter=',', usecols=(0,1,2), unpack=True)
In [4]:
X = np.column_stack((data[0], data[1]))
y = data[2]  # keep the targets 1-D; scikit-learn expects a flat label array
In [5]:
# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]
In [6]:
# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]
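The manual slices above hold out the last 20 examples without shuffling. As an alternative sketch (assuming a scikit-learn version that ships `sklearn.model_selection`), the built-in helper shuffles before splitting; it is shown for reference and not used below:
In [ ]:
# Alternative, shuffled split (the manual slices above are what this notebook uses).
# random_state=0 is an arbitrary seed chosen for reproducibility.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20, random_state=0)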
In [7]:
# Create a one-vs-rest logistic regression classifier
classifier = OneVsRestClassifier(LogisticRegression())
In [8]:
# Train the model using the training sets
classifier.fit(X_train, y_train)
In [9]:
coef = classifier.coef_
intercept = classifier.intercept_
print "Coefficient: ", coef
print 'Intercept" ', intercept
print "Residual sum of squares: %.2f" % np.mean((classifier.predict(X_test) - y_test) ** 2)
print 'Variance score: %.2f' % classifier.score(X_test, y_test)
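For a classifier, `sklearn.metrics` gives more direct diagnostics than a squared-error figure; a minimal sketch using the standard accuracy and confusion-matrix helpers:
In [ ]:
from sklearn.metrics import accuracy_score, confusion_matrix
y_pred = classifier.predict(X_test)
# Rows are true classes, columns are predicted classes.
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
print("Test accuracy: %.2f" % accuracy_score(y_test, y_pred))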
In [10]:
admitted = y == 1
rejected = y == 0
plt.figure(figsize=(7, 7))
plt.scatter(np.extract(admitted, data[0]),
            np.extract(admitted, data[1]),
            c='b', marker='+', label='admitted')
plt.scatter(np.extract(rejected, data[0]),
            np.extract(rejected, data[1]),
            c='y', marker='o', label='rejected')
plt.xlabel('Exam 1 score');
plt.ylabel('Exam 2 score');
plt.gca().set_aspect('equal', 'datalim')
# The decision boundary is the line where theta_0 + theta_1*x1 + theta_2*x2 = 0,
# i.e. x2 = -(theta_0 + theta_1*x1) / theta_2.
ex1 = np.linspace(30, 100, 100)
ex2 = -(coef[:, 0] * ex1 + intercept[:, 0]) / coef[:, 1]
plt.plot(ex1, ex2, color='r', label='decision boundary')
plt.legend();
In [11]:
theta = np.concatenate((intercept[0], coef[0]), axis=0)
# Sigmoid of theta^T x for a student with exam scores 45 and 85.
prob = 1 / (1 + np.exp(-np.dot(theta, [1, 45, 85])))
print("For a student with scores 45 and 85, we predict an admission probability of %f" % prob)
In [12]:
datafile = 'ex2/ex2data2.txt'
data = np.loadtxt(datafile, delimiter=',', usecols=(0,1,2), unpack=True)
In [17]:
def map_features(f1, f2, order=1):
    '''Map features f1 and f2 to all polynomial terms up to the given order.'''
    assert order >= 1
    def terms():  # renamed so we don't shadow the built-in iter()
        for i in range(1, order + 1):
            for j in range(i + 1):
                yield np.power(f1, i - j) * np.power(f2, j)
    return np.vstack(terms())
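A quick sanity check on the mapping (the arithmetic follows from the nested loops above): each order i contributes i + 1 terms, so order=6 on two features yields 2 + 3 + 4 + 5 + 6 + 7 = 27 rows, with the intercept left for scikit-learn to fit:
In [ ]:
# One sample point mapped to its 27 polynomial terms.
print(map_features(np.array([0.5]), np.array([-0.5]), order=6).shape)  # (27, 1)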
In [29]:
out = map_features(data[0], data[1], order=6)
X = out.transpose()
y = data[2]  # 1-D targets again
In [30]:
# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]
In [31]:
# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]
In [36]:
# Logistic regression with lambda = 1 (scikit-learn's C is 1/lambda)
classifier = OneVsRestClassifier(LogisticRegression(C=1))
# Train the model using the training sets
classifier.fit(X_train, y_train)
print('Coefficients: ', classifier.coef_)
print('Intercept: ', classifier.intercept_)
print('Accuracy on the full data set: ', classifier.score(X, y))
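To see what the regularization strength does, a small sweep over C is instructive; the values below are arbitrary sample points, a minimal sketch rather than a tuned grid. Small C (large lambda) underfits, while large C (lambda near 0) can overfit the 27-dimensional feature space:
In [ ]:
# Compare train vs. test accuracy as the inverse regularization strength grows.
for C in (0.01, 1.0, 100.0):
    clf = OneVsRestClassifier(LogisticRegression(C=C)).fit(X_train, y_train)
    print("C=%g  train=%.2f  test=%.2f"
          % (C, clf.score(X_train, y_train), clf.score(X_test, y_test)))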
In [37]:
accepted = y == 1
rejected = y == 0
plt.figure(figsize=(6, 6))
plt.scatter(np.extract(accepted, data[0]),
            np.extract(accepted, data[1]),
            c='b', marker='+', label='accepted')
plt.scatter(np.extract(rejected, data[0]),
            np.extract(rejected, data[1]),
            c='y', marker='o', label='rejected')
plt.xlabel('Microchip Test 1');
plt.ylabel('Microchip Test 2');
plt.gca().set_aspect('equal', 'datalim')
plt.legend();
# Evaluate theta^T x on a grid of mapped features; the decision boundary
# is the zero-level contour of that score.
dim = np.linspace(-1, 1.5, 1000)
dx, dy = np.meshgrid(dim, dim)
v = map_features(dx.flatten(), dy.flatten(), order=6)
z = (np.dot(classifier.coef_, v) + classifier.intercept_).reshape(1000, 1000)
CS = plt.contour(dx, dy, z, levels=[0], colors=['r'])