In [3]:
import pandas as pd
import numpy as np
from matplotlib import pylab as plt
%matplotlib inline
# Load the Iris dataset; assumes a space-delimited file with four numeric
# feature columns plus an integer class label (1/2/3) — TODO confirm header row.
df = pd.read_csv("data/iris.txt", delimiter=" ")
# Peek at a few random rows to sanity-check the load.
df.sample(6)
Out[3]:
In [15]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Computed as exp(-logaddexp(0, -x)), which is algebraically identical to
    1 / (1 + exp(-x)) but does not overflow (np.exp(-x) raises a
    RuntimeWarning and saturates for large |x|).  Being built from ufuncs,
    it preserves the input's array subclass (e.g. np.matrix) and works on
    scalars and arrays alike.
    """
    return np.exp(-np.logaddexp(0, -x))
In [321]:
class LogisticRegression(object):
    """Binary logistic-regression classifier fit by batch gradient ascent.

    X is an (N, D) design matrix (np.matrix) and Y an (N, 1) column of 0/1
    targets.  Training runs immediately in the constructor and leaves the
    learned weights in ``self.theta`` and the per-iteration mean squared
    error in ``self.error_history``.

    Parameters
    ----------
    X : np.matrix, shape (N, D) -- feature matrix.
    Y : np.matrix, shape (N, 1) -- binary (0/1) targets.
    order : int -- intended polynomial feature order; `prepareOrderedX`
        is currently a no-op placeholder, so this is stored but inert.
    eta : float -- gradient-ascent step size (default matches the original
        hard-coded 0.001).
    max_iterations : int -- iteration cap (default matches the original
        hard-coded 500000); training usually stops earlier via the
        relative-improvement test in `train`.
    """

    def __init__(self, X, Y, order=1, eta=0.001, max_iterations=500000):
        super(LogisticRegression, self).__init__()
        self.order = order
        # Apply the (currently identity) feature expansion here as well as in
        # predict(), so train and predict always see the same feature space.
        self.X = self.prepareOrderedX(X, order)
        self.Y = Y
        self.eta = eta
        self.max_iterations = max_iterations
        self.train()

    @staticmethod
    def prepareOrderedX(X, order):
        """Placeholder for polynomial feature expansion; currently identity."""
        return X

    @staticmethod
    def _sigmoid(z):
        """Overflow-safe element-wise logistic function 1 / (1 + exp(-z))."""
        # exp(-logaddexp(0, -z)) == 1/(1+exp(-z)) without overflow warnings.
        return np.exp(-np.logaddexp(0, -z))

    def train(self):
        """Fit theta by gradient ascent on the log-likelihood.

        The update theta += eta * X^T (Y - p) is the exact gradient of the
        Bernoulli log-likelihood; the *monitored* quantity is the mean
        squared error, kept for parity with the original implementation.
        """
        N, D = self.X.shape
        theta = np.ones((D, 1))  # deterministic all-ones start
        prev_error = np.inf      # np.infty was removed in NumPy 2.0
        errors = []
        for _ in range(self.max_iterations):
            pr = self._sigmoid(self.X * theta)
            theta = theta + self.eta * self.X.T * (self.Y - pr)
            error = np.power(self.Y - pr, 2).sum() / N
            errors.append(error)
            # Stop once the error fails to improve by at least 0.001%
            # (also catches the error going back up).
            if prev_error * 0.99999 < error:
                break
            prev_error = error
        self.error_history = errors
        self.theta = theta

    def predict(self, X):
        """Return P(y=1 | x) for each row of X as an (N, 1) matrix."""
        X = LogisticRegression.prepareOrderedX(X, self.order)
        return self._sigmoid(X * self.theta)

    def plotErrors(self):
        """Plot the recorded training-error curve."""
        plt.plot(self.error_history)
In [322]:
def calcAccuracy(X, Y, model):
    """Print and return the confusion-matrix counts of `model` on (X, Y).

    Predictions are thresholded at 0.5; class 1 is treated as the positive
    class, anything else as negative.  Despite the name, no accuracy ratio
    is computed — only the four confusion counts.

    Parameters
    ----------
    X : np.matrix -- feature matrix passed straight to model.predict.
    Y : np.matrix, shape (N, 1) -- true labels (1 = positive).
    model : object with a predict(X) method returning (N, 1) probabilities.

    Returns
    -------
    dict with keys truePositive / falsePositive / trueNegative /
    falseNegative (new, backward-compatible return value).
    """
    y_est = model.predict(X)
    # Threshold probabilities into hard 0/1 predictions.
    y_est[y_est >= 0.5] = 1
    y_est[y_est < 0.5] = 0
    ROC = {
        "truePositive": 0,
        "falsePositive": 0,
        "trueNegative": 0,
        "falseNegative": 0,
    }
    for i_est, i in zip(y_est, Y):
        if i == 1 and i_est == i:
            ROC["truePositive"] += 1
        elif i_est == 1:
            ROC["falsePositive"] += 1
        elif i_est == 0 and i != 1:
            ROC["trueNegative"] += 1
        elif i_est == 0 and i == 1:
            ROC["falseNegative"] += 1
    print(ROC)  # was a Python-2 print statement: syntax error on Python 3
    return ROC
In [327]:
# Split the frame into a feature matrix (first four columns) and a label
# column vector (fifth column), both as np.matrix so `*` means matmul.
x = np.matrix(df.iloc[:, 0:4])
y = np.matrix(df.iloc[:, 4]).T

# Build one-vs-rest binary targets: y_k is 1 where the label equals k, else 0.
y1 = np.copy(y)
y1[y1 != 1] = 0  # class 1 already encoded as 1, so no second pass needed

y2 = np.copy(y)
y2[y2 != 2] = 0
y2[y2 == 2] = 1

y3 = np.copy(y)
y3[y3 != 3] = 0
y3[y3 == 3] = 1
In [328]:
# One-vs-rest model for class 1; note order=2 is stored but has no effect
# because prepareOrderedX is currently an identity placeholder.
m = LogisticRegression(x, y1, order=2)
calcAccuracy(x, y1, m)
In [329]:
# One-vs-rest model for class 2 (order=2 is inert — see prepareOrderedX).
m = LogisticRegression(x, y2, order=2)
calcAccuracy(x, y2, m)
# Display the learned weight vector for inspection.
m.theta
Out[329]:
In [330]:
# One-vs-rest model for class 3 (order=2 is inert — see prepareOrderedX).
m = LogisticRegression(x, y3, order=2)
calcAccuracy(x, y3, m)
In [ ]:
In [ ]: