In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# load the space-delimited iris dataset and preview a few random rows
df = pd.read_csv("data/iris.txt", delimiter=" ")
df.sample(6)


Out[3]:
      sl   sw   pl   pw  c
145  6.7  3.0  5.2  2.3  3
63   6.1  2.9  4.7  1.4  2
147  6.5  3.0  5.2  2.0  3
2    4.7  3.2  1.3  0.2  1
141  6.9  3.1  5.1  2.3  3
47   4.6  3.2  1.4  0.2  1

In [15]:
def sigmoid(x):
    # logistic function: maps any real input to the interval (0, 1)
    return 1 / (1 + np.exp(-x))

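A quick sanity check, not part of the original run: the logistic function should return exactly 0.5 at zero and saturate toward 0 and 1 for large negative and positive inputs.

In [ ]:
x_check = np.array([-10.0, 0.0, 10.0])
print(sigmoid(x_check))  # roughly [4.54e-05, 0.5, 0.9999546]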
In [321]:
class LogisticRegression(object):
    def __init__(self, X, Y, order=1):
        super(LogisticRegression, self).__init__()
        self.order = order
        self.X = X
        self.Y = Y
        self.train()

    @staticmethod
    def prepareOrderedX(X, order):
        # placeholder for a polynomial feature expansion up to `order`;
        # it currently returns X unchanged, so `order` has no effect
        return X

    def train(self):
        N = self.X.shape[0]  # number of samples
        D = self.X.shape[1]  # number of features

        theta = np.ones((D, 1))
        eta = 0.001          # learning rate
        maxIteration = 500000
        prev_error = np.inf

        errors = []
        for i in range(maxIteration):
            # predicted probabilities under the current parameters
            pr = sigmoid(self.X * theta)
            # gradient ascent step on the Bernoulli log-likelihood
            theta = theta + eta * self.X.T * (self.Y - pr)

            # track mean squared error and stop once the relative
            # improvement falls below about 1e-5
            error = np.power(self.Y - pr, 2).sum() / N
            errors.append(error)
            if (prev_error * 0.99999) < error:
                break
            prev_error = error

        self.error_history = errors
        self.theta = theta

    def predict(self, X):
        X = LogisticRegression.prepareOrderedX(X, self.order)
        return sigmoid(X * self.theta)

    def plotErrors(self):
        plt.plot(self.error_history)

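The update theta + eta * X.T * (Y - pr) is the usual gradient-ascent step on the log-likelihood, even though the stopping rule monitors squared error; note also that no intercept column is added to X. prepareOrderedX looks like it was meant to append polynomial terms up to `order` but, as written, returns X unchanged. A minimal sketch of such an expansion, offered as an assumption rather than what the notebook actually runs:

In [ ]:
def prepareOrderedX_sketch(X, order):
    # hypothetical polynomial expansion: keep the original columns and
    # append their elementwise powers 2..order as extra features
    X = np.asarray(X)
    cols = [X] + [np.power(X, p) for p in range(2, order + 1)]
    return np.matrix(np.hstack(cols))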
In [322]:
def calcAccuracy(X, Y, model):
    y_est = model.predict(X)
    # threshold the predicted probabilities at 0.5
    y_est[y_est >= 0.5] = 1
    y_est[y_est < 0.5] = 0
    # print(np.hstack([Y, y_est]))
    # confusion-matrix counts
    ROC = {
        "truePositive": 0,
        "falsePositive": 0,
        "trueNegative": 0,
        "falseNegative": 0,
    }

    for i_est, i in zip(y_est, Y):
        if i == 1 and i_est == i:
            ROC["truePositive"] += 1
        elif i_est == 1:
            ROC["falsePositive"] += 1
        elif i_est == 0 and i != 1:
            ROC["trueNegative"] += 1
        elif i_est == 0 and i == 1:
            ROC["falseNegative"] += 1

    print(ROC)

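The four counts are enough to derive the usual summary metrics. The helper below is a sketch, not part of the original notebook, and assumes calcAccuracy is changed to return the dict instead of only printing it:

In [ ]:
def summarize(counts):
    # accuracy, precision and recall from confusion-matrix counts
    tp, fp = counts["truePositive"], counts["falsePositive"]
    tn, fn = counts["trueNegative"], counts["falseNegative"]
    total = tp + fp + tn + fn
    accuracy = (tp + tn) / total
    precision = tp / (tp + fp) if (tp + fp) else float("nan")
    recall = tp / (tp + fn) if (tp + fn) else float("nan")
    return accuracy, precision, recall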
In [327]:
# features (four measurement columns) and the class label column
x = np.matrix(df.iloc[:, [0, 1, 2, 3]])
y = np.matrix(df.iloc[:, 4]).T
y1, y2, y3 = np.copy(y), np.copy(y), np.copy(y)

# one-vs-rest targets: y_k is 1 for class k and 0 otherwise
y1[y1 != 1] = 0

y2[y2 != 2] = 0
y2[y2 == 2] = 1

y3[y3 != 3] = 0
y3[y3 == 3] = 1

In [328]:
m = LogisticRegression(x, y1, order=2)
calcAccuracy(x, y1, m)


{'falseNegative': 0, 'falsePositive': 0, 'truePositive': 50, 'trueNegative': 100}

In [329]:
m = LogisticRegression(x, y2, order=2)
calcAccuracy(x, y2, m)
m.theta


{'falseNegative': 50, 'falsePositive': 0, 'truePositive': 0, 'trueNegative': 100}
Out[329]:
matrix([[-0.49591048],
        [ 0.10889452],
        [ 0.12148996],
        [ 0.68384537]])

In [330]:
m = LogisticRegression(x, y3, order=2)
calcAccuracy(x, y3, m)


{'falseNegative': 0, 'falsePositive': 48, 'truePositive': 50, 'trueNegative': 52}

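Taken together, the three binary models form a one-vs-rest multiclass classifier: each sample gets the class whose model reports the highest probability. A sketch, not part of the original run, assuming the three fitted models are kept as m1, m2 and m3 instead of reusing m:

In [ ]:
m1 = LogisticRegression(x, y1, order=2)
m2 = LogisticRegression(x, y2, order=2)
m3 = LogisticRegression(x, y3, order=2)

# stack the per-class probabilities and pick the most confident class (1, 2 or 3)
probs = np.hstack([m1.predict(x), m2.predict(x), m3.predict(x)])
y_pred = np.asarray(probs).argmax(axis=1) + 1
print((y_pred == np.asarray(y).ravel()).mean())  # overall training accuracy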