w3-practice-03--logistic-regression



In [79]:
import math

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import roc_auc_score

In [80]:
# The CSV has no header row, so columns get integer labels: column 0 is the
# class label (+1 / -1 in the preview below), columns 1 and 2 are the features.
data = pd.read_csv(
    'resources/data-logistic.csv',
    header=None,
)

In [81]:
# Preview the first five rows to check the column layout.
data.head(n=5)


Out[81]:
0 1 2
0 -1 -0.663827 -0.138526
1 1 1.994596 2.468025
2 -1 -1.247395 0.749425
3 1 2.309374 1.899836
4 1 0.849143 2.407750

In [82]:
# Feature 1 against feature 2, coloured by the class label in column 0.
plt.scatter(x=data[1], y=data[2], c=data[0])


Out[82]:
<matplotlib.collections.PathCollection at 0x1167065d0>

In [88]:
def _weight_step(j, w1, w2, y, X, k, C):
    """Return the updated value of weight ``j`` (1 or 2) after one step.

    Shared implementation behind ``functionW1`` and ``functionW2``; the two
    differ only in which feature column multiplies each sample's term and
    which weight is being updated.
    """
    l = len(y)
    S = 0
    # ``range`` instead of the Python-2-only ``xrange`` so the cell also runs
    # on Python 3.
    for i in range(l):
        exponent = -y[i] * (w1 * X[1][i] + w2 * X[2][i])
        try:
            sigma = 1.0 / (1.0 + math.exp(exponent))
        except OverflowError:
            # exp() overflows for a strongly misclassified sample; the
            # fraction tends to 0 there, so use the limit instead of crashing.
            sigma = 0.0
        S += y[i] * X[j][i] * (1.0 - sigma)

    w = w1 if j == 1 else w2
    # Data-term step scaled by k / l, minus the shrinkage term k * C * w.
    return w + (k * (1.0 / l) * S) - k * C * w


def functionW1(w1, w2, y, X, k, C):
    """Compute the next value of ``w1`` for one gradient step.

    Parameters
    ----------
    w1, w2 : float
        Current weights for feature columns 1 and 2.
    y : indexable of numbers
        Class labels, read as ``y[i]`` for ``i`` in ``0 .. len(y) - 1``.
    X : mapping of indexables
        Features, read as ``X[1][i]`` and ``X[2][i]``.
    k : float
        Step size.
    C : float
        Coefficient of the shrinkage term ``k * C * w1``.

    Returns
    -------
    float
        ``w1 + k * (1 / l) * S - k * C * w1`` where ``S`` sums
        ``y[i] * X[1][i] * (1 - 1 / (1 + exp(-y[i] * (w1*X[1][i] + w2*X[2][i]))))``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    return _weight_step(1, w1, w2, y, X, k, C)


def functionW2(w1, w2, y, X, k, C):
    """Compute the next value of ``w2`` for one gradient step.

    Identical to ``functionW1`` except that each sample's term is multiplied
    by ``X[2][i]`` and the weight being updated (and shrunk) is ``w2``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    return _weight_step(2, w1, w2, y, X, k, C)

In [84]:
def grad(y, X, C=0.0, w1=0.0, w2=0.0, k=0.1, err=1e-5, max_iter=10000):
    """Iterate the ``functionW1`` / ``functionW2`` updates until they settle.

    Parameters
    ----------
    y, X :
        Labels and features, passed through unchanged to ``functionW1`` and
        ``functionW2``.
    C : float
        Shrinkage coefficient forwarded to the update functions.
    w1, w2 : float
        Starting weights.
    k : float
        Step size forwarded to the update functions.
    err : float
        Stop once the Euclidean distance between consecutive weight pairs is
        at most this value.
    max_iter : int
        Upper bound on the number of update steps (previously a hard-coded
        10000). At least one step is always performed.

    Returns
    -------
    list
        ``[w1, w2]`` from the last step that was computed.
    """
    iteration = 0

    while True:
        iteration += 1
        # Both updates are evaluated from the same (old) weight pair.
        w1_new = functionW1(w1, w2, y, X, k, C)
        w2_new = functionW2(w1, w2, y, X, k, C)
        step = math.sqrt((w1_new - w1) ** 2 + (w2_new - w2) ** 2)

        if iteration >= max_iter or step <= err:
            return [w1_new, w2_new]

        w1, w2 = w1_new, w2_new

In [89]:
# Fit twice on the same labels (column 0) and features (columns 1 onward):
# once with the default C=0.0 and once with shrinkage coefficient C=10.0.
w1, w2 = grad(data[0], data.loc[:, 1:])
rw1, rw2 = grad(data[0], data.loc[:, 1:], C=10.0)

In [90]:
def auc_roc(X, w1, w2):
    """Return the sigmoid of the linear score ``w1 * X[1] + w2 * X[2]``.

    Despite its name this does not compute an AUC; it maps one sample to a
    value in [0, 1] that can then be handed to an AUC routine.

    Parameters
    ----------
    X : mapping
        One sample, read as ``X[1]`` and ``X[2]``.
    w1, w2 : float
        Weights for the two features.

    Returns
    -------
    float
        ``1 / (1 + exp(-w1 * X[1] - w2 * X[2]))``.
    """
    exponent = -w1 * X[1] - w2 * X[2]
    try:
        return 1.0 / (1.0 + math.exp(exponent))
    except OverflowError:
        # exp() overflows for a very negative score; the sigmoid's limit
        # there is 0, so return that instead of raising.
        return 0.0

In [91]:
# Score every row with both fitted weight pairs. The scoring function defined
# in this notebook is `auc_roc`; the previous call to `a` relied on a name
# that no cell defines.
y_score = data.loc[:, 1:].apply(lambda x: auc_roc(x, w1, w2), axis=1)
y_rscore = data.loc[:, 1:].apply(lambda x: auc_roc(x, rw1, rw2), axis=1)

# The labels are column 0 of `data` (the same series passed to `grad`);
# the previous `y` was never assigned in the notebook.
auc = roc_auc_score(data[0], y_score)
rauc = roc_auc_score(data[0], y_rscore)

print("{:0.3f} {:0.3f}".format(auc, rauc))


0.927 0.936