In [79]:
import pandas as pd
import matplotlib.pyplot as plt
import math
In [80]:
# header=None: no row is treated as a header, so the columns get integer
# labels (the rest of the notebook reads them as data[0], data[1], data[2]).
data = pd.read_csv(
    'resources/data-logistic.csv',
    header=None,
)
In [81]:
# Preview the first five rows of the loaded frame.
data.head(n=5)
Out[81]:
In [82]:
# Column 1 on the x axis, column 2 on the y axis, points coloured by column 0.
plt.scatter(x=data[1], y=data[2], c=data[0])
Out[82]:
In [88]:
def functionW1(w1, w2, y, X, k, C):
    """Return the updated first weight after one gradient-descent step.

    Computes
    ``w1 + k * (1/l) * sum_i(y_i * x_i1 * (1 - 1 / (1 + exp(-y_i * (w1*x_i1 + w2*x_i2))))) - k * C * w1``
    where ``l = len(y)``.

    Parameters
    ----------
    w1, w2 : float
        Current weights applied to ``X[1]`` and ``X[2]`` respectively.
    y : indexable
        Labels, read as ``y[i]`` for ``i`` in ``0 .. len(y) - 1``.
        Presumably encoded as -1/+1 (the margin form suggests it) - confirm
        against the data file.
    X : indexable
        Features, read as ``X[1][i]`` and ``X[2][i]``.
    k : float
        Step size.
    C : float
        Coefficient of the shrinkage term ``k * C * w1``.

    Returns
    -------
    float
        The new value for ``w1``.
    """
    n = len(y)
    total = 0
    # range() instead of the Python-2-only xrange() so the cell also runs on
    # a Python 3 kernel (xrange raises NameError there).
    for i in range(n):
        margin = y[i] * (w1 * X[1][i] + w2 * X[2][i])
        total += y[i] * X[1][i] * (1.0 - 1.0 / (1.0 + math.exp(-margin)))
    return w1 + (k * (1.0 / n) * total) - k * C * w1
def functionW2(w1, w2, y, X, k, C):
    """Return the updated second weight after one gradient-descent step.

    Computes
    ``w2 + k * (1/l) * sum_i(y_i * x_i2 * (1 - 1 / (1 + exp(-y_i * (w1*x_i1 + w2*x_i2))))) - k * C * w2``
    where ``l = len(y)``.

    Parameters
    ----------
    w1, w2 : float
        Current weights applied to ``X[1]`` and ``X[2]`` respectively.
    y : indexable
        Labels, read as ``y[i]`` for ``i`` in ``0 .. len(y) - 1``.
        Presumably encoded as -1/+1 (the margin form suggests it) - confirm
        against the data file.
    X : indexable
        Features, read as ``X[1][i]`` and ``X[2][i]``.
    k : float
        Step size.
    C : float
        Coefficient of the shrinkage term ``k * C * w2``.

    Returns
    -------
    float
        The new value for ``w2``.
    """
    n = len(y)
    total = 0
    # range() instead of the Python-2-only xrange() so the cell also runs on
    # a Python 3 kernel (xrange raises NameError there).
    for i in range(n):
        margin = y[i] * (w1 * X[1][i] + w2 * X[2][i])
        total += y[i] * X[2][i] * (1.0 - 1.0 / (1.0 + math.exp(-margin)))
    return w2 + (k * (1.0 / n) * total) - k * C * w2
In [84]:
def grad(y, X, C=0.0, w1=0.0, w2=0.0, k=0.1, err=1e-5, max_iter=10000):
    """Iterate the two weight updates until they converge or the cap is hit.

    Each iteration computes both new weights from the *current* pair via
    ``functionW1`` and ``functionW2``, then measures the Euclidean distance
    between the old and new weight vectors.

    Parameters
    ----------
    y, X :
        Labels and features, passed through unchanged to ``functionW1`` /
        ``functionW2``.
    C : float, default 0.0
        Passed through as the ``C`` argument of the update functions.
    w1, w2 : float, default 0.0
        Starting weights.
    k : float, default 0.1
        Step size passed to the update functions.
    err : float, default 1e-5
        Stop once the distance between successive weight vectors is
        ``<= err``.
    max_iter : int, default 10000
        Upper bound on the number of iterations (previously a hard-coded
        constant inside the function). At least one iteration always runs.

    Returns
    -------
    list
        ``[w1, w2]`` as produced by the last iteration.
    """
    iteration = 0
    while True:
        iteration += 1
        # Both updates use the same (old) w1, w2 so the step is simultaneous.
        w1_new = functionW1(w1, w2, y, X, k, C)
        w2_new = functionW2(w1, w2, y, X, k, C)
        step = math.sqrt((w1_new - w1) ** 2 + (w2_new - w2) ** 2)
        if iteration >= max_iter or step <= err:
            return [w1_new, w2_new]
        w1, w2 = w1_new, w2_new
In [89]:
# Column 0 is passed as y; columns from label 1 onward are passed as X.
# First fit: C left at its default of 0.0.
w1, w2 = grad(y=data[0], X=data.loc[:, 1:])
# Second fit: same inputs with C = 10.0.
rw1, rw2 = grad(y=data[0], X=data.loc[:, 1:], C=10.0)
In [90]:
def auc_roc(X, w1, w2):
    """Return the logistic (sigmoid) of the linear score ``w1*X[1] + w2*X[2]``.

    Despite its name, this function does not compute a ROC AUC; it returns
    ``1 / (1 + exp(-(w1*X[1] + w2*X[2])))`` for a single sample.

    Parameters
    ----------
    X : indexable
        One sample; read as ``X[1]`` and ``X[2]``.
    w1, w2 : float
        Weights applied to ``X[1]`` and ``X[2]``.

    Returns
    -------
    float
        A value in ``[0.0, 1.0]``.
    """
    z = w1 * X[1] + w2 * X[2]
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    # For negative z, exp(-z) can exceed the float range and raise
    # OverflowError; exp(z) is <= 1 here, so use the equivalent form.
    ez = math.exp(z)
    return ez / (1.0 + ez)
In [91]:
# The labels are column 0 of the loaded frame; define `y` here so this cell
# does not depend on a name left over from an earlier kernel session.
y = data[0]

# Score every row with the sigmoid defined above. The function is named
# `auc_roc`; the previous call to `a(...)` referenced a name that is not
# defined in the notebook.
y_score = data.loc[:, 1:].apply(lambda row: auc_roc(row, w1, w2), axis=1)
y_rscore = data.loc[:, 1:].apply(lambda row: auc_roc(row, rw1, rw2), axis=1)

# NOTE(review): roc_auc_score is not imported in the visible cells -
# presumably sklearn.metrics.roc_auc_score; confirm the import exists in the
# imports cell, otherwise this fails on a fresh kernel.
auc = roc_auc_score(y, y_score)
rauc = roc_auc_score(y, y_rscore)
print("{:0.3f} {:0.3f}".format(auc, rauc))