In [1]:
%load_ext autoreload

%autoreload 2

In [2]:
import sys

sys.path.append('..')

In [3]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix

In [5]:
from vantgrd.datasets import read_pima_indians_diabetes, normalize_data
from vantgrd.logistic import LogisticRegressionWithAdadelta, LogisticRegressionWithAdagrad
from vantgrd.fm import FMWithAdagrad, FMWithSGD

In [6]:
X, y = read_pima_indians_diabetes('../data/diabetes/pima-indians-diabetes.data')
X = normalize_data(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# usage with normalized data
# lr = FMWithSGD(eta=0.0005, n_factors=2, epochs=50, rate=1000)
lr = FMWithAdagrad(eta=.01, k0=False, k1=True, regw=.01, regv=.01, n_factors=3, epochs=50, rate=500)
# lr = LogisticRegressionWithAdagrad(eta=0.25, epochs=50, rate=1000)
# lr = LogisticRegressionWithAdadelta(rho=0.8, epochs=50, regw=.001, rate=1000)
lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)
print(classification_report(y_test, y_pred))

print(confusion_matrix(y_test, y_pred))

y_test_prob = lr.raw_predict(X_test)

fpr, tpr, thresholds = roc_curve(y_test, y_test_prob)

roc_auc = auc(fpr, tpr)

print("AUC = %f" % roc_auc)
print("Score = {}".format(lr.score(X, y)))

# for i in zip(y_test, y_test_prob):
#     p = 1. if i[1] >= 0.5 else 0.
#     print ','.join([str(x) for x in ['+' if i[0] == p else '-', i[0], p, i[1]]])


Epoch:   0 | Training Samples:       500 | Loss:     1355.75 | LossAdj:  2.71151 | Time taken:    0 seconds
Epoch:   1 | Training Samples:      1000 | Loss:     2600.16 | LossAdj:  2.60016 | Time taken:    0 seconds
Epoch:   2 | Training Samples:      1500 | Loss:     3848.62 | LossAdj:  2.56574 | Time taken:    0 seconds
Epoch:   3 | Training Samples:      2000 | Loss:     5080.42 | LossAdj:  2.54021 | Time taken:    0 seconds
Epoch:   4 | Training Samples:      2500 | Loss:     6138.23 | LossAdj:  2.45529 | Time taken:    0 seconds
Epoch:   5 | Training Samples:      3000 | Loss:     7355.83 | LossAdj:  2.45194 | Time taken:    0 seconds
Epoch:   6 | Training Samples:      3500 | Loss:     8486.60 | LossAdj:  2.42474 | Time taken:    0 seconds
Epoch:   6 | Training Samples:      4000 | Loss:     9666.19 | LossAdj:  2.41655 | Time taken:    0 seconds
Epoch:   7 | Training Samples:      4500 | Loss:    10800.03 | LossAdj:  2.40001 | Time taken:    0 seconds
Epoch:   8 | Training Samples:      5000 | Loss:    11962.75 | LossAdj:  2.39255 | Time taken:    0 seconds
Epoch:   9 | Training Samples:      5500 | Loss:    13275.89 | LossAdj:  2.41380 | Time taken:    0 seconds
Epoch:  10 | Training Samples:      6000 | Loss:    14411.82 | LossAdj:  2.40197 | Time taken:    0 seconds
Epoch:  11 | Training Samples:      6500 | Loss:    15805.24 | LossAdj:  2.43158 | Time taken:    0 seconds
Epoch:  12 | Training Samples:      7000 | Loss:    16955.23 | LossAdj:  2.42218 | Time taken:    0 seconds
Epoch:  13 | Training Samples:      7500 | Loss:    18346.26 | LossAdj:  2.44617 | Time taken:    0 seconds
Epoch:  13 | Training Samples:      8000 | Loss:    19597.54 | LossAdj:  2.44969 | Time taken:    0 seconds
Epoch:  14 | Training Samples:      8500 | Loss:    20850.79 | LossAdj:  2.45303 | Time taken:    0 seconds
Epoch:  15 | Training Samples:      9000 | Loss:    22277.04 | LossAdj:  2.47523 | Time taken:    0 seconds
Epoch:  16 | Training Samples:      9500 | Loss:    23549.15 | LossAdj:  2.47886 | Time taken:    0 seconds
Epoch:  17 | Training Samples:     10000 | Loss:    24754.79 | LossAdj:  2.47548 | Time taken:    0 seconds
Epoch:  18 | Training Samples:     10500 | Loss:    26464.51 | LossAdj:  2.52043 | Time taken:    0 seconds
Epoch:  19 | Training Samples:     11000 | Loss:    27786.66 | LossAdj:  2.52606 | Time taken:    0 seconds
Epoch:  19 | Training Samples:     11500 | Loss:    29135.97 | LossAdj:  2.53356 | Time taken:    0 seconds
Epoch:  20 | Training Samples:     12000 | Loss:    30354.94 | LossAdj:  2.52958 | Time taken:    0 seconds
Epoch:  21 | Training Samples:     12500 | Loss:    31835.55 | LossAdj:  2.54684 | Time taken:    0 seconds
Epoch:  22 | Training Samples:     13000 | Loss:    33264.28 | LossAdj:  2.55879 | Time taken:    0 seconds
Epoch:  23 | Training Samples:     13500 | Loss:    34636.78 | LossAdj:  2.56569 | Time taken:    0 seconds
Epoch:  24 | Training Samples:     14000 | Loss:    36007.65 | LossAdj:  2.57198 | Time taken:    0 seconds
Epoch:  25 | Training Samples:     14500 | Loss:    37457.89 | LossAdj:  2.58330 | Time taken:    0 seconds
Epoch:  26 | Training Samples:     15000 | Loss:    38918.02 | LossAdj:  2.59453 | Time taken:    0 seconds
Epoch:  26 | Training Samples:     15500 | Loss:    40235.99 | LossAdj:  2.59587 | Time taken:    0 seconds
Epoch:  27 | Training Samples:     16000 | Loss:    41675.28 | LossAdj:  2.60471 | Time taken:    0 seconds
Epoch:  28 | Training Samples:     16500 | Loss:    43023.74 | LossAdj:  2.60750 | Time taken:    0 seconds
Epoch:  29 | Training Samples:     17000 | Loss:    44293.53 | LossAdj:  2.60550 | Time taken:    0 seconds
Epoch:  30 | Training Samples:     17500 | Loss:    45883.29 | LossAdj:  2.62190 | Time taken:    0 seconds
Epoch:  31 | Training Samples:     18000 | Loss:    47344.47 | LossAdj:  2.63025 | Time taken:    0 seconds
Epoch:  32 | Training Samples:     18500 | Loss:    48713.99 | LossAdj:  2.63319 | Time taken:    0 seconds
Epoch:  32 | Training Samples:     19000 | Loss:    50128.57 | LossAdj:  2.63835 | Time taken:    0 seconds
Epoch:  33 | Training Samples:     19500 | Loss:    51598.75 | LossAdj:  2.64609 | Time taken:    0 seconds
Epoch:  34 | Training Samples:     20000 | Loss:    53024.68 | LossAdj:  2.65123 | Time taken:    0 seconds
Epoch:  35 | Training Samples:     20500 | Loss:    54427.68 | LossAdj:  2.65501 | Time taken:    0 seconds
Epoch:  36 | Training Samples:     21000 | Loss:    55757.23 | LossAdj:  2.65511 | Time taken:    0 seconds
Epoch:  37 | Training Samples:     21500 | Loss:    57077.26 | LossAdj:  2.65476 | Time taken:    0 seconds
Epoch:  38 | Training Samples:     22000 | Loss:    58635.97 | LossAdj:  2.66527 | Time taken:    0 seconds
Epoch:  39 | Training Samples:     22500 | Loss:    60116.88 | LossAdj:  2.67186 | Time taken:    0 seconds
Epoch:  39 | Training Samples:     23000 | Loss:    61562.03 | LossAdj:  2.67661 | Time taken:    0 seconds
Epoch:  40 | Training Samples:     23500 | Loss:    63060.58 | LossAdj:  2.68343 | Time taken:    0 seconds
Epoch:  41 | Training Samples:     24000 | Loss:    64460.54 | LossAdj:  2.68586 | Time taken:    0 seconds
Epoch:  42 | Training Samples:     24500 | Loss:    66018.22 | LossAdj:  2.69462 | Time taken:    0 seconds
Epoch:  43 | Training Samples:     25000 | Loss:    67458.36 | LossAdj:  2.69833 | Time taken:    0 seconds
Epoch:  44 | Training Samples:     25500 | Loss:    68908.12 | LossAdj:  2.70228 | Time taken:    0 seconds
Epoch:  45 | Training Samples:     26000 | Loss:    70338.26 | LossAdj:  2.70532 | Time taken:    0 seconds
Epoch:  46 | Training Samples:     26500 | Loss:    71840.71 | LossAdj:  2.71097 | Time taken:    0 seconds
Epoch:  46 | Training Samples:     27000 | Loss:    73277.26 | LossAdj:  2.71397 | Time taken:    0 seconds
Epoch:  47 | Training Samples:     27500 | Loss:    74799.80 | LossAdj:  2.71999 | Time taken:    0 seconds
Epoch:  48 | Training Samples:     28000 | Loss:    76406.31 | LossAdj:  2.72880 | Time taken:    0 seconds
Epoch:  49 | Training Samples:     28500 | Loss:    77841.92 | LossAdj:  2.73130 | Time taken:    0 seconds
 --- TRAINING FINISHED IN 4 SECONDS WITH LOSS 2.73 ---
              precision    recall  f1-score   support

         0.0       0.84      0.63      0.72       123
         1.0       0.55      0.78      0.64        69

    accuracy                           0.69       192
   macro avg       0.69      0.71      0.68       192
weighted avg       0.73      0.69      0.69       192

[[78 45]
 [15 54]]
AUC = 0.795805
Score = 0.7421875

In [7]:
fig = plt.figure()
ax = fig.add_subplot(1, 2, 1)
ax.plot(fpr, tpr, color='blue', label='ROC area = %0.2f' % roc_auc)
ax.set_xlabel("False positive rate")
ax.set_ylabel("True positive rate")
ax.grid()

bx = fig.add_subplot(1, 2, 2)
bx.scatter(y_test_prob, y_test, s=5, alpha=0.10, color='blue')
bx.set_xlabel("Output Probability")
bx.set_ylabel("Target Variable")
bx.grid()

plt.show()



In [ ]: