In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             fbeta_score, classification_report, confusion_matrix,
                             precision_recall_curve, roc_auc_score, roc_curve, auc)

Data observation


In [2]:
filepath = '/Users/mac/Desktop/Kaggle_datasets/HR_analytics/'
filename01 = 'HR_comma_sep.csv'

df_full = pd.read_csv(os.path.join(filepath, filename01))

In [3]:
df_full  # the target is the 'left' column: whether the employee left


Out[3]:
satisfaction_level last_evaluation number_project average_montly_hours time_spend_company Work_accident left promotion_last_5years sales salary
0 0.38 0.53 2 157 3 0 1 0 sales low
1 0.80 0.86 5 262 6 0 1 0 sales medium
2 0.11 0.88 7 272 4 0 1 0 sales medium
3 0.72 0.87 5 223 5 0 1 0 sales low
4 0.37 0.52 2 159 3 0 1 0 sales low
5 0.41 0.50 2 153 3 0 1 0 sales low
6 0.10 0.77 6 247 4 0 1 0 sales low
7 0.92 0.85 5 259 5 0 1 0 sales low
8 0.89 1.00 5 224 5 0 1 0 sales low
9 0.42 0.53 2 142 3 0 1 0 sales low
10 0.45 0.54 2 135 3 0 1 0 sales low
11 0.11 0.81 6 305 4 0 1 0 sales low
12 0.84 0.92 4 234 5 0 1 0 sales low
13 0.41 0.55 2 148 3 0 1 0 sales low
14 0.36 0.56 2 137 3 0 1 0 sales low
15 0.38 0.54 2 143 3 0 1 0 sales low
16 0.45 0.47 2 160 3 0 1 0 sales low
17 0.78 0.99 4 255 6 0 1 0 sales low
18 0.45 0.51 2 160 3 1 1 1 sales low
19 0.76 0.89 5 262 5 0 1 0 sales low
20 0.11 0.83 6 282 4 0 1 0 sales low
21 0.38 0.55 2 147 3 0 1 0 sales low
22 0.09 0.95 6 304 4 0 1 0 sales low
23 0.46 0.57 2 139 3 0 1 0 sales low
24 0.40 0.53 2 158 3 0 1 0 sales low
25 0.89 0.92 5 242 5 0 1 0 sales low
26 0.82 0.87 4 239 5 0 1 0 sales low
27 0.40 0.49 2 135 3 0 1 0 sales low
28 0.41 0.46 2 128 3 0 1 0 accounting low
29 0.38 0.50 2 132 3 0 1 0 accounting low
... ... ... ... ... ... ... ... ... ... ...
14969 0.43 0.46 2 157 3 0 1 0 sales medium
14970 0.78 0.93 4 225 5 0 1 0 sales medium
14971 0.39 0.45 2 140 3 0 1 0 sales medium
14972 0.11 0.97 6 310 4 0 1 0 accounting medium
14973 0.36 0.52 2 143 3 0 1 0 accounting medium
14974 0.36 0.54 2 153 3 0 1 0 accounting medium
14975 0.10 0.79 7 310 4 0 1 0 hr medium
14976 0.40 0.47 2 136 3 0 1 0 hr medium
14977 0.81 0.85 4 251 6 0 1 0 hr medium
14978 0.40 0.47 2 144 3 0 1 0 hr medium
14979 0.09 0.93 6 296 4 0 1 0 technical medium
14980 0.76 0.89 5 238 5 0 1 0 technical high
14981 0.73 0.93 5 162 4 0 1 0 technical low
14982 0.38 0.49 2 137 3 0 1 0 technical medium
14983 0.72 0.84 5 257 5 0 1 0 technical medium
14984 0.40 0.56 2 148 3 0 1 0 technical medium
14985 0.91 0.99 5 254 5 0 1 0 technical medium
14986 0.85 0.85 4 247 6 0 1 0 technical low
14987 0.90 0.70 5 206 4 0 1 0 technical low
14988 0.46 0.55 2 145 3 0 1 0 technical low
14989 0.43 0.57 2 159 3 1 1 0 technical low
14990 0.89 0.88 5 228 5 1 1 0 support low
14991 0.09 0.81 6 257 4 0 1 0 support low
14992 0.40 0.48 2 155 3 0 1 0 support low
14993 0.76 0.83 6 293 6 0 1 0 support low
14994 0.40 0.57 2 151 3 0 1 0 support low
14995 0.37 0.48 2 160 3 0 1 0 support low
14996 0.37 0.53 2 143 3 0 1 0 support low
14997 0.11 0.96 6 280 4 0 1 0 support low
14998 0.37 0.52 2 158 3 0 1 0 support low

14999 rows × 10 columns


In [4]:
df_full.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14999 entries, 0 to 14998
Data columns (total 10 columns):
satisfaction_level       14999 non-null float64
last_evaluation          14999 non-null float64
number_project           14999 non-null int64
average_montly_hours     14999 non-null int64
time_spend_company       14999 non-null int64
Work_accident            14999 non-null int64
left                     14999 non-null int64
promotion_last_5years    14999 non-null int64
sales                    14999 non-null object
salary                   14999 non-null object
dtypes: float64(2), int64(6), object(2)
memory usage: 1.1+ MB

In [5]:
df_full.columns


Out[5]:
Index(['satisfaction_level', 'last_evaluation', 'number_project',
       'average_montly_hours', 'time_spend_company', 'Work_accident', 'left',
       'promotion_last_5years', 'sales', 'salary'],
      dtype='object')

In [6]:
df_full['left'].value_counts()  # class balance of the target


Out[6]:
0    11428
1     3571
Name: left, dtype: int64
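The classes are imbalanced: always predicting "stayed" already achieves 11428/14999 ≈ 76% accuracy, a baseline worth keeping in mind when reading the scores below. A quick check (not in the original notebook):

baseline = df_full['left'].value_counts(normalize=True).max()
print('majority-class baseline accuracy: %.4f' % baseline)  # ~0.7619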

In [7]:
df_dum = pd.get_dummies(df_full)
df_dum.head()


Out[7]:
satisfaction_level last_evaluation number_project average_montly_hours time_spend_company Work_accident left promotion_last_5years sales_IT sales_RandD ... sales_hr sales_management sales_marketing sales_product_mng sales_sales sales_support sales_technical salary_high salary_low salary_medium
0 0.38 0.53 2 157 3 0 1 0 0 0 ... 0 0 0 0 1 0 0 0 1 0
1 0.80 0.86 5 262 6 0 1 0 0 0 ... 0 0 0 0 1 0 0 0 0 1
2 0.11 0.88 7 272 4 0 1 0 0 0 ... 0 0 0 0 1 0 0 0 0 1
3 0.72 0.87 5 223 5 0 1 0 0 0 ... 0 0 0 0 1 0 0 0 1 0
4 0.37 0.52 2 159 3 0 1 0 0 0 ... 0 0 0 0 1 0 0 0 1 0

5 rows × 21 columns


In [8]:
k = 20 #number of variables for heatmap
corrmat = df_dum.corr()
cols = corrmat.nlargest(k, 'left')['left'].index
cm = np.corrcoef(df_dum[cols].values.T)

plt.figure(figsize=(15,15))  # adjust the figure size here

sns.set(font_scale=1.25)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10},
                 yticklabels = cols.values, xticklabels = cols.values, cmap='rainbow')
plt.show()



In [9]:
sns.pairplot(df_full)  # hard to read anything meaningful out of this one, honestly
plt.show()



In [14]:
# Browse the relationship between every numeric column and `left`

cols = ['satisfaction_level', 'last_evaluation', 'number_project',
       'average_montly_hours', 'time_spend_company', 'Work_accident',
       'promotion_last_5years']

for col in cols:
    # one KDE per class of `left`, stratified by hue
    facet = sns.FacetGrid(df_full, hue='left', aspect=4, size=4)  # `size` was renamed `height` in seaborn >= 0.9
    facet.map(sns.kdeplot, col, shade=True)
    facet.set()
    facet.add_legend()
    plt.show()


Data preprocessing: shuffle, split features/labels, split train/test


In [15]:
from sklearn.utils import shuffle

shuffle_df = shuffle(df_dum, random_state=42)

df_label = shuffle_df['left']
df_feature = shuffle_df.drop('left', axis=1)

In [16]:
train_feature = np.array(df_feature.values[:10000,:])
train_label = np.array(df_label.values[:10000])
test_feature = np.array(df_feature.values[10000:,:])
test_label = np.array(df_label.values[10000:])
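The manual slicing above works because the frame was shuffled first. For reference, a sketch of the equivalent split using scikit-learn's train_test_split (an alternative, not what the notebook ran):

from sklearn.model_selection import train_test_split

# shuffle=False because the data is already shuffled; test_size=4999 matches the slicing above
train_feature, test_feature, train_label, test_label = train_test_split(
    df_feature.values, df_label.values, test_size=4999, shuffle=False)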

In [32]:
train_feature.shape


Out[32]:
(10000, 20)

In [33]:
train_label.shape


Out[33]:
(10000,)

In [17]:
scaler = MinMaxScaler()

# fit on the training set only, then reuse the same scaling for the test set,
# so no test-set statistics leak into training
scaler.fit(train_feature)
train_feature_trans = scaler.transform(train_feature)
test_feature_trans = scaler.transform(test_feature)

Keras MLP model: binary classification


In [33]:
######################### Build the model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

import matplotlib.pyplot as plt 
def show_train_history(train_history,train,validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='best')
    plt.show()

model = Sequential() # layers are stacked one after another, in order

# input layer + hidden layer 1
model.add(Dense(units=200, 
                input_dim=20, 
                kernel_initializer='uniform', 
                activation='relu'))
model.add(Dropout(0.5))

# hidden layer 2; no input_dim needed, it is inferred from the previous layer's units
model.add(Dense(units=200,  
                kernel_initializer='uniform', 
                activation='relu'))
model.add(Dropout(0.5))

# output layer
model.add(Dense(units=1, # a single sigmoid unit: the probability of leaving
                kernel_initializer='uniform',
                activation='sigmoid'))

print(model.summary()) # shows the architecture and the parameter counts

model.compile(loss='binary_crossentropy',   # binary classification, so binary crossentropy
              optimizer='adam', metrics=['accuracy'])

# Keras performs the train/validation split internally; note that validation_split=0.8
# holds out 80% for validation, so only ~2,000 of the 10,000 rows are actually
# trained on (see the log below)
train_history = model.fit(x=train_feature_trans, y=train_label,
                          validation_split=0.8, epochs=300, 
                          batch_size=2000, verbose=1) # verbose=1 shows a progress bar per epoch

######################### Visualize the training history
show_train_history(train_history,'acc','val_acc')
show_train_history(train_history,'loss','val_loss')


######################### Score on the held-out test set
scores = model.evaluate(test_feature_trans, test_label)
print('\n')
print('accuracy=',scores[1])

######################### Record the model's predictions (the answer sheet)
prediction = model.predict_classes(test_feature_trans)

# save the trained weights
#model.save_weights("Keras_HRanalytics_MLP.h5")
#print('model saved to disk')


Using TensorFlow backend.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 200)               4200      
_________________________________________________________________
dropout_1 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
dropout_2 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 201       
=================================================================
Total params: 44,601
Trainable params: 44,601
Non-trainable params: 0
_________________________________________________________________
None
Train on 1999 samples, validate on 8001 samples
Epoch 1/300
1999/1999 [==============================] - 0s - loss: 0.6939 - acc: 0.4357 - val_loss: 0.6881 - val_acc: 0.7610
Epoch 2/300
1999/1999 [==============================] - 0s - loss: 0.6882 - acc: 0.7574 - val_loss: 0.6836 - val_acc: 0.7610
Epoch 3/300
1999/1999 [==============================] - 0s - loss: 0.6835 - acc: 0.7634 - val_loss: 0.6781 - val_acc: 0.7610
[... epochs 4-297 omitted: loss falls steadily from 0.68 to 0.13 while val_acc climbs from 0.7610 to roughly 0.95 ...]
Epoch 298/300
1999/1999 [==============================] - 0s - loss: 0.1292 - acc: 0.9565 - val_loss: 0.1688 - val_acc: 0.9499
Epoch 299/300
1999/1999 [==============================] - 0s - loss: 0.1311 - acc: 0.9610 - val_loss: 0.1692 - val_acc: 0.9499
Epoch 300/300
1999/1999 [==============================] - 0s - loss: 0.1341 - acc: 0.9560 - val_loss: 0.1696 - val_acc: 0.9494
accuracy= 0.948789757952
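If the commented-out save above is enabled, the weights can be restored later. A minimal sketch, assuming the identical Sequential architecture has been rebuilt first (load_weights restores parameters only, not the model structure):

model.load_weights("Keras_HRanalytics_MLP.h5")
loaded_scores = model.evaluate(test_feature_trans, test_label)
print('accuracy after reload=', loaded_scores[1])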

Train/Test score


In [34]:
df_ans = pd.DataFrame({'Real Class' :test_label})
df_ans['Prediction'] = prediction

In [35]:
df_ans[ df_ans['Real Class'] != df_ans['Prediction'] ].head()


Out[35]:
Real Class Prediction
3 1 0
23 0 1
43 0 1
46 0 1
48 1 0

Confusion Matrix


In [36]:
import seaborn as sns
%matplotlib inline

cols = ['Real_Class_1','Real_Class_0']  # gold standard (the true class)
rows = ['Prediction_1','Prediction_0']  # diagnostic tool (our prediction)

B1P1 = len(df_ans[(df_ans['Prediction'] == df_ans['Real Class']) & (df_ans['Real Class'] == 1)])
B1P0 = len(df_ans[(df_ans['Prediction'] != df_ans['Real Class']) & (df_ans['Real Class'] == 1)])
B0P1 = len(df_ans[(df_ans['Prediction'] != df_ans['Real Class']) & (df_ans['Real Class'] == 0)])
B0P0 = len(df_ans[(df_ans['Prediction'] == df_ans['Real Class']) & (df_ans['Real Class'] == 0)])

conf = np.array([[B1P1,B0P1],[B1P0,B0P0]])
df_cm = pd.DataFrame(conf, columns=cols, index=rows)

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(df_cm, annot=True, ax=ax, fmt='d') 
ax.xaxis.set_ticks_position('top') # x tick labels on top, as is common in textbooks

print('total test case number: ', np.sum(conf))


total test case number:  4999

In [37]:
def model_efficacy(conf):
    total_num = np.sum(conf)
    sen = conf[0][0]/(conf[0][0]+conf[1][0])
    spe = conf[1][1]/(conf[1][0]+conf[1][1])
    false_positive_rate = conf[0][1]/(conf[0][1]+conf[1][1])
    false_negative_rate = conf[1][0]/(conf[0][0]+conf[1][0])
    
    print('total_num: ',total_num)
    print('G1P1: ',conf[0][0]) #G = gold standard; P = prediction
    print('G0P1: ',conf[0][1])
    print('G1P0: ',conf[1][0])
    print('G0P0: ',conf[1][1])
    print('##########################')
    print('sensitivity: ',sen)
    print('specificity: ',spe)
    print('false_positive_rate: ',false_positive_rate)
    print('false_negative_rate: ',false_negative_rate)
    
    return total_num, sen, spe, false_positive_rate, false_negative_rate

model_efficacy(conf)


total_num:  4999
G1P1:  1061
G0P1:  131
G1P0:  125
G0P0:  3682
##########################
sensitivity:  0.894603709949
specificity:  0.967165747308
false_positive_rate:  0.0343561500131
false_negative_rate:  0.105396290051
Out[37]:
(4999,
 0.89460370994940974,
 0.96716574730759131,
 0.034356150013113033,
 0.10539629005059022)
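These numbers can be cross-checked against the sklearn metrics imported at the top of the notebook. A sketch (pos_label selects which class counts as positive; prediction is flattened because predict_classes returns a column vector):

sensitivity = recall_score(test_label, prediction.ravel(), pos_label=1)  # recall on leavers
specificity = recall_score(test_label, prediction.ravel(), pos_label=0)  # recall on stayers
print('sensitivity: ', sensitivity)
print('specificity: ', specificity)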

ROC curve


In [38]:
# Compute ROC curve and ROC area for the MLP

# use the sigmoid output probabilities rather than the hard 0/1 labels,
# so the curve has more than one operating point
predict_prob = model.predict(test_feature_trans).ravel()
false_positive_rate, true_positive_rate, thresholds = roc_curve(test_label, predict_prob)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')
plt.xlim([-0.1,1.2])
plt.ylim([-0.1,1.2])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()


Scikit-Learn: DecisionTreeClassifier (beats the MLP model)


In [39]:
from sklearn import tree
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was deprecated and later removed

X_train, X_test, y_train, y_test = train_test_split(train_feature_trans, train_label,
                                                    test_size=0.25, random_state=0, stratify=train_label)
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
# note: the "training" score is computed on all 10,000 training rows, including the internal hold-out
print("Training Score:%f" % clf.score(train_feature_trans, train_label))
print("Testing Score:%f" % clf.score(test_feature_trans, test_label))


Training Score:0.992600
Testing Score:0.973195
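The gap between the training score (0.9926) and the testing score (0.9732) hints at mild overfitting. A sketch of one standard remedy, capping the tree depth (an addition, not part of the original run):

# a shallower tree trades a little training accuracy for better generalization
clf_shallow = tree.DecisionTreeClassifier(max_depth=5, random_state=0)
clf_shallow.fit(X_train, y_train)
print("Testing Score:%f" % clf_shallow.score(test_feature_trans, test_label))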

In [40]:
prediction2 = clf.predict(test_feature_trans)
prediction2_list = prediction2.reshape(-1).astype(int)
label2_list = test_label.astype(int)

print(classification_report(label2_list, prediction2_list))
print(confusion_matrix(label2_list, prediction2_list))


             precision    recall  f1-score   support

          0       0.99      0.98      0.98      3813
          1       0.93      0.96      0.94      1186

avg / total       0.97      0.97      0.97      4999

[[3730   83]
 [  51 1135]]

In [41]:
conf = confusion_matrix(label2_list, prediction2_list)
f, ax= plt.subplots(figsize = (5, 5))
sns.heatmap(conf, annot=True, ax=ax, fmt='d') 
ax.xaxis.set_ticks_position('top') # x tick labels on top, as is common in textbooks



In [42]:
# model_efficacy is already defined above. Note that sklearn's confusion_matrix
# puts the true class along the rows with class 0 first ([[TN, FP], [FN, TP]]),
# so the G1/G0 labels it prints are swapped relative to the hand-built MLP matrix.
conf = confusion_matrix(label2_list, prediction2_list)
model_efficacy(conf)


total_num:  4999
G1P1:  3730
G0P1:  83
G1P0:  51
G0P0:  1135
##########################
sensitivity:  0.986511504893
specificity:  0.956998313659
false_positive_rate:  0.068144499179
false_negative_rate:  0.0134884951071
Out[42]:
(4999,
 0.98651150489288553,
 0.95699831365935917,
 0.068144499178981938,
 0.01348849510711452)

In [43]:
# Compute ROC curve and ROC area for each class

false_positive_rate, true_positive_rate, thresholds = roc_curve(label2_list, prediction2_list)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')
plt.xlim([-0.1,1.2])
plt.ylim([-0.1,1.2])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()



In [44]:
tree.export_graphviz(clf, out_file='tree.dot')
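The exported tree.dot can be rendered to an image with Graphviz. A sketch assuming the graphviz Python package is installed (alternatively, run `dot -Tpng tree.dot -o tree.png` from a shell):

from graphviz import Source

graph = Source.from_file('tree.dot')  # parse the exported DOT file
graph.render('tree', format='png', cleanup=True)  # writes tree.png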

Scikit-Learn: RandomForestClassifier (the strongest)


In [45]:
from sklearn import ensemble
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(train_feature_trans, train_label,
                                                    test_size=0.25, random_state=0, stratify=train_label)
clf = ensemble.RandomForestClassifier()
clf.fit(X_train, y_train)
print("Training Score:%f" % clf.score(train_feature_trans, train_label))
print("Testing Score:%f" % clf.score(test_feature_trans, test_label))


Training Score:0.994800
Testing Score:0.986197
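Since the random forest comes out on top, it is worth asking which features drive its predictions. A quick sketch using the fitted clf and the column names from df_feature (an addition, not in the original notebook):

importances = pd.Series(clf.feature_importances_, index=df_feature.columns)
print(importances.sort_values(ascending=False).head(10))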

In [46]:
prediction2 = clf.predict(test_feature_trans)
prediction2_list = prediction2.reshape(-1).astype(int)
label2_list = test_label.astype(int)

print(classification_report(label2_list, prediction2_list))
print(confusion_matrix(label2_list, prediction2_list))


             precision    recall  f1-score   support

          0       0.99      1.00      0.99      3813
          1       0.99      0.95      0.97      1186

avg / total       0.99      0.99      0.99      4999

[[3801   12]
 [  57 1129]]

In [47]:
conf = confusion_matrix(label2_list, prediction2_list)
f, ax= plt.subplots(figsize = (5, 5))
sns.heatmap(conf, annot=True, ax=ax, fmt='d') 
ax.xaxis.set_ticks_position('top') # x tick labels on top, as is common in textbooks
plt.show()



In [48]:
# reuse model_efficacy from above (the same caveat about sklearn's row ordering applies)
conf = confusion_matrix(label2_list, prediction2_list)
model_efficacy(conf)


total_num:  4999
G1P1:  3801
G0P1:  12
G1P0:  57
G0P0:  1129
##########################
sensitivity:  0.985225505443
specificity:  0.951939291737
false_positive_rate:  0.0105170902717
false_negative_rate:  0.0147744945568
Out[48]:
(4999,
 0.98522550544323484,
 0.95193929173693081,
 0.010517090271691499,
 0.014774494556765163)

In [49]:
# Compute ROC curve and ROC area for each class

false_positive_rate, true_positive_rate, thresholds = roc_curve(label2_list, prediction2_list)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')
plt.xlim([-0.1,1.2])
plt.ylim([-0.1,1.2])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()


Scikit-learn: SVC (weak)


In [50]:
from sklearn import svm
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(train_feature_trans, train_label,
                                                    test_size=0.25, random_state=0, stratify=train_label)
clf = svm.SVC()
clf.fit(X_train, y_train)
print("Training Score:%f" % clf.score(train_feature_trans, train_label))
print("Testing Score:%f" % clf.score(test_feature_trans, test_label))


Training Score:0.812500
Testing Score:0.816563

In [52]:
prediction2 = clf.predict(test_feature_trans)
prediction2_list = prediction2.reshape(-1).astype(int)
label2_list = test_label.astype(int)

print(classification_report(label2_list, prediction2_list))
print(confusion_matrix(label2_list, prediction2_list))


             precision    recall  f1-score   support

          0       0.81      0.99      0.89      3813
          1       0.87      0.27      0.41      1186

avg / total       0.83      0.82      0.78      4999

[[3767   46]
 [ 871  315]]

In [51]:
conf = confusion_matrix(label2_list, prediction2_list)
f, ax= plt.subplots(figsize = (5, 5))
sns.heatmap(conf, annot=True, ax=ax, fmt='d') 
ax.xaxis.set_ticks_position('top') # x tick labels on top, as is common in textbooks



In [52]:
# reuse model_efficacy from above (the same caveat about sklearn's row ordering applies)
conf = confusion_matrix(label2_list, prediction2_list)
model_efficacy(conf)


total_num:  4999
G1P1:  3767
G0P1:  46
G1P0:  871
G0P0:  315
##########################
sensitivity:  0.812203536007
specificity:  0.265598650927
false_positive_rate:  0.127423822715
false_negative_rate:  0.187796463993
Out[52]:
(4999,
 0.812203536007,
 0.265598650927,
 0.127423822715,
 0.187796463993)

In [53]:
# Compute ROC curve and ROC area for each class

false_positive_rate, true_positive_rate, thresholds = roc_curve(label2_list, prediction2_list)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')
plt.xlim([-0.1,1.2])
plt.ylim([-0.1,1.2])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
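SVC's hard 0/1 predictions give the ROC curve only a single operating point, which understates the model. A sketch (an addition, not part of the original run) using the continuous decision_function scores, available without probability=True:

svc_scores = clf.decision_function(test_feature_trans)
fpr, tpr, thresholds = roc_curve(test_label, svc_scores)
print('SVC AUC with decision scores:', auc(fpr, tpr))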


Scikit-Learn: naive_bayes.BernoulliNB (weak)


In [54]:
from sklearn import naive_bayes
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(train_feature_trans, train_label,
                                                    test_size=0.25, random_state=0, stratify=train_label)
clf = naive_bayes.BernoulliNB()
clf.fit(X_train, y_train)
print("Training Score:%f" % clf.score(train_feature_trans, train_label))
print("Testing Score:%f" % clf.score(test_feature_trans, test_label))


Training Score:0.838900
Testing Score:0.843369
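BernoulliNB assumes binary features: with its default binarize=0.0, every min-max-scaled value above zero maps to 1, discarding most of the continuous signal, which likely explains the weak score. A sketch (an addition, not in the original run) of GaussianNB, which models continuous features directly:

from sklearn.naive_bayes import GaussianNB

clf_gnb = GaussianNB().fit(X_train, y_train)
print("Testing Score:%f" % clf_gnb.score(test_feature_trans, test_label))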

In [55]:
prediction2 = clf.predict(test_feature_trans)
prediction2_list = prediction2.reshape(-1).astype(int)
label2_list = test_label.astype(int)

print(classification_report(label2_list, prediction2_list))
print(confusion_matrix(label2_list, prediction2_list))


             precision    recall  f1-score   support

          0       0.85      0.96      0.90      3813
          1       0.79      0.46      0.58      1186

avg / total       0.84      0.84      0.83      4999

[[3669  144]
 [ 639  547]]

In [56]:
conf = confusion_matrix(label2_list, prediction2_list)
f, ax= plt.subplots(figsize = (5, 5))
sns.heatmap(conf, annot=True, ax=ax, fmt='d') 
ax.xaxis.set_ticks_position('top') # x tick labels on top, as is common in textbooks



In [63]:
# reuse model_efficacy from above (the same caveat about sklearn's row ordering applies)
conf = confusion_matrix(label2_list, prediction2_list)
model_efficacy(conf)


total_num:  4999
G1P1:  3669
G0P1:  144
G1P0:  639
G0P0:  547
##########################
sensitivity:  0.851671309192
specificity:  0.461214165261
false_positive_rate:  0.208393632417
false_negative_rate:  0.148328690808
Out[63]:
(4999,
 0.85167130919220058,
 0.46121416526138281,
 0.20839363241678727,
 0.14832869080779945)

In [57]:
# Compute ROC curve and ROC area for each class
false_positive_rate, true_positive_rate, thresholds = roc_curve(label2_list, prediction2_list)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')
plt.xlim([-0.1,1.2])
plt.ylim([-0.1,1.2])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()



In [ ]: