Detecting Abnormalities in Mammograms Using Thresholded Data

Jay Narhan

June 2017

This notebook applies the best-performing models to thresholded data rather than differenced data. See JN_DC_Diff_Detection.ipynb for more background and details on the problem.
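
The thresholding itself happens upstream of this notebook. As a rough illustration only (not the actual preprocessing pipeline), a global Otsu threshold with scikit-image would look like the following, where the file name is hypothetical:

from skimage import io
from skimage.filters import threshold_otsu

img = io.imread('mammogram.png', as_grey=True)   # hypothetical input file
mask = img > threshold_otsu(img)                 # binary foreground mask
thresholded = img * mask                         # suppress sub-threshold pixels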



In [1]:
import os
import sys
import time
import numpy as np

from tqdm import tqdm

import sklearn.metrics as skm
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

from skimage import color

import keras.callbacks as cb
import keras.utils.np_utils as np_utils
from keras import applications
from keras import regularizers
from keras.models import Sequential
from keras.constraints import maxnorm
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dense, Dropout, Flatten, GaussianNoise

from matplotlib import pyplot as plt
%matplotlib inline

plt.rcParams['figure.figsize'] = (10,10)
np.set_printoptions(precision=2)

sys.path.insert(0, '../helper_modules/')
import jn_bc_helper as bc


Using Theano backend.

Reproducible Research


In [2]:
%%python
import os
os.system('python -V')
os.system('python ../helper_modules/Package_Versions.py')


scipy:           0.19.0
numpy:           1.12.1
matplotlib:      2.0.0
sklearn:         0.18.1
skimage:         0.13.0
theano:          0.9.0.dev-c697eeab84e5b8a74908da654b66ec9eca4f1291
tensorflow:      0.10.0
keras:           2.0.3
Python 2.7.13 :: Continuum Analytics, Inc.
Using Theano backend.

In [3]:
SEED = 7
np.random.seed(SEED)

CURR_DIR  = os.getcwd()
DATA_DIR  = '/Users/jnarhan/Dropbox/Breast_Cancer_Data/Data_Thresholded/ALL_IMGS/'
AUG_DIR   = '/Users/jnarhan/Dropbox/Breast_Cancer_Data/Data_Thresholded/AUG_DETECT_IMGS/'
meta_file = '../../Meta_Data_Files/meta_data_all.csv'
PATHO_INX = 5    # Column index of the pathology label in meta_file
FILE_INX  = 1    # Column index of the file name in meta_file

meta_data, _ = tqdm( bc.load_meta(meta_file, patho_idx=PATHO_INX, file_idx=FILE_INX,
                                  balanceByRemoval=False, verbose=False) )

# Minor addition to retain only records in the meta data for which we actually have images
# (a sketch of the assumed behaviour follows the output below):
meta_data = bc.clean_meta(meta_data, DATA_DIR)


100%|██████████| 2/2 [00:00<00:00, 19239.93it/s]
Number of entries in incoming meta_data: 5822
Images found: 5251
Images missing: 571
Number of entries of outgoing meta_data: 5251
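
bc.clean_meta comes from the helper module; in essence it should drop meta-data rows whose image file is missing from DATA_DIR, which the counts above reflect (5822 in, 571 missing, 5251 out). A minimal stand-in for that behaviour (assumed; the helper's internals may differ), treating the meta data as a mapping of file name to label:

import os

def clean_meta_sketch(meta_data, img_dir):
    # Keep only entries whose image file actually exists on disk.
    return {fname: label for fname, label in meta_data.items()
            if os.path.isfile(os.path.join(img_dir, fname))}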


In [4]:
bc.pprint('Loading data')
cats = bc.bcLabels(['normal', 'abnormal'])

# For smaller images, supply a tuple for the parameter 'imgResize':
# X_data, Y_data = bc.load_data(meta_data, DATA_DIR, cats, imgResize=(150,150)) 
X_data, Y_data = tqdm( bc.load_data(meta_data, DATA_DIR, cats) )

cls_cnts = bc.get_clsCnts(Y_data, cats)
bc.pprint('Before Balancing')
for k in cls_cnts:
    print '{0:10}: {1}'.format(k, cls_cnts[k])


------------
Loading data
------------
100%|██████████| 2/2 [00:00<00:00, 31300.78it/s]
----------------
Before Balancing
----------------
abnormal  : 1732
normal    : 3519

Class Balancing

Here I use a modified version of SMOTE, growing the under-represented class via synthetic image augmentation until the categories are balanced. A sketch of the assumed augmentation loop follows the balancing output below:


In [5]:
datagen = ImageDataGenerator(rotation_range=5, width_shift_range=.01, height_shift_range=0.01,
                             data_format='channels_first')

In [6]:
X_data, Y_data = bc.balanceViaSmote(cls_cnts, meta_data, DATA_DIR, AUG_DIR, cats, 
                                    datagen, X_data, Y_data, seed=SEED, verbose=True)


---------------
After Balancing
---------------
abnormal  : 3519
normal    : 3519
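
A minimal sketch of the augmentation loop assumed inside bc.balanceViaSmote: draw synthetic variants of minority-class images from the Keras generator until the class counts match. The function name and signature here are illustrative only.

import numpy as np

def augment_to_balance(X_minority, datagen, n_needed, seed=SEED):
    # X_minority: array of shape (n, 1, height, width), channels-first.
    # Yields n_needed synthetic images via the small rotations/shifts above.
    flow = datagen.flow(X_minority, batch_size=1, seed=seed)
    return np.array([flow.next()[0] for _ in range(n_needed)])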

Create the Training and Test Datasets


In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data,
                                                    test_size=0.25,
                                                    random_state=SEED,
                                                    stratify=zip(*Y_data)[0])

print 'Size of X_train: {:>5}'.format(len(X_train))
print 'Size of X_test: {:>5}'.format(len(X_test))
print 'Size of Y_train: {:>5}'.format(len(Y_train))
print 'Size of Y_test: {:>5}'.format(len(Y_test))

print X_train.shape
print X_test.shape
print Y_train.shape
print Y_test.shape

data = [X_train, X_test, Y_train, Y_test]


Size of X_train:  5278
Size of X_test:  1760
Size of Y_train:  5278
Size of Y_test:  1760
(5278, 255, 255)
(1760, 255, 255)
(5278, 1)
(1760, 1)

Support Vector Machine Model


In [8]:
# Flatten each 255x255 image into a single 65,025-element feature vector for the SVM:
X_train_svm = X_train.reshape( (X_train.shape[0], -1))
X_test_svm  = X_test.reshape( (X_test.shape[0], -1))

In [9]:
SVM_model = SVC(gamma=0.001)
SVM_model.fit( X_train_svm, Y_train)


/Users/jnarhan/miniconda2/envs/bc_venv/lib/python2.7/site-packages/sklearn/utils/validation.py:526: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Out[9]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [10]:
predictOutput = SVM_model.predict(X_test_svm)
svm_acc = metrics.accuracy_score(y_true=Y_test, y_pred=predictOutput)

print 'SVM Accuracy: {: >7.2f}%'.format(svm_acc * 100)
print 'SVM Error: {: >10.2f}%'.format(100 - svm_acc * 100)


SVM Accuracy:   73.92%
SVM Error:      26.08%

In [11]:
svm_matrix = skm.confusion_matrix(y_true=Y_test, y_pred=predictOutput)
numBC = bc.reverseDict(cats)
class_names = numBC.values()

plt.figure(figsize=(8,6))
bc.plot_confusion_matrix(svm_matrix, classes=class_names, normalize=True, 
                         title='SVM Normalized Confusion Matrix Using Thresholded Data\n')
plt.tight_layout()
plt.savefig('../../figures/jn_SVM_Detect_Threshold_CM_20170609.png', dpi=100)


Normalized confusion matrix
[[ 0.59  0.41]
 [ 0.11  0.89]]

In [12]:
plt.figure(figsize=(8,6))
bc.plot_confusion_matrix(svm_matrix, classes=class_names, normalize=False, 
                         title='SVM Raw Confusion Matrix Using Thresholded Data\n')
plt.tight_layout()


Confusion matrix, without normalization
[[517 363]
 [ 96 784]]

In [13]:
bc.cat_stats(svm_matrix)


Out[13]:
{'Accuracy': 73.92,
 'F1': 0.77,
 'NPV': 84.34,
 'PPV': 68.35,
 'Sensitivity': 89.09,
 'Specificity': 58.75}
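
These values follow directly from the raw confusion matrix above, with 'abnormal' as the positive class and the layout [[TN, FP], [FN, TP]]. A quick check:

TN, FP, FN, TP = 517., 363., 96., 784.

sensitivity = TP / (TP + FN)                   # 0.8909 -> 89.09%
specificity = TN / (TN + FP)                   # 0.5875 -> 58.75%
ppv         = TP / (TP + FP)                   # 0.6835 -> 68.35%
npv         = TN / (TN + FN)                   # 0.8434 -> 84.34%
f1          = 2 * ppv * sensitivity / (ppv + sensitivity)   # 0.77
accuracy    = (TP + TN) / (TP + TN + FP + FN)               # 0.7392 -> 73.92%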

CNN Modelling Using VGG16 in Transfer Learning


In [14]:
def VGG_Prep(img_data):
    """
    :param img_data: training or test images of shape [#images, height, width]
    :return: array of shape [#images, height, width, 3], rescaled to 0-255
             and converted to RGB for the VGG16 network
    """
    images = np.zeros([len(img_data), img_data.shape[1], img_data.shape[2], 3])
    for i in range(0, len(img_data)):
        im = (img_data[i] * 255)        # Rescale to 0-255; the original ImageNet images were not rescaled
        im = color.gray2rgb(im)
        images[i] = im
    return(images)

In [15]:
def vgg16_bottleneck(data, modelPath, fn_train_feats, fn_train_lbls, fn_test_feats, fn_test_lbls):
    # Loading data
    X_train, X_test, Y_train, Y_test = data
    
    print('Preparing the Training Data for the VGG_16 Model.')
    X_train = VGG_Prep(X_train)
    print('Preparing the Test Data for the VGG_16 Model')
    X_test = VGG_Prep(X_test)
        
    print('Loading the VGG_16 Model')
    # "model" excludes top layer of VGG16:
    model = applications.VGG16(include_top=False, weights='imagenet') 
        
    # Generating the bottleneck features for the training data
    print('Evaluating the VGG_16 Model on the Training Data')
    bottleneck_features_train = model.predict(X_train)
    
    # Saving the bottleneck features for the training data
    featuresTrain = os.path.join(modelPath, fn_train_feats)
    labelsTrain = os.path.join(modelPath, fn_train_lbls)
    print('Saving the Training Data Bottleneck Features.')
    np.save(open(featuresTrain, 'wb'), bottleneck_features_train)
    np.save(open(labelsTrain, 'wb'), Y_train)

    # Generating the bottleneck features for the test data
    print('Evaluating the VGG_16 Model on the Test Data')
    bottleneck_features_test = model.predict(X_test)
    
    # Saving the bottleneck features for the test data
    featuresTest = os.path.join(modelPath, fn_test_feats)
    labelsTest = os.path.join(modelPath, fn_test_lbls)
    print('Saving the Test Data Bottleneck Features.')
    np.save(open(featuresTest, 'wb'), bottleneck_features_test)
    np.save(open(labelsTest, 'wb'), Y_test)

In [16]:
# Locations for the bottleneck and labels files that we need
train_bottleneck = '2Class_VGG16_bottleneck_features_train_threshold.npy'
train_labels     = '2Class_VGG16_labels_train_threshold.npy'
test_bottleneck  = '2Class_VGG16_bottleneck_features_test_threshold.npy'
test_labels      = '2Class_VGG16_labels_test_threshold.npy'
modelPath = os.getcwd()

top_model_weights_path = './weights/'

np.random.seed(SEED)
vgg16_bottleneck(data, modelPath, train_bottleneck, train_labels, test_bottleneck, test_labels)


Preparing the Training Data for the VGG_16 Model.
Preparing the Test Data for the VGG_16 Model
Loading the VGG_16 Model
Evaluating the VGG_16 Model on the Training Data
Saving the Training Data Bottleneck Features.
Evaluating the VGG_16 Model on the Test Data
Saving the Test Data Bottleneck Features.
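
With include_top=False and 255x255 inputs, VGG16's five stride-2 poolings shrink each image 255 -> 127 -> 63 -> 31 -> 15 -> 7, so every image should be distilled to a 7x7x512 bottleneck tensor; the dense top model below therefore trains on 25,088 bottleneck features per image rather than raw pixels.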

In [17]:
def train_top_model(train_feats, train_lab, test_feats, test_lab, model_path, model_save, epoch = 50, batch = 64):
    start_time = time.time()
    
    train_bottleneck = os.path.join(model_path, train_feats)
    train_labels = os.path.join(model_path, train_lab)
    test_bottleneck = os.path.join(model_path, test_feats)
    test_labels = os.path.join(model_path, test_lab)
    
    history = bc.LossHistory()
    
    X_train = np.load(train_bottleneck)
    Y_train = np.load(train_labels)
    Y_train = np_utils.to_categorical(Y_train, num_classes=2)
    
    X_test = np.load(test_bottleneck)
    Y_test = np.load(test_labels)
    Y_test = np_utils.to_categorical(Y_test, num_classes=2)

    model = Sequential()
    model.add(Flatten(input_shape=X_train.shape[1:]))
    model.add( Dropout(0.7))
    
    model.add( Dense(256, activation='relu', kernel_constraint= maxnorm(3.)) )
    model.add( Dropout(0.5))
    
    # Softmax for probabilities for each class at the output layer
    model.add( Dense(2, activation='softmax'))
    
    model.compile(optimizer='rmsprop',  # adadelta
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])

    model.fit(X_train, Y_train,
              epochs=epoch,
              batch_size=batch,
              callbacks=[history],
              validation_data=(X_test, Y_test),
              verbose=2)
    
    print "Training duration : {0}".format(time.time() - start_time)
    score = model.evaluate(X_test, Y_test, batch_size=16, verbose=2)

    print "Network's test score [loss, accuracy]: {0}".format(score)
    print 'CNN Error: {:.2f}%'.format(100 - score[1] * 100)
    
    bc.save_model(model_save, model, "jn_VGG16_Detection_top_weights_threshold.h5")
    
    return model, history.losses, history.acc, score

In [18]:
np.random.seed(SEED)
(trans_model, loss_cnn, acc_cnn, test_score_cnn) = train_top_model(train_feats=train_bottleneck,
                                                                   train_lab=train_labels, 
                                                                   test_feats=test_bottleneck, 
                                                                   test_lab=test_labels,
                                                                   model_path=modelPath, 
                                                                   model_save=top_model_weights_path,
                                                                   epoch=100)
plt.figure(figsize=(10,10))
bc.plot_losses(loss_cnn, acc_cnn)
plt.savefig('../../figures/epoch_figures/jn_Transfer_Detection_Learning_Threshold_20170609.png', dpi=100)


Train on 5278 samples, validate on 1760 samples
Epoch 1/100
14s - loss: 5.7267 - acc: 0.6252 - val_loss: 5.2490 - val_acc: 0.6591
Epoch 2/100
13s - loss: 5.3294 - acc: 0.6576 - val_loss: 4.8490 - val_acc: 0.6886
Epoch 3/100
13s - loss: 5.1174 - acc: 0.6724 - val_loss: 5.8314 - val_acc: 0.6256
Epoch 4/100
13s - loss: 4.9663 - acc: 0.6825 - val_loss: 4.9110 - val_acc: 0.6864
Epoch 5/100
13s - loss: 4.7598 - acc: 0.6936 - val_loss: 4.8388 - val_acc: 0.6886
Epoch 6/100
13s - loss: 4.7830 - acc: 0.6950 - val_loss: 5.1348 - val_acc: 0.6744
Epoch 7/100
14s - loss: 4.7952 - acc: 0.6940 - val_loss: 4.9695 - val_acc: 0.6852
Epoch 8/100
14s - loss: 4.7580 - acc: 0.6967 - val_loss: 5.0219 - val_acc: 0.6818
Epoch 9/100
15s - loss: 4.5918 - acc: 0.7075 - val_loss: 4.6551 - val_acc: 0.7023
Epoch 10/100
17s - loss: 4.8876 - acc: 0.6908 - val_loss: 5.2117 - val_acc: 0.6710
Epoch 11/100
18s - loss: 4.9263 - acc: 0.6862 - val_loss: 4.8213 - val_acc: 0.6949
Epoch 12/100
18s - loss: 4.7827 - acc: 0.6963 - val_loss: 4.6151 - val_acc: 0.7057
Epoch 13/100
19s - loss: 4.6472 - acc: 0.7035 - val_loss: 4.8000 - val_acc: 0.6949
Epoch 14/100
19s - loss: 4.7158 - acc: 0.7018 - val_loss: 4.4474 - val_acc: 0.7148
Epoch 15/100
19s - loss: 4.5537 - acc: 0.7118 - val_loss: 4.3341 - val_acc: 0.7261
Epoch 16/100
20s - loss: 4.6894 - acc: 0.7016 - val_loss: 4.5030 - val_acc: 0.7153
Epoch 17/100
20s - loss: 4.5637 - acc: 0.7101 - val_loss: 4.3482 - val_acc: 0.7239
Epoch 18/100
20s - loss: 4.4949 - acc: 0.7150 - val_loss: 4.2990 - val_acc: 0.7284
Epoch 19/100
21s - loss: 4.3897 - acc: 0.7226 - val_loss: 4.2779 - val_acc: 0.7290
Epoch 20/100
20s - loss: 4.6561 - acc: 0.7058 - val_loss: 4.5684 - val_acc: 0.7119
Epoch 21/100
21s - loss: 4.9442 - acc: 0.6881 - val_loss: 4.6287 - val_acc: 0.7080
Epoch 22/100
21s - loss: 4.5427 - acc: 0.7126 - val_loss: 4.3526 - val_acc: 0.7239
Epoch 23/100
21s - loss: 4.5540 - acc: 0.7120 - val_loss: 4.3341 - val_acc: 0.7256
Epoch 24/100
21s - loss: 4.4689 - acc: 0.7175 - val_loss: 4.4848 - val_acc: 0.7165
Epoch 25/100
21s - loss: 4.6980 - acc: 0.7035 - val_loss: 4.4559 - val_acc: 0.7199
Epoch 26/100
21s - loss: 4.4066 - acc: 0.7228 - val_loss: 4.5336 - val_acc: 0.7142
Epoch 27/100
21s - loss: 4.3630 - acc: 0.7245 - val_loss: 4.6135 - val_acc: 0.7080
Epoch 28/100
21s - loss: 4.4101 - acc: 0.7211 - val_loss: 4.8067 - val_acc: 0.6972
Epoch 29/100
21s - loss: 4.4633 - acc: 0.7186 - val_loss: 4.6489 - val_acc: 0.7068
Epoch 30/100
21s - loss: 4.5355 - acc: 0.7152 - val_loss: 4.5362 - val_acc: 0.7136
Epoch 31/100
22s - loss: 4.4949 - acc: 0.7167 - val_loss: 4.6140 - val_acc: 0.7085
Epoch 32/100
22s - loss: 4.3848 - acc: 0.7245 - val_loss: 4.5827 - val_acc: 0.7102
Epoch 33/100
22s - loss: 4.4578 - acc: 0.7209 - val_loss: 4.5072 - val_acc: 0.7153
Epoch 34/100
22s - loss: 4.4777 - acc: 0.7183 - val_loss: 4.6498 - val_acc: 0.7068
Epoch 35/100
22s - loss: 4.3249 - acc: 0.7272 - val_loss: 4.4018 - val_acc: 0.7205
Epoch 36/100
22s - loss: 4.2786 - acc: 0.7302 - val_loss: 4.4123 - val_acc: 0.7222
Epoch 37/100
22s - loss: 4.4743 - acc: 0.7181 - val_loss: 4.5300 - val_acc: 0.7153
Epoch 38/100
22s - loss: 4.4168 - acc: 0.7221 - val_loss: 4.6666 - val_acc: 0.7063
Epoch 39/100
22s - loss: 4.7201 - acc: 0.7031 - val_loss: 4.6501 - val_acc: 0.7080
Epoch 40/100
22s - loss: 4.5600 - acc: 0.7137 - val_loss: 4.5059 - val_acc: 0.7170
Epoch 41/100
23s - loss: 4.6573 - acc: 0.7071 - val_loss: 4.3648 - val_acc: 0.7256
Epoch 42/100
23s - loss: 4.3145 - acc: 0.7293 - val_loss: 4.5036 - val_acc: 0.7182
Epoch 43/100
23s - loss: 4.3311 - acc: 0.7275 - val_loss: 4.5257 - val_acc: 0.7148
Epoch 44/100
23s - loss: 4.5108 - acc: 0.7173 - val_loss: 4.3432 - val_acc: 0.7267
Epoch 45/100
23s - loss: 4.5146 - acc: 0.7156 - val_loss: 4.9583 - val_acc: 0.6898
Epoch 46/100
23s - loss: 4.4457 - acc: 0.7203 - val_loss: 4.2961 - val_acc: 0.7273
Epoch 47/100
23s - loss: 4.3867 - acc: 0.7236 - val_loss: 4.2498 - val_acc: 0.7318
Epoch 48/100
23s - loss: 4.3742 - acc: 0.7247 - val_loss: 4.3054 - val_acc: 0.7301
Epoch 49/100
23s - loss: 4.2645 - acc: 0.7321 - val_loss: 4.2390 - val_acc: 0.7330
Epoch 50/100
23s - loss: 4.3345 - acc: 0.7274 - val_loss: 4.4028 - val_acc: 0.7205
Epoch 51/100
23s - loss: 4.5126 - acc: 0.7169 - val_loss: 4.6485 - val_acc: 0.7091
Epoch 52/100
23s - loss: 4.4655 - acc: 0.7192 - val_loss: 4.4272 - val_acc: 0.7227
Epoch 53/100
23s - loss: 4.3646 - acc: 0.7251 - val_loss: 4.5062 - val_acc: 0.7165
Epoch 54/100
23s - loss: 4.3136 - acc: 0.7285 - val_loss: 4.6003 - val_acc: 0.7102
Epoch 55/100
23s - loss: 4.4375 - acc: 0.7209 - val_loss: 4.6004 - val_acc: 0.7119
Epoch 56/100
23s - loss: 4.4255 - acc: 0.7217 - val_loss: 4.5380 - val_acc: 0.7153
Epoch 57/100
23s - loss: 4.3314 - acc: 0.7270 - val_loss: 4.5279 - val_acc: 0.7153
Epoch 58/100
23s - loss: 4.2601 - acc: 0.7319 - val_loss: 4.5641 - val_acc: 0.7142
Epoch 59/100
23s - loss: 4.4226 - acc: 0.7219 - val_loss: 4.5918 - val_acc: 0.7125
Epoch 60/100
23s - loss: 4.4450 - acc: 0.7213 - val_loss: 4.7961 - val_acc: 0.6994
Epoch 61/100
23s - loss: 4.6456 - acc: 0.7080 - val_loss: 4.4778 - val_acc: 0.7182
Epoch 62/100
23s - loss: 4.3576 - acc: 0.7260 - val_loss: 4.5448 - val_acc: 0.7148
Epoch 63/100
23s - loss: 4.4067 - acc: 0.7238 - val_loss: 4.4444 - val_acc: 0.7216
Epoch 64/100
23s - loss: 4.3787 - acc: 0.7245 - val_loss: 4.4806 - val_acc: 0.7188
Epoch 65/100
23s - loss: 4.2574 - acc: 0.7319 - val_loss: 4.6544 - val_acc: 0.7068
Epoch 66/100
23s - loss: 4.4041 - acc: 0.7239 - val_loss: 4.3753 - val_acc: 0.7227
Epoch 67/100
23s - loss: 4.2477 - acc: 0.7329 - val_loss: 4.6110 - val_acc: 0.7097
Epoch 68/100
24s - loss: 4.3006 - acc: 0.7296 - val_loss: 4.4215 - val_acc: 0.7222
Epoch 69/100
24s - loss: 4.3274 - acc: 0.7275 - val_loss: 4.5474 - val_acc: 0.7131
Epoch 70/100
24s - loss: 4.3023 - acc: 0.7304 - val_loss: 4.4578 - val_acc: 0.7205
Epoch 71/100
23s - loss: 4.1568 - acc: 0.7389 - val_loss: 4.1927 - val_acc: 0.7369
Epoch 72/100
23s - loss: 4.3908 - acc: 0.7228 - val_loss: 4.4159 - val_acc: 0.7233
Epoch 73/100
23s - loss: 4.1861 - acc: 0.7370 - val_loss: 4.2766 - val_acc: 0.7301
Epoch 74/100
23s - loss: 4.2933 - acc: 0.7298 - val_loss: 4.4892 - val_acc: 0.7176
Epoch 75/100
23s - loss: 4.4432 - acc: 0.7207 - val_loss: 4.4753 - val_acc: 0.7193
Epoch 76/100
24s - loss: 4.2906 - acc: 0.7294 - val_loss: 4.3441 - val_acc: 0.7284
Epoch 77/100
23s - loss: 4.1963 - acc: 0.7359 - val_loss: 4.3467 - val_acc: 0.7273
Epoch 78/100
23s - loss: 4.1900 - acc: 0.7359 - val_loss: 4.2620 - val_acc: 0.7312
Epoch 79/100
24s - loss: 4.2915 - acc: 0.7304 - val_loss: 4.4874 - val_acc: 0.7176
Epoch 80/100
24s - loss: 4.5098 - acc: 0.7169 - val_loss: 4.4841 - val_acc: 0.7188
Epoch 81/100
24s - loss: 4.2185 - acc: 0.7353 - val_loss: 4.4945 - val_acc: 0.7165
Epoch 82/100
24s - loss: 4.3799 - acc: 0.7245 - val_loss: 4.5305 - val_acc: 0.7148
Epoch 83/100
24s - loss: 4.3515 - acc: 0.7264 - val_loss: 4.3067 - val_acc: 0.7295
Epoch 84/100
24s - loss: 4.2079 - acc: 0.7347 - val_loss: 4.4240 - val_acc: 0.7199
Epoch 85/100
24s - loss: 4.4279 - acc: 0.7217 - val_loss: 4.4240 - val_acc: 0.7216
Epoch 86/100
24s - loss: 4.2315 - acc: 0.7334 - val_loss: 4.2492 - val_acc: 0.7318
Epoch 87/100
24s - loss: 4.0597 - acc: 0.7444 - val_loss: 4.6020 - val_acc: 0.7108
Epoch 88/100
24s - loss: 4.4325 - acc: 0.7215 - val_loss: 4.1997 - val_acc: 0.7369
Epoch 89/100
24s - loss: 4.1394 - acc: 0.7401 - val_loss: 4.1045 - val_acc: 0.7420
Epoch 90/100
24s - loss: 4.2818 - acc: 0.7308 - val_loss: 4.1728 - val_acc: 0.7375
Epoch 91/100
24s - loss: 4.1001 - acc: 0.7425 - val_loss: 4.2634 - val_acc: 0.7330
Epoch 92/100
24s - loss: 4.1790 - acc: 0.7378 - val_loss: 4.3711 - val_acc: 0.7250
Epoch 93/100
24s - loss: 4.2280 - acc: 0.7344 - val_loss: 4.0411 - val_acc: 0.7426
Epoch 94/100
24s - loss: 4.2374 - acc: 0.7338 - val_loss: 4.0740 - val_acc: 0.7438
Epoch 95/100
24s - loss: 4.1013 - acc: 0.7421 - val_loss: 3.9726 - val_acc: 0.7500
Epoch 96/100
24s - loss: 4.1247 - acc: 0.7401 - val_loss: 4.0470 - val_acc: 0.7460
Epoch 97/100
24s - loss: 4.1801 - acc: 0.7372 - val_loss: 4.5653 - val_acc: 0.7119
Epoch 98/100
24s - loss: 4.2860 - acc: 0.7308 - val_loss: 4.2715 - val_acc: 0.7318
Epoch 99/100
24s - loss: 4.1986 - acc: 0.7357 - val_loss: 4.4534 - val_acc: 0.7205
Epoch 100/100
24s - loss: 4.2011 - acc: 0.7363 - val_loss: 4.1331 - val_acc: 0.7403
Training duration : 2220.00713396
Network's test score [loss, accuracy]: [4.1330553086448045, 0.74034090909090911]
CNN Error: 25.97%
Model and Weights Saved to Disk

In [19]:
print 'Transfer Learning CNN Accuracy: {: >7.2f}%'.format(test_score_cnn[1] * 100)
print 'Transfer Learning CNN Error: {: >10.2f}%'.format(100 - test_score_cnn[1] * 100)

predictOutput = bc.predict(trans_model, np.load(test_bottleneck))
trans_matrix = skm.confusion_matrix(y_true=Y_test, y_pred=predictOutput)

plt.figure(figsize=(8,6))
bc.plot_confusion_matrix(trans_matrix, classes=class_names, normalize=True,
                         title='Transfer CNN Normalized Confusion Matrix Using Thresholded Data\n')
plt.tight_layout()
plt.savefig('../../figures/jn_Transfer_Detection_CM_Threshold_20170609.png', dpi=100)


Transfer Learning CNN Accuracy:   74.03%
Transfer Learning CNN Error:      25.97%
Normalized confusion matrix
[[ 0.72  0.28]
 [ 0.24  0.76]]

In [20]:
plt.figure(figsize=(8,6))
bc.plot_confusion_matrix(trans_matrix, classes=class_names, normalize=False,
                         title='Transfer CNN Raw Confusion Matrix Using Thresholded Data\n')
plt.tight_layout()


Confusion matrix, without normalization
[[636 244]
 [213 667]]

In [21]:
bc.cat_stats(trans_matrix)


Out[21]:
{'Accuracy': 74.03,
 'F1': 0.74,
 'NPV': 74.91,
 'PPV': 73.22,
 'Sensitivity': 75.8,
 'Specificity': 72.27}
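
At essentially the same accuracy as the SVM (74.03% vs 73.92%), the transfer model is much better balanced: specificity improves from 58.75% to 72.27%, at the cost of sensitivity falling from 89.09% to 75.8%.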

Core CNN Modelling

Prep and package the data for Keras processing:


In [22]:
data = [X_train, X_test, Y_train, Y_test]
X_train, X_test, Y_train, Y_test = bc.prep_data(data, cats)
data = [X_train, X_test, Y_train, Y_test]

print X_train.shape
print X_test.shape
print Y_train.shape
print Y_test.shape


Prep data for NNs ...
Data Prepped for Neural Nets.
(5278, 1, 255, 255)
(1760, 1, 255, 255)
(5278, 2)
(1760, 2)
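
The printed shapes confirm the prep step: each image gains a leading channel axis (channels-first for the Theano backend) and the labels become one-hot vectors. A minimal equivalent of what bc.prep_data appears to do (assumed, not the helper's actual code):

X_train = X_train.reshape(X_train.shape[0], 1, 255, 255).astype('float32')
X_test  = X_test.reshape(X_test.shape[0], 1, 255, 255).astype('float32')
Y_train = np_utils.to_categorical(Y_train, num_classes=2)
Y_test  = np_utils.to_categorical(Y_test, num_classes=2)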

Heavy Regularization


In [23]:
def diff_model_v7_reg(numClasses, input_shape=(3, 150,150), add_noise=False, noise=0.01, verbose=False):
    model = Sequential()
    if (add_noise):
        model.add( GaussianNoise(noise, input_shape=input_shape))
        model.add( Convolution2D(filters=16, 
                                 kernel_size=(5,5), 
                                 data_format='channels_first',
                                 padding='same',
                                 activation='relu'))
    else:
        model.add( Convolution2D(filters=16, 
                                 kernel_size=(5,5), 
                                 data_format='channels_first',
                                 padding='same',
                                 activation='relu',
                                 input_shape=input_shape))
    model.add( Dropout(0.7))
    
    model.add( Convolution2D(filters=32, kernel_size=(3,3), 
                             data_format='channels_first', padding='same', activation='relu'))
    model.add( MaxPooling2D(pool_size= (2,2), data_format='channels_first'))
    model.add( Dropout(0.4))
    model.add( Convolution2D(filters=32, kernel_size=(3,3), 
                             data_format='channels_first', activation='relu'))
    
    model.add( Convolution2D(filters=64, kernel_size=(3,3), 
                             data_format='channels_first', padding='same', activation='relu',
                             kernel_regularizer=regularizers.l2(0.01)))
    model.add( MaxPooling2D(pool_size= (2,2), data_format='channels_first'))
    model.add( Convolution2D(filters=64, kernel_size=(3,3), 
                             data_format='channels_first', activation='relu',
                             kernel_regularizer=regularizers.l2(0.01)))
    model.add( Dropout(0.4))
    
    model.add( Convolution2D(filters=128, kernel_size=(3,3), 
                             data_format='channels_first', padding='same', activation='relu',
                             kernel_regularizer=regularizers.l2(0.01)))
    model.add( MaxPooling2D(pool_size= (2,2), data_format='channels_first'))
    
    model.add( Convolution2D(filters=128, kernel_size=(3,3), 
                             data_format='channels_first', activation='relu',
                             kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dropout(0.4))
    
    model.add( Flatten())
    
    model.add( Dense(128, activation='relu', kernel_constraint= maxnorm(3.)) )
    model.add( Dropout(0.4))
    
    model.add( Dense(64, activation='relu', kernel_constraint= maxnorm(3.)) )
    model.add( Dropout(0.4))
    
    # Softmax for probabilities for each class at the output layer
    model.add( Dense(numClasses, activation='softmax'))
    
    if verbose:
        print( model.summary() )
    
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

In [24]:
diff_model7_noise_reg = diff_model_v7_reg(len(cats),
                                          input_shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3]),
                                          add_noise=True, verbose=True)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gaussian_noise_1 (GaussianNo (None, 1, 255, 255)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 255, 255)      416       
_________________________________________________________________
dropout_3 (Dropout)          (None, 16, 255, 255)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 255, 255)      4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 127, 127)      0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 32, 127, 127)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 125, 125)      9248      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 64, 125, 125)      18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 64, 62, 62)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 64, 60, 60)        36928     
_________________________________________________________________
dropout_5 (Dropout)          (None, 64, 60, 60)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 128, 60, 60)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 128, 30, 30)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 128, 28, 28)       147584    
_________________________________________________________________
dropout_6 (Dropout)          (None, 128, 28, 28)       0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               12845184  
_________________________________________________________________
dropout_7 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_8 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 130       
=================================================================
Total params: 13,144,738
Trainable params: 13,144,738
Non-trainable params: 0
_________________________________________________________________
None
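
As a sanity check on the summary: the first convolution has 16 x (5 x 5 x 1) + 16 = 416 parameters (filters x kernel area x input channels, plus biases), and the bulk of the 13.1M total sits in the first dense layer: 100,352 flattened activations x 128 units + 128 biases = 12,845,184 parameters.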

In [25]:
np.random.seed(SEED)

(cnn_model, loss_cnn, acc_cnn, test_score_cnn) = bc.run_network(model=diff_model7_noise_reg, 
                                                                data=data, 
                                                                epochs=50, batch=64)
plt.figure(figsize=(10,10))
bc.plot_losses(loss_cnn, acc_cnn)
plt.savefig('../../figures/epoch_figures/jn_Core_CNN_Detection_Learning_Threshold_20170609.png', dpi=100)


Training model...
Train on 5278 samples, validate on 1760 samples
Epoch 1/50
1565s - loss: 1.6386 - acc: 0.5371 - val_loss: 0.7872 - val_acc: 0.5000
Epoch 2/50
1551s - loss: 0.6963 - acc: 0.5915 - val_loss: 0.7163 - val_acc: 0.4790
Epoch 3/50
1546s - loss: 0.6660 - acc: 0.6167 - val_loss: 0.6758 - val_acc: 0.6102
Epoch 4/50
1550s - loss: 0.6526 - acc: 0.6347 - val_loss: 0.6747 - val_acc: 0.6159
Epoch 5/50
1543s - loss: 0.6469 - acc: 0.6340 - val_loss: 0.6961 - val_acc: 0.5335
Epoch 6/50
1529s - loss: 0.6384 - acc: 0.6461 - val_loss: 0.6494 - val_acc: 0.6381
Epoch 7/50
1544s - loss: 0.6274 - acc: 0.6548 - val_loss: 0.6914 - val_acc: 0.5580
Epoch 8/50
1556s - loss: 0.6243 - acc: 0.6626 - val_loss: 0.6869 - val_acc: 0.6000
Epoch 9/50
1566s - loss: 0.6209 - acc: 0.6599 - val_loss: 0.6707 - val_acc: 0.6227
Epoch 10/50
1568s - loss: 0.6121 - acc: 0.6673 - val_loss: 0.6460 - val_acc: 0.6364
Epoch 11/50
1567s - loss: 0.6097 - acc: 0.6711 - val_loss: 0.6654 - val_acc: 0.6256
Epoch 12/50
1568s - loss: 0.6052 - acc: 0.6701 - val_loss: 0.6691 - val_acc: 0.6108
Epoch 13/50
1574s - loss: 0.6018 - acc: 0.6813 - val_loss: 0.6726 - val_acc: 0.5920
Epoch 14/50
1569s - loss: 0.5940 - acc: 0.6768 - val_loss: 0.6661 - val_acc: 0.6148
Epoch 15/50
1566s - loss: 0.5902 - acc: 0.6914 - val_loss: 0.6853 - val_acc: 0.5881
Epoch 16/50
1573s - loss: 0.5865 - acc: 0.6904 - val_loss: 0.6118 - val_acc: 0.6676
Epoch 17/50
1572s - loss: 0.5816 - acc: 0.6855 - val_loss: 0.6333 - val_acc: 0.6540
Epoch 18/50
1568s - loss: 0.5822 - acc: 0.6970 - val_loss: 0.6404 - val_acc: 0.6585
Epoch 19/50
1573s - loss: 0.5757 - acc: 0.6997 - val_loss: 0.6342 - val_acc: 0.6585
Epoch 20/50
1568s - loss: 0.5769 - acc: 0.6976 - val_loss: 0.6224 - val_acc: 0.6528
Epoch 21/50
1565s - loss: 0.5741 - acc: 0.7067 - val_loss: 0.6395 - val_acc: 0.6494
Epoch 22/50
1569s - loss: 0.5710 - acc: 0.7025 - val_loss: 0.5998 - val_acc: 0.6744
Epoch 23/50
1570s - loss: 0.5650 - acc: 0.7067 - val_loss: 0.6515 - val_acc: 0.6273
Epoch 24/50
1566s - loss: 0.5623 - acc: 0.7067 - val_loss: 0.6551 - val_acc: 0.6199
Epoch 25/50
1566s - loss: 0.5602 - acc: 0.7143 - val_loss: 0.7139 - val_acc: 0.5494
Epoch 26/50
1562s - loss: 0.5519 - acc: 0.7162 - val_loss: 0.6133 - val_acc: 0.6710
Epoch 27/50
1572s - loss: 0.5509 - acc: 0.7222 - val_loss: 0.6069 - val_acc: 0.6960
Epoch 28/50
1566s - loss: 0.5455 - acc: 0.7217 - val_loss: 0.6435 - val_acc: 0.6335
Epoch 29/50
1568s - loss: 0.5441 - acc: 0.7241 - val_loss: 0.6751 - val_acc: 0.5858
Epoch 30/50
1570s - loss: 0.5383 - acc: 0.7257 - val_loss: 0.6978 - val_acc: 0.5608
Epoch 31/50
1573s - loss: 0.5426 - acc: 0.7232 - val_loss: 0.6146 - val_acc: 0.6773
Epoch 32/50
1570s - loss: 0.5306 - acc: 0.7351 - val_loss: 0.6273 - val_acc: 0.6557
Epoch 33/50
1570s - loss: 0.5201 - acc: 0.7410 - val_loss: 0.5972 - val_acc: 0.6955
Epoch 34/50
1566s - loss: 0.5169 - acc: 0.7442 - val_loss: 0.6109 - val_acc: 0.6699
Epoch 35/50
1555s - loss: 0.5216 - acc: 0.7455 - val_loss: 0.6532 - val_acc: 0.6273
Epoch 36/50
1554s - loss: 0.5122 - acc: 0.7459 - val_loss: 0.5804 - val_acc: 0.7017
Epoch 37/50
1566s - loss: 0.5101 - acc: 0.7463 - val_loss: 0.6626 - val_acc: 0.5903
Epoch 38/50
1556s - loss: 0.5005 - acc: 0.7569 - val_loss: 0.5918 - val_acc: 0.6756
Epoch 39/50
1566s - loss: 0.4957 - acc: 0.7537 - val_loss: 0.6018 - val_acc: 0.6847
Epoch 40/50
1568s - loss: 0.4920 - acc: 0.7531 - val_loss: 0.6680 - val_acc: 0.5824
Epoch 41/50
1567s - loss: 0.4928 - acc: 0.7577 - val_loss: 0.6303 - val_acc: 0.6273
Epoch 42/50
1572s - loss: 0.4862 - acc: 0.7541 - val_loss: 0.6277 - val_acc: 0.6301
Epoch 43/50
1563s - loss: 0.4867 - acc: 0.7548 - val_loss: 0.6659 - val_acc: 0.5943
Epoch 44/50
1559s - loss: 0.4752 - acc: 0.7692 - val_loss: 0.5641 - val_acc: 0.7057
Epoch 45/50
1559s - loss: 0.4762 - acc: 0.7687 - val_loss: 0.5629 - val_acc: 0.7017
Epoch 46/50
1565s - loss: 0.4569 - acc: 0.7808 - val_loss: 0.5978 - val_acc: 0.6722
Epoch 47/50
1567s - loss: 0.4697 - acc: 0.7740 - val_loss: 0.5706 - val_acc: 0.6943
Epoch 48/50
1569s - loss: 0.4533 - acc: 0.7798 - val_loss: 0.6840 - val_acc: 0.5403
Epoch 49/50
1565s - loss: 0.4575 - acc: 0.7776 - val_loss: 0.5793 - val_acc: 0.6955
Epoch 50/50
1571s - loss: 0.4480 - acc: 0.7798 - val_loss: 0.6192 - val_acc: 0.6466
Training duration : 78220.1503031
Network's test score [loss, accuracy]: [0.61915694339708849, 0.64659090909090911]
CNN Error: 35.34%

In [26]:
bc.save_model(dir_path='./weights/', model=cnn_model, name='jn_Core_CNN_Detection_Threshold_20170609')


Model and Weights Saved to Disk

In [27]:
print 'Core CNN Accuracy: {: >7.2f}%'.format(test_score_cnn[1] * 100)
print 'Core CNN Error: {: >10.2f}%'.format(100 - test_score_cnn[1] * 100)

predictOutput = bc.predict(cnn_model, X_test)

cnn_matrix = skm.confusion_matrix(y_true=[val.argmax() for val in Y_test], y_pred=predictOutput)

plt.figure(figsize=(8,6))
bc.plot_confusion_matrix(cnn_matrix, classes=class_names, normalize=True,
                         title='CNN Normalized Confusion Matrix Using Thresholded Data\n')
plt.tight_layout()
plt.savefig('../../figures/jn_Core_CNN_Detection_CM_Threshold_20170609.png', dpi=100)


Core CNN Accuracy:   64.66%
Core CNN Error:      35.34%
Normalized confusion matrix
[[ 0.38  0.62]
 [ 0.08  0.92]]

In [28]:
plt.figure(figsize=(8,6))
bc.plot_confusion_matrix(cnn_matrix, classes=class_names, normalize=False,
                         title='CNN Raw Confusion Matrix Using Thresholded Data\n')
plt.tight_layout()


Confusion matrix, without normalization
[[330 550]
 [ 72 808]]

In [29]:
bc.cat_stats(cnn_matrix)


Out[29]:
{'Accuracy': 64.66,
 'F1': 0.72,
 'NPV': 82.09,
 'PPV': 59.5,
 'Sensitivity': 91.82,
 'Specificity': 37.5}
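
On thresholded data the core CNN trails both earlier models: accuracy falls to 64.66%, and although its sensitivity is the highest of the three (91.82%), specificity collapses to 37.5%, i.e. most normal images are flagged as abnormal.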

In [ ]: