In [1]:
import datetime
from PIL import Image  # the bare "import Image" form only works with very old PIL installs
import gc
import numpy as np # pip install numpy --upgrade
import os
import random
from scipy import misc
import string
import time
import sys
import sklearn.metrics as skm
# Set some Theano config before initializing
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=cpu,floatX=float32,allow_gc=False,openmp=True"
import theano
# MatPlotLib - Setup for Jupyter notebook output
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
# Our modules
import dwdii_bc_model_helper as bc
import bc_models as models
# And Keras so we can emit the version
import keras
random.seed(20275)
np.set_printoptions(precision=2)
In [2]:
# Print some upfront version and config settings
print "Python v" + sys.version
print "Numpy v: " + np.__version__
print "keras v: " + keras.__version__
print "device:", theano.config.device
print "floatX:", theano.config.floatX
print "mode:", theano.config.mode
print "openmp:", theano.config.openmp
print "allow_gc:", theano.config.allow_gc
In [3]:
imagePath = "/root/bc_data/ddsm-png.25"
#imagePath = "/root/bc_data/Data_Thresholded/DDSM"
trainImagePath = imagePath
trainDataPath = "../../data/ddsm_train.csv"
# 2-class experiment?
# If not, the default is the 3-class experiment
normalVsAbnormal = True
categories = bc.bcNumerics()
thesePathos = None #["benign", "malignant"]
if normalVsAbnormal:
    categories = bc.bcNormVsAbnormNumerics()
#
# Simulated training data
#
#trainImagePath = "/root/bc_data/simulated_images"
#trainDataPath = "/root/bc_data/simulated_images/simulated_images.csv"
#trainImagePath = "/root/bc_data/simulated_images_new"
#trainDataPath = "/root/bc_data/simulated_images_new/simulated_images.csv"
# Test data is always from ddsm_test.csv
testDataPath = "../../data/ddsm_test.csv"
imgResize = (150, 150)
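The category helpers return dictionaries mapping class names to integer codes. For readers without the helper module at hand, a sketch of the assumed shape of these mappings (the literals below are illustrative assumptions; the real values live in dwdii_bc_model_helper):
In [ ]:
# Illustrative sketch only -- the actual mappings come from bc.bcNumerics()
# and bc.bcNormVsAbnormNumerics().
exampleThreeClass = {"normal": 0, "benign": 1, "malignant": 2}
exampleTwoClass = {"normal": 0, "abnormal": 1}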
In [4]:
os.listdir('../../data')
Out[4]:
In [5]:
metaData, meta2, mCounts = bc.load_training_metadata(trainDataPath, balanceViaRemoval=True, verbose=True,
                                                     normalVsAbnormal=normalVsAbnormal)
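The balanceViaRemoval=True flag suggests the metadata loader balances the class distribution by dropping rows from over-represented classes. A minimal sketch of that idea, assuming rows of (imageName, label) tuples (the helper's actual logic may differ):
In [ ]:
# Minimal sketch of "balance via removal" (illustrative only; the real logic
# lives in bc.load_training_metadata). Assumes rows of (imageName, label) tuples.
import collections
def exampleBalanceViaRemoval(rows):
    byLabel = collections.defaultdict(list)
    for row in rows:
        byLabel[row[1]].append(row)
    # Downsample every class to the size of the smallest one
    smallest = min(len(v) for v in byLabel.values())
    balanced = []
    for label in byLabel:
        balanced.extend(random.sample(byLabel[label], smallest))
    return balanced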
In [6]:
# Load the training data for model experimentation
maxData = len(metaData)
X_data, Y_data = bc.load_data(trainDataPath, trainImagePath,
                              categories=categories,
                              maxData=maxData,
                              verboseFreq=50,
                              imgResize=imgResize,
                              thesePathos=thesePathos,
                              normalVsAbnormal=normalVsAbnormal)
print X_data.shape
print Y_data.shape
In [7]:
# Load the held-out test data
maxData = len(metaData)
X_test, Y_test = bc.load_data(testDataPath, imagePath,
                              categories=categories,
                              maxData=maxData,
                              verboseFreq=50,
                              imgResize=imgResize,
                              thesePathos=thesePathos,
                              normalVsAbnormal=normalVsAbnormal)
print X_test.shape
print Y_test.shape
In this section, we apply transformations to the existing images to increase the amount of training data, and add a bit of noise, in the hope of improving the overall training results.
In [8]:
imgDataGenCount = 3
transformCount = imgDataGenCount
newImgs = np.zeros([X_data.shape[0] * transformCount, X_data.shape[1], X_data.shape[2]])
newYs = np.zeros([Y_data.shape[0] * transformCount, Y_data.shape[1]], dtype=np.int8)
print newImgs.shape
print newYs.shape
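Each source image will contribute imgDataGenCount transformed copies, hence the preallocation above with X_data.shape[0] * transformCount rows.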
In [9]:
img = X_data[0]
img.shape
Out[9]:
In [10]:
ndx = 0
for i in range(X_data.shape[0]):
    img = X_data[i]
    for n in range(imgDataGenCount):
        imgX = models.imageDataGenTransform(img, Y_data[i])
        imgX = imgX.reshape(150, 150)
        #print imgX.shape
        newImgs[ndx] = imgX
        newYs[ndx] = Y_data[i]
        #misc.imsave("testX.png", imgX)
        ndx += 1
    #break
print("Done", str(datetime.datetime.now()))
In [11]:
X_data2 = np.concatenate((X_data, newImgs))
Y_data2 = np.concatenate((Y_data, newYs))
print X_data2.shape
print Y_data2.shape
In [12]:
performedTransforms = True
if performedTransforms:
    X_train = X_data2
    Y_train = Y_data2
else:
    X_train = X_data
    Y_train = Y_data
In [13]:
print X_train.shape
print X_test.shape
print Y_train.shape
print Y_test.shape
In [14]:
import collections
def yDist(y):
    bcCounts = collections.defaultdict(int)
    for a in range(0, y.shape[0]):
        bcCounts[y[a][0]] += 1
    return bcCounts
print "Y_train Dist: " + str(yDist(Y_train))
print "Y_test Dist: " + str(yDist(Y_test))
In [15]:
# Load the bc array for our count in the model definition
print categories
print len(categories)
In [16]:
# Force a fresh import of bc_models so code changes are picked up without restarting the kernel
del sys.modules['bc_models']
In [17]:
# Construct the model using our help function
import bc_models as models
model = models.bc_model_v03(len(categories), verbose=True,
                            input_shape=(1, X_train.shape[1], X_train.shape[2]))
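For readers without bc_models at hand, a shallow channels-first CNN along these general lines is consistent with the shapes used in this notebook. This is an illustrative assumption only, not bc_model_v03's actual architecture; the layer signatures and the optimizer choice assume a Keras 1-style API:
In [ ]:
# Illustrative sketch only -- the real model comes from bc_models.bc_model_v03.
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense, Dropout

def exampleShallowCnn(nb_classes, input_shape=(1, 150, 150)):
    m = Sequential()
    m.add(Convolution2D(32, 3, 3, activation='relu', input_shape=input_shape))
    m.add(MaxPooling2D(pool_size=(2, 2)))
    m.add(Convolution2D(64, 3, 3, activation='relu'))
    m.add(MaxPooling2D(pool_size=(2, 2)))
    m.add(Flatten())
    m.add(Dense(128, activation='relu'))
    m.add(Dropout(0.5))
    m.add(Dense(nb_classes, activation='softmax'))
    # Optimizer is an assumption; the loss matches the one named later in this notebook
    m.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
    return m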
In [18]:
loadWeights = False
weightsFileName = "dwdii-bc-v03-150-normVsAbnorm-13970-20170510.hdf5"
if loadWeights:
    model.load_weights('weights/' + weightsFileName)
In [19]:
# Reshape to channels-first (samples, 1, height, width), the input layout the CNN expects with the Theano backend
testX = X_test.reshape(X_test.shape[0], 1, X_test.shape[1], X_test.shape[2])
trainX = X_train.reshape(X_train.shape[0], 1, X_train.shape[1], X_train.shape[2])
In [26]:
print "Training start: " + str(datetime.datetime.now())
m, losses, acc = models.run_network([trainX, testX, Y_train, Y_test], model, batch=50, epochs=20, verbosity=1)
In [21]:
models.plot_losses(losses, acc)
In [22]:
fileLossesPng = '../../figures/plot_losses-' + weightsFileName + '.png'
plt.savefig(fileLossesPng)
In [45]:
model.save_weights('weights/' + weightsFileName, overwrite=True)
Initial results based on "normal" being masked as "benign":
Revised with "normal", "benign" and "malignant" labeled separately:
After creating fixed "train", "test" and "validate" data sets, using "train" and "test" as well as including the DDSM Benign cases:
bc_model_v01 - categorical_crossentropy
Using the "Data_Thresholded" images
Using the "simulated_images" images
Using the "simulated_images_new" images
In [ ]:
resultsValAcc = {}
#resultsValAcc["1"] = 0.6800
#resultsValAcc["2"] = 0.7260
#resultsValAcc["3"] = 0.6616
#resultsValAcc["03-27-2017"] = 0.6116
#resultsValAcc["04-02-2017"] = 0.4805
#resultsValAcc["04-03-2017"] = 0.5065
#resultsValAcc["04-05-2017"] = 0.5243
resultsValAcc[924] = 0.5628
resultsValAcc[2737] = 0.6326
resultsValAcc[5474] = 0.6138
import dwdii_test as dwdii
#cmp = matplotlib.colors.Colormap("Blues")
dwdii.barChart(resultsValAcc, filename="../../figures/shallowCnn_thresholded_2class_results_valacc.png", title="Shallow CNN - DDSM Data Thresholded 2 Class Test Accuracy", yAxisLabel="Accuracy %")
In [27]:
predictOutput = model.predict(testX, batch_size=32, verbose=1)
In [28]:
predClass = np.array(predictOutput[0]).argmax()
numBC = bc.reverseDict(categories)
numBC[predClass]
Out[28]:
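bc.reverseDict presumably just inverts the categories mapping, swapping keys and values, so an integer class code can be translated back to its class name.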
In [29]:
numBC[Y_test[0][0]]
Out[29]:
In [30]:
predClasses = []
for i in range(len(predictOutput)):
    arPred = np.array(predictOutput[i])
    predictionProb = arPred.max()
    predictionNdx = arPred.argmax()
    predClassName = numBC[predictionNdx]
    predClasses.append(predictionNdx)
    #print "{0}: {1} ({2})".format(i, predClassName, predictionProb)
In [31]:
# Use sklearn's helper method to generate the confusion matrix
cnf_matrix = skm.confusion_matrix(Y_test, predClasses)
cnf_matrix
Out[31]:
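As a quick, optional sanity check, overall and per-class accuracy can be read directly off the confusion matrix (trace over total, and diagonal over row sums):
In [ ]:
# Optional sanity check derived from the confusion matrix
overallAcc = np.trace(cnf_matrix) / float(cnf_matrix.sum())
perClassAcc = np.diag(cnf_matrix) / cnf_matrix.sum(axis=1).astype(float)
print "Overall accuracy: %.4f" % overallAcc
print "Per-class accuracy:", perClassAcc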
In [37]:
# Order class names by their integer code so they line up with the confusion matrix axes
class_names = [numBC[i] for i in sorted(numBC.keys())]
print class_names[1:3]
print class_names
np.set_printoptions(precision=2)
In [38]:
# Plot non-normalized confusion matrix
fileCfMatrix = '../../figures/confusion_matrix-' + weightsFileName + '.png'
plt.figure()
bc.plot_confusion_matrix(cnf_matrix, classes=class_names,
                         title='Confusion matrix, without normalization, \n' + weightsFileName)
plt.savefig(fileCfMatrix)
In [39]:
# Load the image we just saved
from IPython.display import Image
Image(filename=fileCfMatrix)
Out[39]:
In [43]:
# Plot normalized confusion matrix
fileCfMatrixNorm = '../../figures/confusion_matrix_norm-' + weightsFileName + '.png'
plt.figure()
bc.plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                         title='Normalized confusion matrix \n' + weightsFileName)
plt.savefig(fileCfMatrixNorm)
In [44]:
# Load the image we just saved
from IPython.display import Image
Image(filename=fileCfMatrixNorm)
Out[44]:
In [ ]: