Loading Dataset


In [17]:
from glob import glob
from keras.utils import np_utils
from sklearn.datasets import load_files
import numpy as np


def load_dataset(path):
    data = load_files(path)
    bones_files = np.array(data['filenames'])
    bones_targets = np_utils.to_categorical(np.array(data['target']), 3)
    return bones_files, bones_targets


names = [item[20:-1] for item in sorted(glob("images/bones/train/*/"))]
train_files, train_targets = load_dataset('images/bones/train')
valid_files, valid_targets = load_dataset('images/bones/valid')
test_files, test_targets = load_dataset('images/bones/test')

print('There are %d total bone disease categories.' % len(names))
print('There are %s total bone images.\n' % len(np.hstack([train_files, valid_files, test_files])))
print('There are %d training bone images.' % len(train_files))
print('There are %d validation bone images.' % len(valid_files))
print('There are %d test bone images.'% len(test_files))


There are 3 total bone disease categories.
There are 304 total bone images.

There are 256 training bone images.
There are 24 validation bone images.
There are 24 test bone images.

Loading Tensors


In [18]:
from keras.preprocessing import image
from tqdm import tqdm

def path_to_tensor(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

Pre-processing Images


In [19]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

train_tensors = paths_to_tensor(train_files).astype('float32')/255
valid_tensors = paths_to_tensor(valid_files).astype('float32')/255
test_tensors = paths_to_tensor(test_files).astype('float32')/255


100%|██████████| 256/256 [00:02<00:00, 100.94it/s]
100%|██████████| 24/24 [00:00<00:00, 129.79it/s]
100%|██████████| 24/24 [00:00<00:00, 155.28it/s]

Creating CNN Model


In [20]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

model = Sequential()

model.add(Conv2D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(3, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(3, activation='softmax'))

model.summary()

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_4 (Conv2D)            (None, 224, 224, 16)      208       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 112, 112, 32)      2080      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 56, 56, 64)        8256      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 28, 28, 64)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 28, 28, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 50176)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 150531    
_________________________________________________________________
dropout_4 (Dropout)          (None, 3)                 0         
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 12        
=================================================================
Total params: 161,087.0
Trainable params: 161,087.0
Non-trainable params: 0.0
_________________________________________________________________

Training The Model


In [21]:
from keras.callbacks import ModelCheckpoint

epochs = 10

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5',
                               verbose=1, save_best_only=True)

model.fit(train_tensors, train_targets, validation_data=(valid_tensors, valid_targets),
          epochs=epochs, batch_size=20, callbacks=[checkpointer], verbose=1)


Train on 256 samples, validate on 24 samples
Epoch 1/10
240/256 [===========================>..] - ETA: 0s - loss: 1.1742 - acc: 0.3458Epoch 00000: val_loss improved from inf to 1.09861, saving model to saved_models/weights.best.from_scratch.hdf5
256/256 [==============================] - 10s - loss: 1.1695 - acc: 0.3320 - val_loss: 1.0986 - val_acc: 0.3333
Epoch 2/10
240/256 [===========================>..] - ETA: 0s - loss: 1.0988 - acc: 0.2958Epoch 00001: val_loss did not improve
256/256 [==============================] - 9s - loss: 1.0988 - acc: 0.2930 - val_loss: 1.0986 - val_acc: 0.3333
Epoch 3/10
240/256 [===========================>..] - ETA: 0s - loss: 1.0987 - acc: 0.3167Epoch 00002: val_loss did not improve
256/256 [==============================] - 9s - loss: 1.0987 - acc: 0.3203 - val_loss: 1.0986 - val_acc: 0.3333
Epoch 4/10
240/256 [===========================>..] - ETA: 0s - loss: 1.1103 - acc: 0.3250Epoch 00003: val_loss improved from 1.09861 to 1.07959, saving model to saved_models/weights.best.from_scratch.hdf5
256/256 [==============================] - 9s - loss: 1.1095 - acc: 0.3359 - val_loss: 1.0796 - val_acc: 0.4583
Epoch 5/10
240/256 [===========================>..] - ETA: 0s - loss: 1.0966 - acc: 0.3542Epoch 00004: val_loss improved from 1.07959 to 1.03732, saving model to saved_models/weights.best.from_scratch.hdf5
256/256 [==============================] - 9s - loss: 1.0930 - acc: 0.3555 - val_loss: 1.0373 - val_acc: 0.3750
Epoch 6/10
240/256 [===========================>..] - ETA: 0s - loss: 1.0579 - acc: 0.4167Epoch 00005: val_loss improved from 1.03732 to 1.03258, saving model to saved_models/weights.best.from_scratch.hdf5
256/256 [==============================] - 10s - loss: 1.0605 - acc: 0.4023 - val_loss: 1.0326 - val_acc: 0.4167
Epoch 7/10
240/256 [===========================>..] - ETA: 0s - loss: 1.0546 - acc: 0.3917Epoch 00006: val_loss did not improve
256/256 [==============================] - 10s - loss: 1.0512 - acc: 0.4141 - val_loss: 1.0503 - val_acc: 0.4167
Epoch 8/10
240/256 [===========================>..] - ETA: 0s - loss: 1.0522 - acc: 0.4375Epoch 00007: val_loss did not improve
256/256 [==============================] - 10s - loss: 1.0470 - acc: 0.4531 - val_loss: 1.0469 - val_acc: 0.4583
Epoch 9/10
240/256 [===========================>..] - ETA: 0s - loss: 0.9733 - acc: 0.4833Epoch 00008: val_loss did not improve
256/256 [==============================] - 10s - loss: 0.9814 - acc: 0.4648 - val_loss: 1.0815 - val_acc: 0.4583
Epoch 10/10
240/256 [===========================>..] - ETA: 0s - loss: 0.9856 - acc: 0.4583Epoch 00009: val_loss did not improve
256/256 [==============================] - 10s - loss: 0.9934 - acc: 0.4609 - val_loss: 1.0393 - val_acc: 0.4167
Out[21]:
<keras.callbacks.History at 0x7fb944b62b38>

Testing The Model


In [22]:
model.load_weights('saved_models/weights.best.from_scratch.hdf5')

bone_diseases_predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in test_tensors]

test_accuracy = 100*np.sum(np.array(bone_diseases_predictions)==np.argmax(test_targets, axis=1))/len(bone_diseases_predictions)
print('Bones test accuracy: %.4f%%' % test_accuracy)


Bones test accuracy: 37.5000%

Prediction Machine


In [23]:
categories = {
    0: 'Fracture',
    1: 'Osteomyelitis',
    2: 'Osteochondroma'
}

def prediction_machine(img_path):
    tensor = path_to_tensor(img_path)

    prediction_array = model.predict(tensor)
    print(prediction_array)
    prediction = np.argmax(prediction_array)
    return categories[prediction]

Bone Disease Diagnose


In [24]:
import matplotlib.pyplot as plt
import cv2
%matplotlib inline


final_images = np.array(glob('images/bones/final/humerus/*'))

for img_path in final_images:
    p_img = cv2.imread(img_path)
    p_img_gray = cv2.cvtColor(p_img, cv2.COLOR_BGR2GRAY)
    plt.imshow(p_img)
    plt.show()
    print(img_path)
    prediction = prediction_machine(img_path)
    print('Predicted Disease: {0}'.format(prediction))


images/bones/final/humerus/humerus_00002.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma
images/bones/final/humerus/humerus_00010.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma
images/bones/final/humerus/humerus_00001.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma
images/bones/final/humerus/humerus_00004.jpg
[[  1.00000000e+00   4.92268841e-34   0.00000000e+00]]
Predicted Disease: Fracture
images/bones/final/humerus/humerus_00005.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma
images/bones/final/humerus/humerus_00009.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma
images/bones/final/humerus/humerus_00008.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma
images/bones/final/humerus/humerus_00003.jpeg
[[ 1.  0.  0.]]
Predicted Disease: Fracture
images/bones/final/humerus/humerus_00007.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma
images/bones/final/humerus/humerus_00006.jpeg
[[ 0.32990766  0.33390421  0.33618814]]
Predicted Disease: Osteochondroma

Using Transfer Learning

Loading Bottleneck Features


In [54]:
# bottleneck_features = np.load('bottleneck_features/DogVGG16Data.npz')
# bf_train = bottleneck_features['train']
# bf_valid = bottleneck_features['valid']
# bf_test = bottleneck_features['test']
from keras.applications.resnet50 import ResNet50
from keras.layers import Input

base_model = ResNet50(include_top=False, weights='imagenet', input_tensor=Input(shape=(224, 224, 3)))

transfered_model = base_model.output
transfered_model.add(GlobalAveragePooling2D())
transfered_model.add(Dense(3, activation='softmax'))
transfered_model.load_weights('saved_models/weights.best.from_scratch.hdf5')
bf_model.summary()

transfered_model = Model(input=base_model.input, output=transfered_model(base_model.output))

transfered_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
global_average_pooling2d_10  (None, 512)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 3)                 1539      
=================================================================
Total params: 1,539.0
Trainable params: 1,539.0
Non-trainable params: 0.0
_________________________________________________________________
/home/andres/anaconda3/envs/ai23d/lib/python3.6/site-packages/ipykernel_launcher.py:16: UserWarning: Update your `Model` call to the Keras 2 API: `Model(inputs=Tensor("in..., outputs=Tensor("se...)`
  app.launch_new_instance()

Adding Extra Layers to Transfered Model


In [55]:
transfered_model = Model(input=base_model.input, output=transfered_model(base_model.output))

transfered_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-55-07ab78ff6223> in <module>()
----> 1 transfered_model = Model(input=base_model.input, output=transfered_model(base_model.output))
      2 
      3 transfered_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
      4 

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/engine/topology.py in __call__(self, inputs, **kwargs)
    552 
    553             # Actually call the layer, collecting output(s), mask(s), and shape(s).
--> 554             output = self.call(inputs, **kwargs)
    555             output_mask = self.compute_mask(inputs, previous_mask)
    556 

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/engine/topology.py in call(self, inputs, mask)
   1988             return self._output_tensor_cache[cache_key]
   1989         else:
-> 1990             output_tensors, _, _ = self.run_internal_graph(inputs, masks)
   1991             return output_tensors
   1992 

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/engine/topology.py in run_internal_graph(self, inputs, masks)
   2138                                 if 'mask' not in kwargs:
   2139                                     kwargs['mask'] = computed_mask
-> 2140                             output_tensors = _to_list(layer.call(computed_tensor, **kwargs))
   2141                             output_masks = _to_list(layer.compute_mask(computed_tensor,
   2142                                                                        computed_mask))

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/layers/convolutional.py in call(self, inputs)
    162                 padding=self.padding,
    163                 data_format=self.data_format,
--> 164                 dilation_rate=self.dilation_rate)
    165         if self.rank == 3:
    166             outputs = K.conv3d(

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in conv2d(x, kernel, strides, padding, data_format, dilation_rate)
   2860         strides=strides,
   2861         padding=padding,
-> 2862         data_format='NHWC')
   2863     return _postprocess_conv2d_output(x, data_format)
   2864 

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py in convolution(input, filter, padding, strides, dilation_rate, name, data_format)
    617           "number of input channels does not match corresponding dimension of filter, "
    618           "{} != {}".format(input_channels_dim, filter.get_shape()[
--> 619               num_spatial_dims]))
    620 
    621     strides, dilation_rate = _get_strides_and_dilation_rate(

ValueError: number of input channels does not match corresponding dimension of filter, 2048 != 3

Training Transfered Model


In [44]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.VGG16.hdf5',
                               verbose=1, save_best_only=True)

bf_model.fit(bf_train, train_targets, validation_data=(bf_valid, valid_targets),
         epochs=20, batch_size=20, callbacks=[checkpointer], verbose=1)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-44-7ac613ba8149> in <module>()
      3 
      4 bf_model.fit(bf_train, train_targets, validation_data=(bf_valid, valid_targets),
----> 5          epochs=20, batch_size=20, callbacks=[checkpointer], verbose=1)

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    843                               class_weight=class_weight,
    844                               sample_weight=sample_weight,
--> 845                               initial_epoch=initial_epoch)
    846 
    847     def evaluate(self, x, y, batch_size=32, verbose=1,

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
   1403             class_weight=class_weight,
   1404             check_batch_axis=False,
-> 1405             batch_size=batch_size)
   1406         # prepare validation data
   1407         if validation_data:

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
   1305                           for (ref, sw, cw, mode)
   1306                           in zip(y, sample_weights, class_weights, self._feed_sample_weight_modes)]
-> 1307         _check_array_lengths(x, y, sample_weights)
   1308         _check_loss_and_target_compatibility(y,
   1309                                              self._feed_loss_fns,

~/anaconda3/envs/ai23d/lib/python3.6/site-packages/keras/engine/training.py in _check_array_lengths(inputs, targets, weights)
    227                          'the same number of samples as target arrays. '
    228                          'Found ' + str(list(set_x)[0]) + ' input samples '
--> 229                          'and ' + str(list(set_y)[0]) + ' target samples.')
    230     if set_y and set_w and list(set_y)[0] != list(set_w)[0]:
    231         raise ValueError('Sample_weight arrays should have '

ValueError: Input arrays should have the same number of samples as target arrays. Found 6680 input samples and 256 target samples.

In [ ]: