In [1]:
# Database, and the kernel for reading and visualizing it, obtained from:
#
# https://www.kaggle.com/kmader/lungnodemalignancy
import os
import numpy as np
#from tqdm import tqdm
import matplotlib.pyplot as plt
from skimage.util.montage import montage2d  # removed in newer scikit-image; there use skimage.util.montage
from skimage.color import label2rgb
import h5py
%matplotlib inline
In [3]:
data_path = os.path.join('data')
with h5py.File(os.path.join(data_path, 'all_patches.hdf5'), 'r') as luna_h5:
    all_slices = luna_h5['ct_slices'][()]    # .value was removed in h5py >= 3.0
    all_classes = luna_h5['slice_class'][()]
print('shape of all_slices:', all_slices.shape)
print('shape of classes: ', all_classes.shape)
In [14]:
(all_classes==1).sum()
#(all_classes==0).sum()
Out[14]:
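In [ ]:
# Class balance at a glance (a small addition, not in the original kernel):
# the labels are binary, 1 = malignant, 0 = benign.
n_mal = (all_classes == 1).sum()
n_ben = (all_classes == 0).sum()
print('malignant: %d, benign: %d, malignant fraction: %.3f'
      % (n_mal, n_ben, n_mal / len(all_classes)))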
In [4]:
def draw_borders(ax, ntiles, tile_width, tile_height, color='r'):
    # Overlay a grid on a montage so the individual tiles are easy to tell apart.
    aux1 = int(np.ceil(np.sqrt(ntiles)))   # tiles per montage side
    npixels_y = tile_height*aux1
    for i in range(aux1-1):
        aux2 = (i+1)*tile_width - 0.5
        ax.plot([aux2, aux2], [0, npixels_y - 1], color)   # vertical grid lines
    npixels_x = tile_width*aux1
    for i in range(aux1-1):
        aux2 = (i+1)*tile_height - 0.5
        ax.plot([0, npixels_x - 1], [aux2, aux2], color)   # horizontal grid lines
size = 9
fig, (ax1, ax2) = plt.subplots(1,2,figsize = (16, 8))
plt_args = dict(cmap = 'bone', vmin = -600, vmax = 300)  # display window in Hounsfield units
#plt_args = dict(cmap = 'bwr', vmin = -600, vmax = 300)
ax1.imshow(montage2d(all_slices[np.random.choice(np.where(all_classes>0.5)[0],size=size)]), **plt_args)
ax1.set_title('some malignant tiles (random sample)')
draw_borders(ax1, size, all_slices.shape[1], all_slices.shape[2], 'r')
ax2.imshow(montage2d(all_slices[np.random.choice(np.where(all_classes<0.5)[0],size=size)]), **plt_args)
ax2.set_title('some benign tiles (random sample)')
draw_borders(ax2, size, all_slices.shape[1], all_slices.shape[2], 'r')
plt.show()
In [5]:
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
# Add a channel axis for Keras and standardize with the global mean/std.
X_vec = (np.expand_dims(all_slices, -1) - np.mean(all_slices))/np.std(all_slices)
y_vec = to_categorical(all_classes)
X_train, X_test, y_train, y_test = train_test_split(X_vec, y_vec,
                                                    train_size=0.75,
                                                    random_state=1,
                                                    stratify=all_classes)
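In [ ]:
# Sanity check (added): stratify=all_classes should keep the malignant
# fraction roughly equal across the two splits.
print('train malignant fraction: %.3f' % y_train[:, 1].mean())
print('test malignant fraction:  %.3f' % y_test[:, 1].mean())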
In [6]:
# Flatten each 64x64 patch into a 4096-feature row vector for scikit-learn.
X_tr = X_train.reshape(len(X_train), -1)
X_te = X_test.reshape(len(X_test), -1)
print("X_train shape:", np.shape(X_train))
print("X_tr shape: ", np.shape(X_tr))
print("X_test shape: ", np.shape(X_test))
print("X_te shape: ", np.shape(X_te))
In [7]:
from sklearn.linear_model import LogisticRegression
model2 = LogisticRegression()
model2.fit(X_tr, np.argmax(y_train,1))
Out[7]:
In [8]:
print('Train accuracy:', model2.score(X_tr, np.argmax(y_train,1)))
print('Test accuracy :', model2.score(X_te, np.argmax(y_test,1)))
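In [ ]:
# Confusion matrix for the logistic-regression baseline (added for reference):
# rows = true class (0 benign, 1 malignant), columns = predicted class.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(np.argmax(y_test, 1), model2.predict(X_te)))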
In [9]:
from sklearn.metrics import classification_report, roc_curve, auc
print('')
print(classification_report(np.argmax(y_test, 1),
                            model2.predict(X_te)))
fpr, tpr, thresholds = roc_curve(np.argmax(y_test, 1), model2.predict_proba(X_te)[:,1])
fig, ax1 = plt.subplots(1,1)
ax1.plot(fpr, tpr, 'r-.', label = 'Simple model (%2.2f)' % auc(fpr, tpr))
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate')
ax1.plot(fpr, fpr, 'b-', label = 'Random Guess')
ax1.legend()
plt.show()
In [10]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import backend as K
from keras.losses import binary_crossentropy
from keras import optimizers
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
In [11]:
print("X_train shape:", np.shape(X_train))
print("X_test shape: ", np.shape(X_test))
print("y_train shape:", np.shape(y_train))
print("y_test shape: ", np.shape(y_test))
In [27]:
batch_size = 128
epochs = 12
input_shape = (64,64,1)
num_classes = 2
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
#model.add(Dropout(0.25))
model.add(Flatten())
#model.add(Dense(128, activation='relu'))
#model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
# With a two-unit softmax and one-hot labels, categorical_crossentropy is the
# conventional pairing; binary_crossentropy also trains here, applied per output.
model.compile(loss=binary_crossentropy,
              optimizer=optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()
#SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))
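In [ ]:
# Hand check of the summary above (added): with 'valid' padding each 3x3
# convolution shrinks each spatial dimension by 2 and 2x2 max-pooling halves it,
# so 64 -> 62 -> 60 -> 30, and Flatten yields 30*30*64 = 57600 features.
print('conv1 params:', 3*3*1*32 + 32)    # 320
print('conv2 params:', 3*3*32*64 + 64)   # 18496
print('dense params:', 30*30*64*2 + 2)   # 115202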
In [15]:
#LOAD_MODEL = True
LOAD_MODEL = False
if not LOAD_MODEL:
    model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(X_test, y_test))
else:
    model = load_model('model_v1.h5')
In [ ]:
#model.save('model_v1.h5')  # save under the name the LOAD_MODEL branch loads
In [16]:
score_tr = model.evaluate(X_train, y_train, verbose=0)
print('Train loss :', score_tr[0])
print('Train accuracy:', score_tr[1])
score_te = model.evaluate(X_test, y_test, verbose=0)
print('Test loss :', score_te[0])
print('Test accuracy :', score_te[1])
In [17]:
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba,1)
print('')
print(classification_report(np.argmax(y_test,1), y_pred))
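In [ ]:
# Confusion matrix for the CNN (added), same convention as the baseline above.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(np.argmax(y_test, 1), y_pred))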
In [18]:
fpr, tpr, thresholds = roc_curve(np.argmax(y_test, 1), y_pred_proba[:,1])
fig, ax1 = plt.subplots(1,1)
ax1.plot(fpr, tpr, 'r-.', label = 'CNN (%2.2f)' % auc(fpr, tpr))
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate')
ax1.plot(fpr, fpr, 'b-', label = 'Random Guess')
ax1.legend()
plt.show()
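In [ ]:
# Overlay of both ROC curves (added): direct comparison of the logistic-regression
# baseline and the CNN. Assumes model2, X_te and the fpr/tpr arrays from the
# previous cell are still in memory.
fpr_lr, tpr_lr, _ = roc_curve(np.argmax(y_test, 1), model2.predict_proba(X_te)[:, 1])
fig, ax1 = plt.subplots(1, 1)
ax1.plot(fpr_lr, tpr_lr, 'g--', label='Simple model (%2.2f)' % auc(fpr_lr, tpr_lr))
ax1.plot(fpr, tpr, 'r-.', label='CNN (%2.2f)' % auc(fpr, tpr))
ax1.plot([0, 1], [0, 1], 'b-', label='Random Guess')
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate')
ax1.legend()
plt.show()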
In [20]:
model.summary()
#SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))
In [21]:
weights = model.get_weights()
print('number of weight arrays:', len(weights))   # np.shape on a ragged list breaks in newer NumPy
for i in range(len(weights)):
    print('shape of weights[%d]: ' % i, np.shape(weights[i]))
In [22]:
weights[0][:,:,0,0]
Out[22]:
In [23]:
# Kernels of the first convolutional layer
nfilters = weights[0].shape[3]
lado = int(np.ceil(np.sqrt(nfilters)))   # grid side length ("lado" = side)
fig, axes = plt.subplots(lado, lado, figsize=(12, 15))
ma = abs(weights[0]).max()               # symmetric color scale around zero
for i in range(nfilters):
    axes.flat[i].imshow(weights[0][:, :, 0, i], vmin=-ma, vmax=ma, cmap='bwr')
    axes.flat[i].set_title('kernel %d' % i)
for ax in axes.flat[nfilters:]:          # hide any unused grid cells
    ax.axis('off')
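In [ ]:
# First-layer feature maps (added sketch): pass one test patch through the first
# convolution only, to see what the kernels above actually respond to. Assumes
# the trained Sequential model from the cells above, where layers[0] is the Conv2D.
from keras.models import Model
first_conv = Model(inputs=model.input, outputs=model.layers[0].output)
fmaps = first_conv.predict(X_test[:1])[0]   # shape (62, 62, 32)
fig, axes = plt.subplots(lado, lado, figsize=(12, 15))
for i in range(fmaps.shape[-1]):
    axes.flat[i].imshow(fmaps[:, :, i], cmap='bone')
    axes.flat[i].set_title('feature map %d' % i)
for ax in axes.flat[fmaps.shape[-1]:]:
    ax.axis('off')
plt.show()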