In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import cv2
from tqdm import tqdm
from IPython.display import display, Markdown

%pylab inline


Populating the interactive namespace from numpy and matplotlib

Patient's data preprocessing and preparation


In [2]:
data_dir = os.path.join(os.path.expanduser('~'), 'Datasets', 'New_Exp')
cancer_dir = os.path.join(data_dir, 'cancer_png')
control_dir = os.path.join(data_dir, 'control_png')
fibro_dir = os.path.join(data_dir, 'fibroadenomas_png')

data_proc = os.path.join(os.getcwd(), 'all_data')
cancer_proc = os.path.join(data_proc, 'cancer')
control_proc = os.path.join(data_proc, 'control')
fibro_proc = os.path.join(data_proc, 'fibroadenomas')

if not os.path.exists(data_proc):
    os.mkdir(data_proc)
if not os.path.exists(cancer_proc):
    os.mkdir(cancer_proc)
if not os.path.exists(control_proc):
    os.mkdir(control_proc)
if not os.path.exists(fibro_proc):
    os.mkdir(fibro_proc)

In [3]:
import shutil

def process_image(img0: np.array) -> np.array:
    for j in range(img0.shape[1]):
        img0[img0.shape[0] - 1][j] = img0[img0.shape[0] - 3][j]
    img1 = cv2.GaussianBlur(img0, (3, 3), 0)
    return img1

def process_imdir(imdir: str, destdir: str, descr: str):
    if descr == None:
        descr = imdir
    for imname in tqdm(os.listdir(imdir), ascii=True, desc=descr):
        img0 = cv2.imread(os.path.join(imdir, imname))
        img1 = process_image(img0)
        cv2.imwrite(os.path.join(destdir, imname), img1)

def classdir_prepare(class_pathlist, class_destdir, class_name='Class None'):
    if os.path.exists(class_destdir):
        shutil.rmtree(class_destdir)
    os.makedirs(class_destdir)
    for fname in tqdm(class_pathlist, ascii=True, desc=class_name):
        if not os.path.exists(fname):
            continue
        shutil.copyfile(fname, os.path.join(class_destdir, os.path.basename(fname)))

In [140]:
process_imdir(cancer_dir, cancer_proc, 'cancer ')
process_imdir(fibro_dir, fibro_proc, 'fibro  ')


cancer : 100%|##########| 3403/3403 [00:12<00:00, 262.09it/s]
fibro  : 100%|##########| 1741/1741 [00:06<00:00, 261.15it/s]

Learning with ConvNets using Keras framework


In [36]:
from keras import backend as KBackend

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.preprocessing.image import ImageDataGenerator

from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.models import model_from_json

from sklearn.model_selection import StratifiedKFold

print('Keras backend:', KBackend.backend())
print('Keras image format:', KBackend.image_data_format(), '\n')


Keras backend: tensorflow
Keras image format: channels_last 


In [26]:
random_seed = 27297

train_data_generator = ImageDataGenerator()
test_data_generator = ImageDataGenerator()

kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=random_seed)

cancer_list = np.array([os.path.join(cancer_proc, x) for x in os.listdir(cancer_proc)])
fibro_list = np.array([os.path.join(fibro_proc, x) for x in os.listdir(fibro_proc)])

X = np.concatenate((cancer_list, fibro_list), axis=0)
y = np.concatenate((np.zeros(cancer_list.shape), np.ones(fibro_list.shape)), axis=0)

In [27]:
def cnn_generator() -> Sequential:
    return None

model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(5, 5), strides=2, input_shape=(160, 160, 3)))
model.add(Activation('relu'))

model.add(Conv2D(filters=32, kernel_size=(3, 3)))
model.add(Activation('relu'))

model.add(Conv2D(filters=32, kernel_size=(3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [37]:
kfold_iteration = 0
for train_index, test_index in kfold.split(X, y):
    kfold_iteration += 1
    if kfold_iteration >= 2:
        break
    display(Markdown('**KFold iteration #{}**'.format(kfold_iteration)))

    cancer_trainlist = (X[train_index])[np.nonzero(1 - y[train_index])]
    fibro_trainlist = (X[train_index])[np.nonzero(y[train_index])]
    cancer_traindest = os.path.join(os.getcwd(), 'train', 'cancer')
    fibro_traindest = os.path.join(os.getcwd(), 'train', 'fibro')        
    #classdir_prepare(cancer_trainlist, cancer_traindest, '[TRAIN] generating cancer')
    #classdir_prepare(fibro_trainlist, fibro_traindest, '[TRAIN] generating fibro ')

    cancer_testlist = (X[test_index])[np.nonzero(1 - y[test_index])]
    fibro_testlist = (X[test_index])[np.nonzero(y[test_index])]
    cancer_testdest = os.path.join(os.getcwd(), 'test', 'cancer')
    fibro_testdest = os.path.join(os.getcwd(), 'test', 'fibro')
    #classdir_prepare(cancer_testlist, cancer_testdest, '[TEST ] generating cancer')
    #classdir_prepare(fibro_testlist, fibro_testdest, '[TEST ] generating fibro ')
    
    train_generator = train_data_generator.flow_from_directory(
        os.path.join(os.getcwd(), 'train'), target_size=(160, 160), class_mode='binary')
    test_generator = train_data_generator.flow_from_directory(
        os.path.join(os.getcwd(), 'test'), target_size=(160, 160), class_mode='binary')
    
    i = 0
    for img in train_generator:
        i += 1
        if i >= 2:
            break
        imgarr = img_to_array(img)
        plt.imshow(imgarr)


KFold iteration #1

Found 4628 images belonging to 2 classes.
Found 516 images belonging to 2 classes.
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-37-b2a166422b5e> in <module>()
     26 
     27     i = 0
---> 28     for img in train_generator:
     29         i += 1
     30         if i >= 2:

/home/falcon/anaconda3/envs/cancer/lib/python3.5/site-packages/keras/preprocessing/image.py in __next__(self, *args, **kwargs)
    725 
    726     def __next__(self, *args, **kwargs):
--> 727         return self.next(*args, **kwargs)
    728 
    729 

/home/falcon/anaconda3/envs/cancer/lib/python3.5/site-packages/keras/preprocessing/image.py in next(self)
    958             img = load_img(os.path.join(self.directory, fname),
    959                            grayscale=grayscale,
--> 960                            target_size=self.target_size)
    961             x = img_to_array(img, data_format=self.data_format)
    962             x = self.image_data_generator.random_transform(x)

/home/falcon/anaconda3/envs/cancer/lib/python3.5/site-packages/keras/preprocessing/image.py in load_img(path, grayscale, target_size)
    316     """
    317     if pil_image is None:
--> 318         raise ImportError('Could not import PIL.Image. '
    319                           'The use of `array_to_img` requires PIL.')
    320     img = pil_image.open(path)

ImportError: Could not import PIL.Image. The use of `array_to_img` requires PIL.

In [ ]:


In [ ]: