In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import cv2
from tqdm import tqdm
from IPython.display import display, Markdown

%pylab inline


Populating the interactive namespace from numpy and matplotlib

Patient data preprocessing and preparation


In [2]:
data_dir = os.path.join(os.path.expanduser('~'), 'Datasets', 'New_Exp')
cancer_dir = os.path.join(data_dir, 'cancer_png')
control_dir = os.path.join(data_dir, 'control_png')
fibro_dir = os.path.join(data_dir, 'fibroadenomas_png')

data_proc = os.path.join(os.getcwd(), 'all_data')
cancer_proc = os.path.join(data_proc, 'cancer')
control_proc = os.path.join(data_proc, 'control')
fibro_proc = os.path.join(data_proc, 'fibroadenomas')

if not os.path.exists(data_proc):
    os.mkdir(data_proc)
if not os.path.exists(cancer_proc):
    os.mkdir(cancer_proc)
if not os.path.exists(control_proc):
    os.mkdir(control_proc)
if not os.path.exists(fibro_proc):
    os.mkdir(fibro_proc)
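
The four existence checks above can be written more compactly; a minimal equivalent sketch using `os.makedirs` with `exist_ok=True` (Python 3.2+):


In [ ]:
# Equivalent directory setup in one loop; exist_ok suppresses the "already exists" error.
for d in (data_proc, cancer_proc, control_proc, fibro_proc):
    os.makedirs(d, exist_ok=True)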

In [3]:
import shutil

def process_image(img0: np.ndarray) -> np.ndarray:
    # Copy the second-to-last row into the last row, then apply a light 3x3 Gaussian blur.
    img0[-1, :] = img0[-2, :]
    img1 = cv2.GaussianBlur(img0, (3, 3), 0)
    return img1

def process_imdir(imdir: str, destdir: str, descr: str):
    # Preprocess every image in imdir and write the result to destdir.
    if descr is None:
        descr = imdir
    for imname in tqdm(os.listdir(imdir), ascii=True, desc=descr):
        img0 = cv2.imread(os.path.join(imdir, imname))
        img1 = process_image(img0)
        cv2.imwrite(os.path.join(destdir, imname), img1)

def classdir_prepare(class_pathlist, class_destdir, class_name='Class None'):
    # Rebuild class_destdir from scratch and copy the listed files into it.
    if os.path.exists(class_destdir):
        shutil.rmtree(class_destdir)
    os.makedirs(class_destdir)
    for fname in tqdm(class_pathlist, ascii=True, desc=class_name):
        if not os.path.exists(fname):
            continue
        shutil.copyfile(fname, os.path.join(class_destdir, os.path.basename(fname)))
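
Before processing the whole dataset, it can help to eyeball what `process_image` does to a single file. A small sketch (assumes `cancer_dir` contains at least one readable PNG):


In [ ]:
# Visual sanity check: raw image vs. processed image side by side.
sample_name = os.listdir(cancer_dir)[0]
raw = cv2.imread(os.path.join(cancer_dir, sample_name))
proc = process_image(raw.copy())  # copy: process_image edits its input in place

fig, axes = plt.subplots(1, 2, figsize=(10, 5))
axes[0].imshow(cv2.cvtColor(raw, cv2.COLOR_BGR2RGB))
axes[0].set_title('raw')
axes[1].imshow(cv2.cvtColor(proc, cv2.COLOR_BGR2RGB))
axes[1].set_title('processed')
plt.show()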

In [4]:
process_imdir(cancer_dir, cancer_proc, 'cancer ')
process_imdir(fibro_dir, fibro_proc, 'fibro  ')


cancer : 100%|##########| 3403/3403 [01:01<00:00, 55.29it/s]
fibro  : 100%|##########| 1741/1741 [00:31<00:00, 55.57it/s]

Telegram notifications


In [4]:
import telepot

bot_token = '305845736:AAFEWvma4up5MgyvioxLr8lKJWqbCYstUf4'
user_id = 77680768

telebot = telepot.Bot(bot_token)
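
A quick smoke test (not part of the training flow) confirms the token and chat id work before relying on the bot for progress reports; `sendMessage` is the same telepot call used by the training callback below.


In [ ]:
# One-off test message to verify the bot can reach the chat.
telebot.sendMessage(user_id, 'notebook connected')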

Learning with ConvNets using the Keras framework


In [13]:
from keras import backend as KBackend

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD

from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.models import model_from_json
from keras.callbacks import Callback

from sklearn.model_selection import StratifiedKFold

print('Keras backend:', KBackend.backend())
print('Keras image format:', KBackend.image_data_format(), '\n')


Keras backend: tensorflow
Keras image format: channels_last 


In [6]:
random_seed = 61194
batch_size = 16

train_data_generator = ImageDataGenerator(horizontal_flip=True)
test_data_generator = ImageDataGenerator()

kfold = StratifiedKFold(n_splits=8, shuffle=True, random_state=random_seed)

cancer_list = np.array([os.path.join(cancer_proc, x) for x in os.listdir(cancer_proc)])
fibro_list = np.array([os.path.join(fibro_proc, x) for x in os.listdir(fibro_proc)])
# Undersample the cancer class to the fibroadenoma count so the two classes are balanced.
cancer_list = cancer_list[:fibro_list.shape[0]]

# Labels: 0 = cancer, 1 = fibroadenoma.
X = np.concatenate((cancer_list, fibro_list), axis=0)
y = np.concatenate((np.zeros(cancer_list.shape), np.ones(fibro_list.shape)), axis=0)
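
A short sanity check (a sketch, nothing later in the notebook depends on it) to confirm the undersampling left the two classes balanced:


In [ ]:
# Class balance check: counts of cancer (label 0) and fibroadenoma (label 1) samples.
print('total samples:', X.shape[0])
print('cancer:', int(np.sum(y == 0)), '| fibro:', int(np.sum(y == 1)))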

In [19]:
class TelegramTrainingLog(Callback):
    # Sends the per-epoch metrics to Telegram at the end of every epoch.
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        msg = 'epoch: {}, loss: {}, acc: {}, val_loss: {}, val_acc: {}'.format(
            epoch, logs.get('loss'), logs.get('acc'), logs.get('val_loss'), logs.get('val_acc'))
        telebot.sendMessage(user_id, msg)
        

def cnn_generator() -> Sequential:
    model = Sequential()

    model.add(Conv2D(filters=32, kernel_size=(3, 3), strides=1, input_shape=(160, 160, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(filters=32, kernel_size=(3, 3), strides=1))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(filters=64, kernel_size=(5, 5), strides=1))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    
    model.add(Dense(32))
    model.add(Activation('relu'))    
    model.add(Dropout(0.5))        
    
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    
    #model.compile(loss='binary_crossentropy',
    #              optimizer='rmsprop',
    #              metrics=['accuracy'])

    return model

model = cnn_generator()
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_31 (Conv2D)           (None, 158, 158, 32)      896       
_________________________________________________________________
activation_58 (Activation)   (None, 158, 158, 32)      0         
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 79, 79, 32)        0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 77, 77, 32)        9248      
_________________________________________________________________
activation_59 (Activation)   (None, 77, 77, 32)        0         
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 38, 38, 32)        0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 34, 34, 64)        51264     
_________________________________________________________________
activation_60 (Activation)   (None, 34, 34, 64)        0         
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 17, 17, 64)        0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 18496)             0         
_________________________________________________________________
dense_30 (Dense)             (None, 32)                591904    
_________________________________________________________________
activation_61 (Activation)   (None, 32)                0         
_________________________________________________________________
dropout_18 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_31 (Dense)             (None, 32)                1056      
_________________________________________________________________
activation_62 (Activation)   (None, 32)                0         
_________________________________________________________________
dropout_19 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_32 (Dense)             (None, 1)                 33        
_________________________________________________________________
activation_63 (Activation)   (None, 1)                 0         
=================================================================
Total params: 654,401
Trainable params: 654,401
Non-trainable params: 0
_________________________________________________________________
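
The parameter counts in the summary follow directly from the layer shapes: a Conv2D layer has kernel_h * kernel_w * in_channels * filters weights plus one bias per filter, and a Dense layer has in_features * out_features weights plus out_features biases. A quick check of the first few rows:


In [ ]:
# Reproduce the Param # column of the summary above.
print(3 * 3 * 3 * 32 + 32)        # conv2d_31 ->    896
print(3 * 3 * 32 * 32 + 32)       # conv2d_32 ->   9248
print(5 * 5 * 32 * 64 + 64)       # conv2d_33 ->  51264
print(17 * 17 * 64 * 32 + 32)     # dense_30  -> 591904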

In [10]:
kfold_iteration = 0
for train_index, test_index in kfold.split(X, y):
    kfold_iteration += 1
    # Only the first fold is trained here; drop this check to run all 8 folds.
    if kfold_iteration >= 2:
        break
        
    display(Markdown('**KFold iteration #{}**'.format(kfold_iteration)))

    cancer_trainlist = (X[train_index])[np.nonzero(1 - y[train_index])]
    fibro_trainlist = (X[train_index])[np.nonzero(y[train_index])]
    cancer_traindest = os.path.join(os.getcwd(), 'train', 'cancer')
    fibro_traindest = os.path.join(os.getcwd(), 'train', 'fibro')        
    classdir_prepare(cancer_trainlist, cancer_traindest, '[TRAIN] generating cancer')
    classdir_prepare(fibro_trainlist, fibro_traindest, '[TRAIN] generating fibro ')

    cancer_testlist = (X[test_index])[np.nonzero(1 - y[test_index])]
    fibro_testlist = (X[test_index])[np.nonzero(y[test_index])]
    cancer_testdest = os.path.join(os.getcwd(), 'test', 'cancer')
    fibro_testdest = os.path.join(os.getcwd(), 'test', 'fibro')
    classdir_prepare(cancer_testlist, cancer_testdest, '[TEST ] generating cancer')
    classdir_prepare(fibro_testlist, fibro_testdest, '[TEST ] generating fibro ')
    
    train_generator = train_data_generator.flow_from_directory(
            directory=os.path.join(os.getcwd(), 'train'),
            target_size=(160, 160),
            classes=['cancer', 'fibro'],
            class_mode='binary',
            seed=random_seed,
            batch_size=batch_size)
    
    test_generator = test_data_generator.flow_from_directory(
            os.path.join(os.getcwd(), 'test'),
            target_size=(160, 160),
            classes=['cancer', 'fibro'],
            class_mode='binary',
            seed=random_seed,
            batch_size=batch_size)
    
    model = cnn_generator()
    telegram_log = TelegramTrainingLog()
    
    telebot.sendMessage(user_id, '[BEGIN] new training\n[KFOLD ITERATION] {}'.format(kfold_iteration))    
    model.fit_generator(
            generator=train_generator,
            steps_per_epoch=train_index.shape[0] // batch_size,
            epochs=10,
            callbacks=[telegram_log],
            validation_data=test_generator,
            validation_steps=test_index.shape[0] // batch_size)        
        
    model.save_weights('iter-{}.h5'.format(kfold_iteration))


KFold iteration #1

[TRAIN] generating cancer: 100%|##########| 1523/1523 [00:00<00:00, 4622.43it/s]
[TRAIN] generating fibro : 100%|##########| 1523/1523 [00:00<00:00, 4608.88it/s]
[TEST ] generating cancer: 100%|##########| 218/218 [00:00<00:00, 4370.97it/s]
[TEST ] generating fibro : 100%|##########| 218/218 [00:00<00:00, 4042.86it/s]
Found 3046 images belonging to 2 classes.
Found 436 images belonging to 2 classes.
Epoch 1/10
380/380 [==============================] - 1303s - loss: 8.0105 - acc: 0.4974 - val_loss: 7.9712 - val_acc: 0.5000
Epoch 2/10
380/380 [==============================] - 1283s - loss: 7.9908 - acc: 0.4988 - val_loss: 8.0084 - val_acc: 0.4977
Epoch 3/10
380/380 [==============================] - 1285s - loss: 7.9782 - acc: 0.4996 - val_loss: 7.9712 - val_acc: 0.5000
Epoch 4/10
380/380 [==============================] - 1285s - loss: 7.9764 - acc: 0.4997 - val_loss: 7.8594 - val_acc: 0.5070
Epoch 5/10
 36/380 [=>............................] - ETA: 1102s - loss: 7.6944 - acc: 0.5174
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-10-01091c7aa84c> in <module>()
     47             callbacks=[telegram_log],
     48             validation_data=test_generator,
---> 49             validation_steps=test_index.shape[0] // batch_size)        
     50 
     51     model.save_weights('iter-{}.h5'.format(kfold_iteration))

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
     86                 warnings.warn('Update your `' + object_name +
     87                               '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 88             return func(*args, **kwargs)
     89         wrapper._legacy_support_signature = inspect.getargspec(func)
     90         return wrapper

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/keras/models.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_q_size, workers, pickle_safe, initial_epoch)
   1105                                         workers=workers,
   1106                                         pickle_safe=pickle_safe,
-> 1107                                         initial_epoch=initial_epoch)
   1108 
   1109     @interfaces.legacy_generator_methods_support

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
     86                 warnings.warn('Update your `' + object_name +
     87                               '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 88             return func(*args, **kwargs)
     89         wrapper._legacy_support_signature = inspect.getargspec(func)
     90         return wrapper

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_q_size, workers, pickle_safe, initial_epoch)
   1875                     outs = self.train_on_batch(x, y,
   1876                                                sample_weight=sample_weight,
-> 1877                                                class_weight=class_weight)
   1878 
   1879                     if not isinstance(outs, list):

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
   1619             ins = x + y + sample_weights
   1620         self._make_train_function()
-> 1621         outputs = self.train_function(ins)
   1622         if len(outputs) == 1:
   1623             return outputs[0]

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2101         session = get_session()
   2102         updated = session.run(self.outputs + [self.updates_op],
-> 2103                               feed_dict=feed_dict)
   2104         return updated[:len(self.outputs)]
   2105 

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    776     try:
    777       result = self._run(None, fetches, feed_dict, options_ptr,
--> 778                          run_metadata_ptr)
    779       if run_metadata:
    780         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    980     if final_fetches or final_targets:
    981       results = self._do_run(handle, final_targets, final_fetches,
--> 982                              feed_dict_string, options, run_metadata)
    983     else:
    984       results = []

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1030     if handle is None:
   1031       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032                            target_list, options, run_metadata)
   1033     else:
   1034       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1037   def _do_call(self, fn, *args):
   1038     try:
-> 1039       return fn(*args)
   1040     except errors.OpError as e:
   1041       message = compat.as_text(e.message)

/home/falcon/Workspace/cvtf/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1019         return tf_session.TF_Run(session, options,
   1020                                  feed_dict, fetch_list, target_list,
-> 1021                                  status, run_metadata)
   1022 
   1023     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 
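
Once a fold runs to completion, its weights land in iter-<k>.h5. A minimal sketch of reloading them for a later evaluation (assumes fold 1 finished, so 'iter-1.h5' exists, and that the test/ directory and test_generator from that fold are still in place):


In [ ]:
# Rebuild the architecture, load the saved fold weights, and evaluate on the held-out fold.
eval_model = cnn_generator()
eval_model.load_weights('iter-1.h5')
scores = eval_model.evaluate_generator(test_generator, steps=test_index.shape[0] // batch_size)
print(dict(zip(eval_model.metrics_names, scores)))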
