------------- User's settings -------------


In [ ]:
# Location of digested data
input_directory = '/digested/'

# Desired location to save trained model
model_directory = '/model_directory/'

------------- (semi)-Automatic -------------


In [1]:
import keras
import numpy
import pickle
import os.path
import tensorflow
import random
import time


Using TensorFlow backend.

Configure CPU/GPU devices:


In [ ]:
# -------- If using Tensorflow-GPU: -------- #

configuration = tensorflow.ConfigProto()

configuration.gpu_options.allow_growth = True

configuration.gpu_options.visible_device_list = "0"

session = tensorflow.Session(config=configuration)

keras.backend.set_session(session)


# -------- If using Tensorflow (CPU) : -------- #

# configuration = tensorflow.ConfigProto()

# session = tensorflow.Session(config=configuration)

# keras.backend.set_session(session)
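
If several jobs share one GPU, an alternative to allow_growth is a hard cap on the memory fraction TensorFlow may claim; the 0.5 below is an arbitrary example value, not a recommendation:


In [ ]:
# -------- Optional: cap GPU memory instead of growing on demand -------- #

# configuration = tensorflow.ConfigProto()

# configuration.gpu_options.per_process_gpu_memory_fraction = 0.5

# session = tensorflow.Session(config=configuration)

# keras.backend.set_session(session)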

In [ ]:
if not os.path.exists(model_directory):
    os.makedirs(model_directory)

Data queueing:


In [ ]:
def training_data_generator(input_x, input_y, batch_size):
    num_examples, num_labels = input_y.shape
    # Collect the indices of the examples belonging to each class
    label_indices = []
    for i in range(num_labels):
        indices = [j for j in range(num_examples) if input_y[j, i] > 0]
        label_indices.append(indices)
        print("Label", i, ":", len(indices), "examples")
    # Draw the same number of examples from every class, so batches are balanced
    samples_per_label = batch_size // num_labels

    def generator():
        while True:
            x_samples = []
            y_samples = []
            for i in range(num_labels):
                # Reshuffle each class and take a fresh sample of its examples
                random.shuffle(label_indices[i])
                indices = label_indices[i][0:samples_per_label]
                x_samples.append( input_x[indices, ...] )
                y_samples.append( input_y[indices, ...] )
            x_samples = numpy.concatenate( x_samples )
            y_samples = numpy.concatenate( y_samples )
            # Shuffle within the batch so classes are not grouped together
            batch_indices = numpy.arange(x_samples.shape[0])
            numpy.random.shuffle(batch_indices)
            x_samples = x_samples[batch_indices, ...]
            y_samples = y_samples[batch_indices, ...]
            yield (x_samples, y_samples)
    return generator()


def prediction_data_generator(input_x, input_y, batch_size):
    num_examples, num_labels = input_y.shape
    steps = num_examples // batch_size
    def generator():
        i = 0
        while True:
            # Walk through the data sequentially, one full batch at a time
            start = i * batch_size
            end = (i + 1) * batch_size
            x_sample = input_x[start:end, ...]
            y_sample = input_y[start:end, ...]
            yield (x_sample, y_sample)
            # Wrap around after the last full batch instead of yielding an empty one
            i = i + 1 if i < steps - 1 else 0
    print("Prediction steps:", steps)
    return generator(), steps
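
A quick sanity check of the balanced generator on toy data (the arrays below are invented for illustration): each batch should contain batch_size // num_labels examples of every class.


In [ ]:
# Hypothetical toy data: 100 examples, 4 one-hot encoded classes
toy_x = numpy.random.rand(100, 48, 48, 1)
toy_y = numpy.eye(4)[numpy.random.randint(0, 4, size=100)]

toy_generator = training_data_generator(toy_x, toy_y, 32)
batch_x, batch_y = next(toy_generator)

# Expect a (32, 48, 48, 1) batch and 8 examples per class: [8. 8. 8. 8.]
print(batch_x.shape, batch_y.sum(axis=0))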

In [ ]:
# Use this function to normalize illumination discrepancies across images

def min_max_norm(x, minimum=None, maximum=None):
    channels = x.shape[-1]
    # Compute per-channel extrema unless they are supplied
    # (pass the training-set extrema when normalizing validation/test data)
    if minimum is None and maximum is None:
        minimum = []
        maximum = []
        for channel in range(channels):
            minimum.append( x[..., channel].min() )
            maximum.append( x[..., channel].max() )
    # Allocate a float array; zeros_like would inherit an integer dtype and truncate
    result = numpy.zeros(x.shape, dtype=numpy.float32)
    for ch in range(channels):
        # Rescale each channel to the range [0, 100]
        result[..., ch] = 100.0 * (x[..., ch].astype(numpy.float32) - minimum[ch]) / (maximum[ch] - minimum[ch])
    return (result, minimum, maximum)
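
The normalization rescales each channel to [0, 100] using its own extrema, and those extrema can be reused for held-out data (as done for the validation set below). A toy check:


In [ ]:
toy = numpy.random.randint(0, 4096, size=(10, 48, 48, 1))
toy_normed, toy_min, toy_max = min_max_norm(toy)

# Expect 0.0 and 100.0 once rescaled
print(toy_normed.min(), toy_normed.max())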

Load data and labels:


In [7]:
# class_weights = pickle.load(open(os.path.join(input_directory, "class_weights.sav"), 'rb'))
classes = len( pickle.load(open(os.path.join(input_directory, "class_names.sav"), 'rb')) )


All images are saved inside this tensor rank 4, "Tensor", shape: (33060, 48, 48, 1)
All labels are encoded in this one-hot label tensor rank 2, "Labels" ,shape: (33060, 7)
Training tensor "training_x" was saved, shape: (26444, 48, 48, 1)
Validation tensor "validation_x" was saved, shape: (3306, 48, 48, 1)
Testing tensor "testing_x" was saved, shape: (3310, 48, 48, 1)
Number of objects in each class:
0 Anaphase 128
1 G1 14333
2 G2 8601
3 Metaphase 552
4 Prophase 606
5 S_phase 8616
6 Telophase 224
Class weight(s) :  {0: 258.28125, 1: 2.3065652689597433, 2: 3.843739100104639, 3: 59.891304347826086, 4: 54.554455445544555, 5: 3.8370473537604455, 6: 147.58928571428572}
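
The class weights printed above appear to follow weight = total_objects / class_count (e.g. 33060 / 128 = 258.28125 for Anaphase); a minimal sketch reproducing them from the counts:


In [ ]:
counts = {0: 128, 1: 14333, 2: 8601, 3: 552, 4: 606, 5: 8616, 6: 224}
total = sum(counts.values())  # 33060

class_weights = {label: total / count for label, count in counts.items()}
print(class_weights)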


In [ ]:
training_x = numpy.load(os.path.join(input_directory, "training_x.npy"))

training_y = numpy.load(os.path.join(input_directory, "training_y.npy"))

validation_x = numpy.load(os.path.join(input_directory, "validation_x.npy"))

validation_y = numpy.load(os.path.join(input_directory, "validation_y.npy"))

In [ ]:
print("Loading training data")

# Use this function to normalize signal intensities across images
training_x, pix_min, pix_max = min_max_norm(training_x)

training_generator = training_data_generator(training_x, training_y, 32) 

print(training_x.shape, training_y.shape)

In [ ]:
print("Loading validation data")

# Use this function to normalize signal intensities across images
validation_x, _, _ = min_max_norm(validation_x, pix_min, pix_max)

validation_generator, validation_steps = prediction_data_generator(validation_x, validation_y, 32)

print(validation_x.shape)

Construct convolutional neural network:


In [ ]:
shape = (training_x.shape[1], training_x.shape[2], training_x.shape[3])

x = keras.layers.Input(shape)

In [ ]:
options = {"activation": None, "kernel_size": (3, 3), "padding": "same"}

# Block 1:
y = keras.layers.Conv2D(32, **options)(x)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

y = keras.layers.Conv2D(32, **options)(y)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

# Block 2:
y = keras.layers.Conv2D(64, **options)(y)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

y = keras.layers.Conv2D(64, **options)(y)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

y = keras.layers.MaxPooling2D(pool_size=2, strides=None, padding='same')(y)

# Block 3:
y = keras.layers.Conv2D(128, **options)(y)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

y = keras.layers.Conv2D(128, **options)(y)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

y = keras.layers.MaxPooling2D(pool_size=2, strides=None, padding='same')(y)

# Block 4:
y = keras.layers.Conv2D(256, **options)(y)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

y = keras.layers.Conv2D(256, **options)(y)
y = keras.layers.Activation("relu")(y)
y = keras.layers.BatchNormalization()(y)

y = keras.layers.MaxPooling2D(pool_size=2, strides=None, padding='same')(y)

# Block 5: classifier head
y = keras.layers.Flatten()(y)
y = keras.layers.Dense(1024, activation="relu")(y)
y = keras.layers.Dropout(0.5)(y)
y = keras.layers.Dense(classes)(y)
y = keras.layers.Activation("softmax")(y)

In [ ]:
model = keras.models.Model(x, y)

In [15]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 48, 48, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 48, 48, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 48, 48, 32)        9248      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 48, 48, 64)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 48, 48, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 128)       73856     
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 24, 24, 128)       147584    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 128)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 12, 12, 256)       295168    
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 12, 12, 256)       590080    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 6, 6, 256)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              9438208   
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 7)                 7175      
_________________________________________________________________
activation_1 (Activation)    (None, 7)                 0         
=================================================================
Total params: 10,617,063.0
Trainable params: 10,617,063.0
Non-trainable params: 0.0
_________________________________________________________________

In [ ]:
loss = keras.losses.categorical_crossentropy

optimizer = keras.optimizers.Adam(0.00001)

model.compile(
    loss=loss, 
    metrics=[
        "accuracy"
    ],
    optimizer=optimizer
)

Train the network:


In [ ]:
start = time.time()

# -------- If using Tensorflow (CPU) : -------- #

# init = tensorflow.global_variables_initializer()

# session.run(init)

# -------------------------------------------- #


model.fit_generator(
    generator=training_generator,
    steps_per_epoch=2500,
    epochs=18,
    verbose=0,  # set to 1 to print per-epoch progress
    callbacks=[
        keras.callbacks.CSVLogger( os.path.join(model_directory, 'training.csv') ),
        keras.callbacks.ModelCheckpoint( os.path.join(model_directory, 'checkpoint.hdf5') )
    ],
    max_q_size=256,
    validation_data=validation_generator,
    validation_steps=validation_steps  # computed above from the validation set size
)

stop = time.time()
print(stop - start, "s")
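
Because training_data_generator already yields class-balanced batches, the class weights loaded (commented out) at the top are not needed here; as an alternative strategy, they could be passed via fit_generator's class_weight argument instead. A sketch, assuming class_weights holds the dictionary shown earlier:


In [ ]:
# model.fit_generator(
#     generator=training_generator,
#     steps_per_epoch=2500,
#     epochs=18,
#     class_weight=class_weights,  # e.g. {0: 258.28, 1: 2.31, ...}
#     validation_data=validation_generator,
#     validation_steps=validation_steps
# )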

In [ ]:
model.save( os.path.join(model_directory, 'model.h5') )
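
The saved HDF5 file can later be restored with load_model, which brings back the architecture, weights, and optimizer state:


In [ ]:
# restored_model = keras.models.load_model( os.path.join(model_directory, 'model.h5') )

# predictions = restored_model.predict(validation_x, batch_size=32)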

In [ ]:
session.close()