------------- User's settings -------------


In [ ]:
# Location of digested data
input_directory = '/digested/'

# Location of saved trained model
model_directory = '/model_directory/'

# Desired location for outputs
output_directory = '/output_directory/'

------------- (semi)-Automatic -------------


In [ ]:
%matplotlib inline
import keras
import pickle
from keras.layers import *
from keras.models import Sequential
import numpy
import os
import os.path

import matplotlib.pyplot
import pandas
import seaborn
import sklearn.metrics
import tensorflow

from tensorflow.contrib.tensorboard.plugins import projector

Configure GPU/CPU devices:


In [ ]:
# -------- If using Tensorflow-GPU: -------- #

configuration = tensorflow.ConfigProto()

configuration.gpu_options.allow_growth = True

configuration.gpu_options.visible_device_list = "0"

session = tensorflow.Session(config=configuration)

keras.backend.set_session(session)


# -------- If using Tensorflow (CPU) : -------- #

# configuration = tensorflow.ConfigProto()

# session = tensorflow.Session(config=configuration)

# keras.backend.set_session(session)

In [ ]:
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

Data queueing


In [ ]:
def training_data_generator(input_x, input_y, batch_size):
    # Build per-class index lists so each batch can be class-balanced
    num_examples, num_labels = input_y.shape
    label_indices = []
    for i in range(num_labels):
        indices = [j for j in range(num_examples) if input_y[j, i] > 0]
        label_indices.append(indices)
        print("Label", i, ":", len(indices), "examples")
    samples_per_label = int(batch_size / num_labels)

    def generator():
        while True:
            x_samples = []
            y_samples = []
            # Draw an equal number of examples from every class
            for i in range(num_labels):
                numpy.random.shuffle(label_indices[i])
                indices = label_indices[i][0:samples_per_label]
                x_samples.append( input_x[indices, ...] )
                y_samples.append( input_y[indices, ...] )
            x_samples = numpy.concatenate( x_samples )
            y_samples = numpy.concatenate( y_samples )
            # Shuffle within the batch so classes are interleaved
            batch_indices = numpy.arange(x_samples.shape[0])
            numpy.random.shuffle(batch_indices)
            x_samples = x_samples[batch_indices, ...]
            y_samples = y_samples[batch_indices, ...]
            yield (x_samples, y_samples)
    return generator()


def prediction_data_generator(input_x, input_y, batch_size):
    # Sequential batching over the whole set; cycles so Keras can request
    # the same number of steps repeatedly.
    num_examples, num_labels = input_y.shape
    steps = int(num_examples / batch_size)
    def generator():
        i = 0
        while True:
            start = i*batch_size
            end = (i+1)*batch_size
            x_sample = input_x[start:end, ...]
            y_sample = input_y[start:end, ...]
            yield (x_sample, y_sample)
            i = i + 1 if i < steps - 1 else 0  # wrap after the last full batch
    print("Prediction steps:", steps)
    return generator(), steps

In [ ]:
# Normalize illumination discrepancies across images by rescaling each
# channel to the range [0, 100] with per-channel minima and maxima.

def min_max_norm(x, minimum=None, maximum=None):
    channels = x.shape[-1]
    if minimum is None and maximum is None:
        # Compute the extrema per channel from this dataset
        minimum = []
        maximum = []
        for channel in range(channels):
            minimum.append( x[..., channel].min() )
            maximum.append( x[..., channel].max() )
    # Accumulate in float32 so integer-typed inputs are not truncated
    result = numpy.zeros(x.shape, dtype=numpy.float32)
    for ch in range(channels):
        result[..., ch] = 100.0*( (x[..., ch].astype(numpy.float32) - minimum[ch])/(maximum[ch] - minimum[ch]) )
    return (result, minimum, maximum)

Load data:


In [ ]:
training_x = numpy.load(os.path.join(input_directory, "training_x.npy"))

training_y = numpy.load(os.path.join(input_directory, "training_y.npy"))

# input_directory = "/path/to/other/input_directory/if/needed"

testing_x = numpy.load(os.path.join(input_directory, "testing_x.npy"))

testing_y = numpy.load(os.path.join(input_directory, "testing_y.npy"))

In [ ]:
print("Loading training data")

# Use this function to normalize signal intensities across images
training_x, pix_min, pix_max = min_max_norm(training_x)

training_generator = training_data_generator(training_x, training_y, 32) 

print(training_x.shape, training_y.shape)
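
Optionally, draw one batch from the generator to confirm the class-balanced batch shapes (a quick sanity check, not part of the original workflow):


In [ ]:
# Optional sanity check: one class-balanced batch from the generator
x_batch, y_batch = next(training_generator)
print(x_batch.shape, y_batch.shape)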

In [ ]:
print("Loading test data")

# Use this function to normalize signal intensities across images
testing_x, _, _ = min_max_norm(testing_x, pix_min, pix_max)

testing_generator, testing_steps = prediction_data_generator(testing_x, testing_y, 32)

print(testing_x.shape)

Load trained model:

(can also load checkpoints)


In [ ]:
model = keras.models.load_model( os.path.join(model_directory, 'model.h5') )

In [ ]:
# Optionally reload weights, e.g. from a saved training checkpoint file
model.load_weights(os.path.join(model_directory, 'model.h5'))

Evaluate testing set


In [ ]:
model.evaluate_generator(
    generator=testing_generator, 
    steps=testing_steps
)
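
evaluate_generator returns its scalars in the order of model.metrics_names; printing the names makes the list above easier to interpret:


In [ ]:
# The metric names corresponding to the scalars returned above
print(model.metrics_names)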

Extract the feature (embedding) layer


In [ ]:
layers = model.layers

In [ ]:
model.summary()

Look for the layer that feeds the classifier: the last densely/fully-connected or global-pooling layer sitting just before the final layers whose shape is (None, number-of-classes).

==================================================================

Example 1: for a classification task with 7 classes, the last few layers are:

dense_1 (Dense)                  (None, 1024)        943820
dropout_1 (Dropout)              (None, 1024)        0
dense_2 (Dense)                  (None, 7)           7175
activation_1 (Activation)        (None, 7)           0

then look for the layer dense_1, which has an output shape of (None, 1024).

==================================================================

Example 2: for a classification task with 5 classes, the last few layers are:

activation_49 (Activation)       (None, 8, 8, 2048)  0
avg_pool (AveragePooling2D)      (None, 1, 1, 2048)  0
global_average_pooling2d_1 (Glob (None, 2048)        0
dense_2 (Dense)                  (None, 5)           10245

then look for the layer global_average_pooling2d_1, which has an output shape of (None, 2048).
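
When the summary is long, enumerating the layers with negative indices makes the target easier to locate (a small helper; standard Keras layers expose name and output_shape):


In [ ]:
# Print each layer's negative index, name, and output shape
num_layers = len(model.layers)
for index, layer in enumerate(model.layers):
    print(index - num_layers, layer.name, layer.output_shape)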


In [ ]:
# Adjust the index to the layer identified above (-4 matches Example 1; Example 2 would need -2)
print(layers[-4])

In [ ]:
abstract_model = None # Clear cached abstract_model
abstract_model = Sequential([layers[-4]]) # Wrap the chosen feature layer in its own model

In [ ]:
extracted_features = abstract_model.predict_generator(
    generator=testing_generator,
    steps=testing_steps)

Metadata for embeddings


In [ ]:
print('Converting numeric labels into class names...')

class_names = pickle.load(open(os.path.join(input_directory, "class_names.sav"), 'rb'))

def save_metadata(file):
    # One metadata row per extracted feature vector (the projector requires
    # matching row counts); labels are recovered from the one-hot testing_y.
    with open(file, 'w') as f:
        for i in range(extracted_features.shape[0]):
            f.write('{}\n'.format( class_names[numpy.argmax(testing_y[i])] ))

save_metadata( os.path.join(output_directory, 'metadata.tsv') )

print('Done.')

Extracted features in .TXT

To be uploaded and viewed on http://projector.tensorflow.org


In [ ]:
numpy.savetxt( os.path.join(output_directory, 'table_of_features.txt' ), extracted_features, delimiter='\t')

Note:

Once finished, open http://projector.tensorflow.org in a web browser.

Click "Load data" on the left panel.

  • Step 1: Load a TSV file of vectors >> Choose file: 'table_of_features.txt'

  • Step 2: Load a TSV file of metadata >> Choose file: 'metadata.tsv'

Hit ESC or click outside the load data window to dismiss.

Extracted features in .NPY

Used for generating TensorBoard embeddings to be viewed locally at http://localhost:6006


In [ ]:
numpy.save( os.path.join(output_directory, 'table_of_features.npy' ), extracted_features )

In [ ]:
extracted_features = numpy.load( os.path.join(output_directory, 'table_of_features.npy') )
embedding_var = tensorflow.Variable(extracted_features)

embedSess = tensorflow.Session()

# save variable in session
embedSess.run(embedding_var.initializer)

# save session (only used variable) to file
saver = tensorflow.train.Saver([embedding_var])
saver.save(embedSess, 'tf.ckpt')

summary_writer = tensorflow.summary.FileWriter('./')

config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
embedding.metadata_path = 'metadata.tsv' # this metadata_path needs to be edited later; see the note below.
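# Alternative (a sketch, not the original workflow): write an absolute path now
# to skip the manual edit, assuming metadata.tsv already sits in the log directory:
# embedding.metadata_path = os.path.abspath(os.path.join(output_directory, 'metadata.tsv'))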
projector.visualize_embeddings(summary_writer, config)

embedSess.close()

Note:

The TensorBoard embedding files will be saved in the same location as this script.

Collect the following files into one folder (a convenience sketch for this step follows the list):

  • metadata.tsv
  • checkpoint
  • projector_config.pbtxt
  • tf.ckpt.index
  • tf.ckpt.meta
  • tf.ckpt.data-00000-of-00001
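
A convenience sketch for the collection step (optional; the folder name 'embedding_logdir' is an assumption, and the projector files are expected next to this script as noted above):


In [ ]:
import shutil

# Gather the projector files plus metadata.tsv into one log folder
logdir = os.path.join(output_directory, 'embedding_logdir')
if not os.path.exists(logdir):
    os.makedirs(logdir)
for name in ['checkpoint', 'projector_config.pbtxt', 'tf.ckpt.index',
             'tf.ckpt.meta', 'tf.ckpt.data-00000-of-00001']:
    shutil.copy(name, logdir)
shutil.copy(os.path.join(output_directory, 'metadata.tsv'), logdir)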

Open "projector_config.pbtxt" in any text editor.

The metadata_path must be an absolute path such as "/path/to/logdir/metadata.tsv"; it CANNOT be a relative path like "./metadata.tsv" or "~/metadata.tsv".

Then run in a terminal: tensorboard --logdir="/path/to/logdir"

Next, open http://localhost:6006 in a web browser.

Plot categorical accuracy and loss


In [ ]:
metrics = pandas.read_csv(os.path.join(model_directory, 'training.csv') )

In [ ]:
print(metrics)

In [ ]:
matplotlib.pyplot.plot(metrics["acc"], label="training")
matplotlib.pyplot.plot(metrics["val_acc"], label="validation")
matplotlib.pyplot.legend()

In [ ]:
matplotlib.pyplot.plot(metrics["loss"], label="training")
matplotlib.pyplot.plot(metrics["val_loss"], label="validation")
matplotlib.pyplot.legend()

Confusion matrix


In [ ]:
predicted = model.predict(
    batch_size=50,
    x=testing_x
)

predicted = numpy.argmax(predicted, -1)
expected = numpy.argmax(testing_y, -1)

In [ ]:
confusion = sklearn.metrics.confusion_matrix(expected, predicted)

confusion = pandas.DataFrame(confusion)

matplotlib.pyplot.figure(figsize=(12, 8))

seaborn.heatmap(confusion, annot=True)

matplotlib.pyplot.savefig( os.path.join(output_directory, 'confusion_matrix.eps') , format='eps', dpi=600)
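
A per-class precision/recall/F1 summary complements the confusion matrix (optional; sklearn.metrics is already imported above):


In [ ]:
# Optional: per-class precision, recall, and F1 for the same predictions
print(sklearn.metrics.classification_report(expected, predicted))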