# Location of digested data (the .npy arrays and class_names.sav are read from here)
input_directory = '/digested/'
# Location of saved trained model (model.h5 and training.csv are read from here)
model_directory = '/model_directory/'
# Desired location for outputs (metadata, feature tables and figures are written here)
output_directory = '/output_directory/'
%matplotlib inline
import keras
import pickle
from keras.layers import *
from keras.models import Sequential
import numpy
import os
import os.path
import matplotlib.pyplot
import pandas
import seaborn
import sklearn.metrics
import tensorflow
from tensorflow.contrib.tensorboard.plugins import projector
Configure GPU/CPU devices:
# -------- If using Tensorflow-GPU: -------- #
configuration = tensorflow.ConfigProto()
# Let TensorFlow grow its GPU memory allocation on demand.
configuration.gpu_options.allow_growth = True
# Restrict TensorFlow to the GPU with index "0".
configuration.gpu_options.visible_device_list = "0"
session = tensorflow.Session(config=configuration)
# Register the session with Keras; without this call the options configured
# above never reach the backend session that Keras actually uses.
keras.backend.set_session(session)
# -------- If using Tensorflow (CPU) : -------- #
# configuration = tensorflow.ConfigProto()
# session = tensorflow.Session(config=configuration)
# keras.backend.set_session(session)
# Create the output folder (including missing parents) before anything is
# written to it; exist_ok makes the call a no-op when it already exists.
os.makedirs(output_directory, exist_ok=True)
def training_data_generator(input_x, input_y, batch_size):
    """Return an endless generator of class-balanced, shuffled batches.

    Args:
        input_x: Array of examples, indexed along its first axis.
        input_y: 2-D label array of shape (num_examples, num_labels). Example
            ``j`` counts as a member of label ``i`` when ``input_y[j, i] > 0``.
        batch_size: Target batch size. Every label contributes
            ``batch_size // num_labels`` examples, so a batch is smaller than
            ``batch_size`` when that division has a remainder or when a label
            has fewer examples than its share.

    Returns:
        A generator that yields ``(x_samples, y_samples)`` tuples forever.
    """
    _num_examples, num_labels = input_y.shape

    # Row indices of the examples that belong to each label. These must be
    # collected here: the batch loop below looks them up per label.
    label_indices = [
        numpy.flatnonzero(input_y[:, label] > 0) for label in range(num_labels)
    ]
    samples_per_label = int(batch_size // num_labels)

    def generator():
        while True:
            x_samples = []
            y_samples = []
            for indices in label_indices:
                # Draw a fresh random subset for every batch; always taking
                # the leading entries would repeat the same examples forever.
                chosen = numpy.random.permutation(indices)[:samples_per_label]
                x_samples.append(input_x[chosen, ...])
                y_samples.append(input_y[chosen, ...])
            x_samples = numpy.concatenate(x_samples)
            y_samples = numpy.concatenate(y_samples)
            # Mix the labels inside the batch so that examples of one label
            # are not laid out contiguously.
            batch_indices = numpy.random.permutation(x_samples.shape[0])
            yield (x_samples[batch_indices, ...], y_samples[batch_indices, ...])

    return generator()
def prediction_data_generator(input_x, input_y, batch_size):
    """Return a cyclic generator of consecutive batches and its cycle length.

    Args:
        input_x: Array of examples, sliced along its first axis.
        input_y: 2-D label array of shape (num_examples, num_labels), sliced
            in step with ``input_x``.
        batch_size: Number of examples per batch; the last batch of a cycle
            holds the remainder and may be smaller.

    Returns:
        A ``(generator, steps)`` tuple. ``steps`` is the number of batches
        needed to cover every example exactly once; the generator restarts
        from the first example after ``steps`` batches.
    """
    num_examples, _num_labels = input_y.shape
    # Ceiling division: a trailing partial batch still needs its own step,
    # otherwise the last examples would never be predicted.
    steps = -(-num_examples // batch_size)

    def generator():
        i = 0
        while True:
            start = i * batch_size
            end = start + batch_size
            yield (input_x[start:end, ...], input_y[start:end, ...])
            # Wrap after the last real batch so no empty batch is produced;
            # max() keeps the modulo valid when there are no examples at all.
            i = (i + 1) % max(steps, 1)

    print("Prediction steps:", steps)
    return generator(), steps
# This function normalizes illumination discrepancies across images
def min_max_norm(x, minimum=None, maximum=None):
    """Rescale every channel of ``x`` to the range 0-100 by min-max scaling.

    Args:
        x: Array whose last axis indexes the channels.
        minimum: Optional per-channel lower limits. When omitted, each
            channel's own minimum in ``x`` is used.
        maximum: Optional per-channel upper limits. When omitted, each
            channel's own maximum in ``x`` is used.

    Returns:
        A ``(result, minimum, maximum)`` tuple. ``result`` has the shape of
        ``x``; it keeps the dtype of ``x`` when that is a floating type and is
        float32 otherwise. ``minimum`` and ``maximum`` are the limits that
        were applied, so they can be reused on another array.
    """
    channels = x.shape[-1]
    # Each limit is derived independently, so passing only one of them works.
    if minimum is None:
        minimum = [x[..., ch].min() for ch in range(channels)]
    if maximum is None:
        maximum = [x[..., ch].max() for ch in range(channels)]

    # An integer output buffer would truncate the scaled values, so integer
    # inputs get a float32 result instead.
    if numpy.issubdtype(x.dtype, numpy.floating):
        out_dtype = x.dtype
    else:
        out_dtype = numpy.float32
    result = numpy.zeros(x.shape, dtype=out_dtype)

    for ch in range(channels):
        low = float(minimum[ch])
        span = float(maximum[ch]) - low
        if span == 0:
            # Constant channel: leave it at zero rather than divide by zero.
            continue
        result[..., ch] = 100.0 * ((x[..., ch].astype(numpy.float32) - low) / span)
    return (result, minimum, maximum)
# Read the training arrays from the digested-data folder.
training_x, training_y = (
    numpy.load(os.path.join(input_directory, file_name))
    for file_name in ("training_x.npy", "training_y.npy")
)
# input_directory = "/path/to/other/input_directory/if/needed"
# Read the testing arrays from wherever input_directory points at this line.
testing_x, testing_y = (
    numpy.load(os.path.join(input_directory, file_name))
    for file_name in ("testing_x.npy", "testing_y.npy")
)

print("Loading training data")
# Normalize signal intensities across images and keep the limits that were used
training_x, pix_min, pix_max = min_max_norm(training_x)
training_generator = training_data_generator(training_x, training_y, batch_size=32)
print(training_x.shape, training_y.shape)

print("Loading test data")
# Normalize the test images with the limits obtained from the training images
testing_x, _, _ = min_max_norm(testing_x, minimum=pix_min, maximum=pix_max)
testing_generator, testing_steps = prediction_data_generator(
    testing_x, testing_y, batch_size=32
)
# Load the trained model stored as 'model.h5' inside model_directory.
model = keras.models.load_model( os.path.join(model_directory, 'model.h5') )
# Load weights from the same file into the model that was just created.
# NOTE(review): load_model presumably restores the weights already, which
# would make this call redundant — confirm before removing it.
model.load_weights(os.path.join(model_directory, 'model.h5'))
# Keep the model's layer list; a layer is picked from it by index further down.
layers = model.layers
Look for the densely/fully connected layer nearest to the classifier, i.e. the last layer before the one that has the shape of (None, number-of-classes)
Example 1: in case of classification of 7 classes, the last few layers are:
dense_1 (Dense) (None, 1024) 943820
dropout_1 (Dropout) (None, 1024) 0
dense_2 (Dense) (None, 7) 7175
activation_1 (Activation) (None, 7) 0
then look for the layer dense_1 , which has a shape of (None, 1024)
Example 2: in case of classification of 5 classes, the last few layers are:
activation_49 (Activation) (None, 8, 8, 2048) 0
avg_pool (AveragePooling2D) (None, 1, 1, 2048) 0
global_average_pooling2d_1 (Glob (None, 2048) 0
dense_2 (Dense) (None, 5) 10245
then look for the layer global_average_pooling2d_1 , which has a shape of (None, 2048)
abstract_model = None # Clear cached abstract_model
# Feature extractor: maps the trained model's input to the output of the
# selected layer. Adjust the index (-4) so that it points at the last layer
# before the classifier of your own model. Wrapping that single layer in a
# Sequential would not run the layers that precede it.
abstract_model = keras.models.Model(inputs=model.input, outputs=layers[-4].output)
# Run the test batches through the extractor, one pass over the generator.
extracted_features = abstract_model.predict_generator(
    testing_generator,
    steps=testing_steps
)
print('Converting numeric labels into class names...')
# NOTE: unpickling can execute arbitrary code; only load class_names.sav from
# a trusted source.
# The context manager closes the file handle once the names are loaded.
with open(os.path.join(input_directory, "class_names.sav"), 'rb') as class_names_file:
    class_names = pickle.load(class_names_file)
def save_metadata(file):
    """Write the class name of every test example to ``file``, one per line.

    Reads the module-level ``testing_y`` and ``class_names``. Labels given as
    a 2-D array (one column per class) are reduced to class indices with
    argmax; a 1-D array is taken as class indices directly.

    Args:
        file: Path of the text file to create or overwrite.
    """
    labels = numpy.asarray(testing_y)
    if labels.ndim > 1:
        # A whole label row cannot index class_names; use the winning column.
        labels = numpy.argmax(labels, axis=-1)
    with open(file, 'w') as f:
        f.writelines('{}\n'.format(class_names[int(label)]) for label in labels)


save_metadata( os.path.join(output_directory, 'metadata.tsv') )
To be uploaded and viewed on the TensorFlow Embedding Projector (http://projector.tensorflow.org):
# Write the extracted features as a tab-delimited text table in output_directory.
numpy.savetxt( os.path.join(output_directory, 'table_of_features.txt' ), extracted_features, delimiter='\t')
Once finished, open http://projector.tensorflow.org in a web browser.
Click "Load data" on the left panel.
Step 1: Load a TSV file of vectors >> Choose file: 'table_of_features.txt'
Step 2: Load a TSV file of metadata >> Choose file: 'metadata.tsv'
Hit ESC or click outside the load data window to dismiss.
Used for generating Tensorboard embeddings to be viewed locally on http://localhost:6006
# Store the features as .npy and read them back from that same path, so the
# embedding is built from exactly the file that was written.
features_path = os.path.join(output_directory, 'table_of_features.npy')
numpy.save(features_path, extracted_features)
extracted_features = numpy.load(features_path)
embedding_var = tensorflow.Variable(extracted_features)
embedSess = tensorflow.Session()
# save variable in session
embedding_var.initializer.run(session=embedSess)
# save session (only used variable) to file
saver = tensorflow.train.Saver([embedding_var])
saver.save(embedSess, 'tf.ckpt')
summary_writer = tensorflow.summary.FileWriter('./')
config = tensorflow.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding = config.embeddings.add()
# The projector locates the tensor inside the checkpoint by the variable name.
embedding.tensor_name = embedding_var.name
embedding.metadata_path = 'metadata.tsv' # this metadata_path needs to be changed to a full path later
tensorflow.contrib.tensorboard.plugins.projector.visualize_embeddings(summary_writer, config)
Tensorboard embedding files will be saved in the same location as this script.
Collect the following files into one folder:
Open "projector_config.pbtxt" with any text editor:
The full path "/path/to/logdir/metadata.tsv" has to be specified; it CANNOT be a relative path such as "./metadata.tsv" or "~/metadata.tsv".
Then type this command in a terminal: tensorboard --logdir="/path/to/logdir"
Next, open a web browser and connect to http://localhost:6006
# Read training.csv from model_directory into a DataFrame (not used further
# in the statements below).
metrics = pandas.read_csv(os.path.join(model_directory, 'training.csv') )
# Class scores for every test example, reduced to the index of the top class.
predicted = model.predict(testing_x)
predicted = numpy.argmax(predicted, -1)
expected = numpy.argmax(testing_y, -1)
# Passing every class index keeps row/column k tied to class k even when a
# class appears neither in the expected nor in the predicted labels.
confusion = sklearn.metrics.confusion_matrix(
    expected, predicted, labels=numpy.arange(testing_y.shape[-1])
)
confusion = pandas.DataFrame(confusion)
matplotlib.pyplot.figure(figsize=(12, 8))
seaborn.heatmap(confusion, annot=True)
matplotlib.pyplot.savefig( os.path.join(output_directory, 'confusion_matrix.eps') , format='eps', dpi=600)