In [1]:
# A bit of setup
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_nn_ops
from lib.datasets import *
from matplotlib import pyplot as plt
from matplotlib.pyplot import imshow
import matplotlib.image as mpimg
%matplotlib inline
%reload_ext autoreload
%autoreload 2
Download the VGG16 pretrained model from: ftp://mi.eng.cam.ac.uk/pub/mttt2/models/vgg16.npy
Download the AlexNet pretrained model from the BVLC_ALEXNET link.
Save both models (.npy files) to lib/tf_models/.
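Before moving on, it may help to confirm that both files are in place. A minimal sanity check (a sketch; the file names match the paths used in the cells below):

```python
import os

# Check that the two pretrained .npy files have been downloaded to lib/tf_models/
for fname in ["vgg16.npy", "bvlc_alexnet.npy"]:
    path = os.path.join("lib/tf_models", fname)
    status = "found" if os.path.exists(path) else "MISSING -- download it first"
    print("{}: {}".format(path, status))
```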
In [2]:
import numpy as np
from lib.tf_models import vgg16
import os
import math
In [3]:
# Define the vgg network for visualizations
vgg_viz = vgg16.Vgg16()
vgg_viz.load()
vgg_viz.setup()
In [4]:
# Utility function to arrange a batch of images into a square grid for display
def viz_grid(Xs, ubound=255.0, padding=1):
N, H, W, C = Xs.shape
grid_size = int(math.ceil(math.sqrt(N)))
grid_height = H * grid_size + padding * (grid_size - 1)
grid_width = W * grid_size + padding * (grid_size - 1)
grid = np.zeros((grid_height, grid_width, C))
next_idx = 0
y0, y1 = 0, H
for y in range(grid_size):
x0, x1 = 0, W
for x in range(grid_size):
if next_idx < N:
img = Xs[next_idx]
grid[y0:y1, x0:x1] = img
next_idx += 1
x0 += W + padding
x1 += W + padding
y0 += H + padding
y1 += H + padding
return grid
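As a quick usage example (a sketch on made-up data): 16 random 8x8 RGB tiles fill a 4x4 grid, so with the default padding of 1 the grid is (8 * 4 + 1 * 3, 8 * 4 + 1 * 3, 3) = (35, 35, 3):

```python
# Sketch: check viz_grid's output shape on dummy data
dummy = np.random.rand(16, 8, 8, 3) * 255.0
print(viz_grid(dummy).shape)  # expected: (35, 35, 3)
```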
In [5]:
model_paths = {
"Vgg-16": os.path.join("lib/tf_models", "vgg16.npy"),
"AlexNet": os.path.join("lib/tf_models", "bvlc_alexnet.npy")
}
for net in sorted(model_paths):
model_path = model_paths[net]
print("Model from {}".format(model_path))
pretrained = np.load(model_path, encoding='latin1').item()
print("Pretrained {} successfully loaded!".format(net))
first_conv = "conv1_1" if net == "Vgg-16" else "conv1"
conv1 = pretrained[first_conv]
#############################################################################
# TODO: Extract the weight and bias from conv1 #
# HINT: What's the data type of conv1? #
#############################################################################
w, b = conv1
print(w.shape)
#############################################################################
# TODO: Scale the kernel weights to [0,1] #
#############################################################################
w_01 = (w - np.amin(w))
w_01 /= (np.amax(w) - np.amin(w))
#############################################################################
# TODO: Transpose the scaled kernel weights so that the #
# number of filters comes first in the dimension as (n, H, W, C) #
#############################################################################
wT = np.transpose(w_01, axes=[3, 0, 1, 2])
# Define a figure
fig = plt.figure(figsize=(8,8))
ax1 = plt.subplot(111)
rgb_w = [] # list of filters
n, H, W, C = wT.shape
#############################################################################
# TODO: Store each of the n transposed kernel weights #
# to the rgb_w list so the list is of dimension (n, (H, W, C)) #
#############################################################################
rgb_w = [wT[i, :] for i in range(n)]
# Transform the python list to numpy array
rgb_w = np.asarray(rgb_w)
# Grid the rgb_w
grid = viz_grid(rgb_w)
ax1.imshow(grid[...,::-1])
ax1.set_title('{} Learned First Conv Filters'.format(net), fontsize=16)
In [6]:
data_train, labels_train, data_test, labels_test = CIFAR10('data/cifar-10-batches-py')
In [7]:
tf.reset_default_graph()
sess = tf.Session()
In [8]:
def conv2d(inputs, kernel_size, stride, num_filter):
stride_shape = [1, stride, stride, 1]
filter_shape = [kernel_size, kernel_size, inputs.get_shape()[3], num_filter]
W = tf.get_variable('w', filter_shape, tf.float32, tf.random_normal_initializer(0.0, 0.02))
b = tf.get_variable('b', [1, 1, 1, num_filter], initializer=tf.constant_initializer(0.0))
return tf.nn.conv2d(inputs, W, stride_shape, padding='SAME') + b
def max_pool(inputs, kernel_size, stride):
ksize = [1, kernel_size, kernel_size, 1]
strides = [1, stride, stride, 1]
return tf.nn.max_pool(inputs, ksize=ksize, strides=strides, padding='SAME')
#############################################################################
# TODO: You can add any layers (fully-connected, normalization) #
#############################################################################
def fc(inputs, num_outputs):
return tf.contrib.layers.fully_connected(inputs,
num_outputs,
activation_fn=None,
weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
biases_initializer=tf.constant_initializer(),
)
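To see how these helpers shape a CIFAR-10 tensor (and where the 8 * 8 * 64 = 4096 flatten size used below comes from), here is a throwaway shape check; a sketch that reuses the helpers above in a scratch graph so it does not touch the default graph or session:

```python
# Sketch: trace the spatial dimensions through the two conv/pool stages.
# With padding='SAME', a stride-2 pool maps 32 -> 16 -> 8.
with tf.Graph().as_default():
    x_check = tf.placeholder(tf.float32, [None, 32, 32, 3])
    with tf.variable_scope('shape_check_conv1'):
        h = max_pool(tf.nn.relu(conv2d(x_check, 7, 1, 32)), 3, 2)  # (?, 16, 16, 32)
    with tf.variable_scope('shape_check_conv2'):
        h = max_pool(tf.nn.relu(conv2d(h, 5, 1, 64)), 3, 2)        # (?, 8, 8, 64)
    print(h.get_shape())  # flatten size: 8 * 8 * 64 = 4096
```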
In [9]:
class BaseModel(object):
def __init__(self):
self.num_epoch = 5
self.batch_size = 128
self.log_step = 50
self._build_model()
def _model(self):
print('-' * 5 + ' Sample model ' + '-' * 5)
print('input layer: ' + str(self.X.get_shape()))
with tf.variable_scope('conv1'):
self.conv1 = conv2d(self.X, 7, 1, 32)
self.relu1 = tf.nn.relu(self.conv1)
self.pool1 = max_pool(self.relu1, 3, 2)
print('conv1 layer: ' + str(self.pool1.get_shape()))
with tf.variable_scope('conv2'):
#############################################################################
# TODO: Complete the following functions #
#############################################################################
# 5x5 convolutional layer with 64 filters, stride of 1, and padding 'SAME'
self.conv2 = conv2d(self.pool1, 5, 1, 64)
self.relu2 = tf.nn.relu(self.conv2)
# 3x3 max pooling layer with a stride of 2
self.pool2 = max_pool(self.relu2, 3, 2)
#############################################################################
# END OF YOUR CODE #
#############################################################################
print('conv2 layer: ' + str(self.pool2.get_shape()))
#############################################################################
# TODO: Flatten the output tensor from conv2 layer #
#############################################################################
self.flat = tf.reshape(self.pool2, [-1, 8 * 8 * 64])
#############################################################################
# END OF YOUR CODE #
#############################################################################
print('flat layer: ' + str(self.flat.get_shape()))
with tf.variable_scope('fc3'):
#############################################################################
# TODO: Complete the following functions #
#############################################################################
# Fully-connected layer with 384 output units (4096 -> 384)
# ReLU activation layer
self.fc3 = fc(self.flat, 384)
self.relu3 = tf.nn.relu(self.fc3)
#############################################################################
# END OF YOUR CODE #
#############################################################################
print('fc3 layer: ' + str(self.relu3.get_shape()))
with tf.variable_scope('fc4'):
#############################################################################
# TODO: Complete the following functions #
#############################################################################
# Fully-connected layer with 10 output units (384 -> 10)
self.fc4 = fc(self.relu3, 10)
#############################################################################
# END OF YOUR CODE #
#############################################################################
print('fc4 layer: ' + str(self.fc4.get_shape()))
# Return the last layer
return self.fc4
def _input_ops(self):
# Placeholders
self.X = tf.placeholder(tf.float32, [None, 32, 32, 3])
self.Y = tf.placeholder(tf.int64, [None])
#############################################################################
# TODO: You can add any placeholders #
#############################################################################
self.is_train = None
self.keep_prob = None
#############################################################################
# END OF YOUR CODE #
#############################################################################
def _build_optimizer(self):
# Adam optimizer 'self.train_op' that minimizes 'self.loss_op'
#############################################################################
# TODO: Complete the following functions #
#############################################################################
global_step = tf.Variable(0, name='global_step', trainable=False)
learning_rate = tf.train.exponential_decay(5e-4,
global_step,
500,
0.96,
staircase=True)
self.train_op = tf.train.AdamOptimizer(learning_rate=learning_rate,
beta1=0.9,
beta2=0.999,
epsilon=1e-08,
use_locking=False,
name='Adam').minimize(self.loss_op, global_step=global_step)
#############################################################################
# END OF YOUR CODE #
#############################################################################
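# Note (added for reference): with staircase=True, the schedule above is
#     lr(t) = 5e-4 * 0.96 ** (t // 500)
# where t is the global step, i.e. the learning rate drops by 4% every 500 steps.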
def _loss(self, labels, logits):
# Softmax cross entropy loss 'self.loss_op'
#############################################################################
# TODO: Complete the following functions #
#############################################################################
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
self.loss_op = tf.reduce_sum(cross_entropy)
#############################################################################
# END OF YOUR CODE #
#############################################################################
def _build_model(self):
# Define input variables
self._input_ops()
# Convert Y to one-hot vector
labels = tf.one_hot(self.Y, 10)
# Build a model and get logits
logits = self._model()
# Compute loss
self._loss(labels, logits)
# Build optimizer
self._build_optimizer()
# Compute accuracy
predict = tf.argmax(logits, 1)
correct = tf.equal(predict, self.Y)
self.accuracy_op = tf.reduce_mean(tf.cast(correct, tf.float32))
def train(self, sess, X_train, Y_train, X_val, Y_val):
sess.run(tf.global_variables_initializer())
step = 0
losses = []
accuracies = []
print('-' * 5 + ' Start training ' + '-' * 5)
for epoch in range(self.num_epoch):
print('train for epoch %d' % epoch)
for i in range(X_train.shape[0] // self.batch_size):
X_ = X_train[i * self.batch_size:(i + 1) * self.batch_size][:]
Y_ = Y_train[i * self.batch_size:(i + 1) * self.batch_size]
#############################################################################
# TODO: You can change feed data as you want #
#############################################################################
feed_dict = {self.X: X_, self.Y:Y_}
#############################################################################
# END OF YOUR CODE #
#############################################################################
fetches = [self.train_op, self.loss_op, self.accuracy_op]
_, loss, accuracy = sess.run(fetches, feed_dict=feed_dict)
losses.append(loss)
accuracies.append(accuracy)
if step % self.log_step == 0:
print('iteration (%d): loss = %.3f, accuracy = %.3f' %
(step, loss, accuracy))
step += 1
#############################################################################
# TODO: Plot training curves #
#############################################################################
# Graph 1. X: iteration (within this epoch), Y: training loss
fig = plt.figure(figsize=(12, 6))
ax = plt.subplot(121)
n_iter = X_train.shape[0] // self.batch_size
iters = range(n_iter)
ax.plot(iters, losses[epoch * n_iter:(epoch + 1) * n_iter])
ax.set_xlabel('iteration')
ax.set_ylabel('training loss')
ax.set_title('Epoch {} loss'.format(epoch))
plt.grid(True)
# Graph 2. X: iteration (within this epoch), Y: training accuracy
ax = plt.subplot(122)
ax.plot(iters, accuracies[epoch * n_iter:(epoch + 1) * n_iter])
ax.set_xlabel('iteration')
ax.set_ylabel('accuracy')
ax.set_title('Epoch {} accuracy'.format(epoch))
plt.grid(True)
plt.tight_layout()
plt.show()
#############################################################################
# END OF YOUR CODE #
#############################################################################
# Print validation results
print('validation for epoch %d' % epoch)
val_accuracy = self.evaluate(sess, X_val, Y_val)
print('- epoch %d: validation accuracy = %.3f' % (epoch, val_accuracy))
def evaluate(self, sess, X_eval, Y_eval):
eval_accuracy = 0.0
eval_iter = 0
for i in range(X_eval.shape[0] // self.batch_size):
X_ = X_eval[i * self.batch_size:(i + 1) * self.batch_size][:]
Y_ = Y_eval[i * self.batch_size:(i + 1) * self.batch_size]
#############################################################################
# TODO: You can change feed data as you want #
#############################################################################
feed_dict = {self.X: X_, self.Y:Y_}
#############################################################################
# END OF YOUR CODE #
#############################################################################
accuracy = sess.run(self.accuracy_op, feed_dict=feed_dict)
eval_accuracy += accuracy
eval_iter += 1
return eval_accuracy / eval_iter
In [10]:
model = BaseModel()
In [11]:
# Restore the model using parameters dict
variables = tf.global_variables()
param_dict = {}
for var in variables:
var_name = var.name[:-2]
print('Loading {} from checkpoint. Name: {}'.format(var.name, var_name))
param_dict[var_name] = var
saver = tf.train.Saver()
saver.restore(sess, "lib/tf_models/problem2/csci-599_sample.ckpt")
In [12]:
# TODO:
with sess.as_default():
with tf.variable_scope("conv1", reuse=True):
conv1 = tf.get_variable("w")
b1 = tf.get_variable("b")
print(conv1.shape, b1.shape)
#############################################################################
# TODO: Extract the weight and bias from conv1 #
# For tf models, you should use .eval() function on variables and #
# sess.run() function to extract the features to ndarray #
#############################################################################
w, b = conv1.eval(), b1.eval()
#############################################################################
# TODO: Scale the kernel weights to [0,1] #
#############################################################################
w_01 = (w - np.amin(w))
w_01 /= (np.amax(w) - np.amin(w))
print(w_01.shape)
#############################################################################
# TODO: Transpose the scaled kernel weights so that the #
# number of filters comes first in the dimension as (n, H, W, C) #
#############################################################################
wT = np.transpose(w_01, axes=[3, 0, 1, 2])
print(wT.shape)
# Define a figure
fig = plt.figure(figsize=(8,8))
ax1 = plt.subplot(111)
rgb_w = [] # list of filters
n, H, W, C = wT.shape
#############################################################################
# TODO: Store each of the n transposed kernel weights #
# to the rgb_w list so the list is of dimension (n, (H, W, C)) #
#############################################################################
rgb_w = [wT[i, :] for i in range(n)]
# Transform the python list to numpy array
rgb_w = np.asarray(rgb_w)
grid = viz_grid(rgb_w)
ax1.imshow(grid[...,::-1])
ax1.set_title('BaseModel Learned First Conv Filters', fontsize=16)
Now that we have seen the learned filters and observed that they show some structure, let's go one step further and visualize the activation maps produced by different convolutional filters. You will see that, as we go deeper through the layers of a network, the activation maps gradually represent higher and higher levels of abstraction of the input images.
As a warm-up, run the following visualization code blocks for a simple model trained on the MNIST dataset.
In [13]:
import tensorflow.contrib.slim as slim
from tensorflow.examples.tutorials.mnist import input_data
In [14]:
mnist = input_data.read_data_sets("data/MNIST_data/", one_hot=True)
In [15]:
tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 784],name="x-in")
y = tf.placeholder(tf.float32, [None, 10],name="y-in")
keep_prob = tf.placeholder("float")
x_reshaped = tf.reshape(x,[-1,28,28,1])
x_tiled = tf.tile(x_reshaped, [1,1,1,3])
sconv_1 = slim.conv2d(x_tiled,5,[5,5])
spool_1 = slim.max_pool2d(sconv_1,[2,2])
sconv_2 = slim.conv2d(spool_1,5,[5,5])
spool_2 = slim.max_pool2d(sconv_2,[2,2])
sconv_3 = slim.conv2d(spool_2,20,[5,5])
s_dropout3 = slim.dropout(sconv_3, keep_prob)
output = slim.fully_connected(slim.flatten(s_dropout3), 10, activation_fn=tf.nn.softmax)
cross_entropy = -tf.reduce_sum(y * tf.log(output))
correct_prediction = tf.equal(tf.argmax(output,1 ), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
In [16]:
batchSize = 50
dropout_p = 0.5
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Train the network
for i in range(2001):
batch = mnist.train.next_batch(batchSize)
sess.run(train_step, feed_dict={x:batch[0], y:batch[1], keep_prob:dropout_p})
if i % 100 == 0 and i != 0:
trainAccuracy = sess.run(accuracy, feed_dict={x:batch[0], y:batch[1], keep_prob:1.0})
print("step %d, training accuracy %g"%(i, trainAccuracy))
In [17]:
testAccuracy = sess.run(accuracy, feed_dict={x:mnist.test.images,y:mnist.test.labels, keep_prob:1.0})
print("test accuracy {}".format(testAccuracy))
In [18]:
# function for visualizing the activations
def getActivations_mnist(layer, features):
outs = sess.run(layer, feed_dict={x:np.reshape(features,[1,784],order='F'), keep_prob:1.0})
outs = np.transpose(outs, [3, 1, 2, 0])
fig = plt.figure(figsize=(4,4))
ax1 = plt.subplot(111)
grid = viz_grid(outs)
ax1.imshow(grid[...,0])
ax1.set_title('{} Activations'.format(layer.name), fontsize=16)
In [19]:
imageToUse = mnist.test.images[0]
imageToShow = np.expand_dims(np.reshape(imageToUse,[28,28]), axis=-1)
imageToShow = np.tile(imageToShow, (1,1,3))
plt.imshow(imageToShow, interpolation="nearest", cmap="gray")
print "The Image for activation visualizations:"
In [20]:
# Visualize the first 3 activation maps after convs
getActivations_mnist(sconv_1,imageToUse)
getActivations_mnist(sconv_2,imageToUse)
getActivations_mnist(sconv_3,imageToUse)
In [29]:
print "There are total {} images in test set".format(len(data_test))
#############################################################################
# TODO: Try out some indices you want to see! #
#############################################################################
query_idx = 101
# Clamp the index to the valid range of the test set
query_idx = min(max(query_idx, 0), len(data_test) - 1)
cifar10ToUse = data_test[query_idx]
cifar10ToUse = cifar10ToUse[..., ::-1]
plt.imshow(cifar10ToUse)
print("Image {} in the test set".format(query_idx))
In [30]:
tf.reset_default_graph()
sess = tf.Session()
# Restore the model using parameters dict
model = BaseModel()
variables = tf.global_variables()
param_dict = {}
for var in variables:
var_name = var.name[:-2]
print('Loading {} from checkpoint. Name: {}'.format(var.name, var_name))
param_dict[var_name] = var
saver = tf.train.Saver()
saver.restore(sess, "lib/tf_models/problem2/csci-599_sample.ckpt")
In [31]:
def getActivations_cifar10(layer, stimuli):
#############################################################################
# TODO: Fill out the following block #
#############################################################################
outs = sess.run(layer, feed_dict={model.X: stimuli.reshape([1, 32, 32, 3])})
outs = np.transpose(outs, [3, 1, 2, 0])
fig = plt.figure(figsize=(4,4))
ax1 = plt.subplot(111)
grid = viz_grid(outs)
ax1.imshow(grid[...,0])
ax1.set_title('{} Activations'.format(layer.name), fontsize=16)
In [32]:
cifar10ToUse_with_batch = np.expand_dims(cifar10ToUse, axis=0)
#############################################################################
# TODO: Visualize the activations of each conv layer in your model #
#############################################################################
getActivations_cifar10(model.conv1, cifar10ToUse_with_batch)
getActivations_cifar10(model.relu1, cifar10ToUse_with_batch)
getActivations_cifar10(model.pool1, cifar10ToUse_with_batch)
getActivations_cifar10(model.conv2, cifar10ToUse_with_batch)
getActivations_cifar10(model.relu2, cifar10ToUse_with_batch)
getActivations_cifar10(model.pool2, cifar10ToUse_with_batch)
The first convolutional layer produces the strongest activations, and its maps closely resemble the input image, which makes it the most interpretable and, in that sense, the most discriminative. As we travel deeper through the network the activations grow sparser and correspond less directly to the input. The pool2 output, for example, looks like a noisy image with no features that visibly resemble the input.
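This claim can be checked roughly in code. A sketch (reusing `sess`, `model`, and `cifar10ToUse_with_batch` from the cells above) that measures how sparse each layer's activations are for this one test image:

```python
# Fraction of exactly-zero activations per layer, for the single test image above
for layer in [model.relu1, model.pool1, model.relu2, model.pool2]:
    act = sess.run(layer, feed_dict={model.X: cifar10ToUse_with_batch})
    frac_zero = np.mean(act == 0)
    print('{}: {:.1%} of activations are zero'.format(layer.name, frac_zero))
```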
Grad-CAM is a technique for "visually interpreting" the predictions of a Convolutional Neural Network (CNN)-based model. It uses the gradients of any target concept (a predicted class such as "cat") flowing into the final convolutional layer to produce a coarse localization map that highlights the regions of the image that are important for predicting that concept. Please read the original Grad-CAM paper for more details.
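Concretely, if $A^k$ is the $k$-th feature map of the last convolutional layer, $y^c$ the score for class $c$, and $Z$ the number of spatial positions in a feature map, the paper defines

$$\alpha_k^c = \frac{1}{Z}\sum_i\sum_j \frac{\partial y^c}{\partial A^k_{ij}}, \qquad L^c_{\text{Grad-CAM}} = \mathrm{ReLU}\Big(\sum_k \alpha_k^c A^k\Big),$$

which is essentially what the `visualize` function below computes in numpy: global-average-pooled gradients as channel weights, a weighted sum of the feature maps, then a ReLU.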
In [25]:
# Replace the vanilla ReLU gradient with a guided ReLU gradient for guided backpropagation.
@ops.RegisterGradient("GuidedRelu")
def _GuidedReluGrad(op, grad):
return tf.where(0. < grad, gen_nn_ops._relu_grad(grad, op.outputs[0]), tf.zeros(grad.get_shape()))
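For intuition, here is a tiny numpy sketch (illustrative only, not the TF op above) of the guided-backprop rule: the gradient is propagated only where both the incoming gradient and the forward ReLU output are positive.

```python
import numpy as np

# Illustrative example of the guided ReLU backward rule
relu_out = np.array([0.0, 1.2, 0.0, 3.0])   # forward ReLU outputs
upstream = np.array([0.5, -0.7, 0.9, 0.4])  # gradients arriving from the layer above
guided = np.where((upstream > 0) & (relu_out > 0), upstream, 0.0)
print(guided)  # [0.  0.  0.  0.4]
```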
In [26]:
import cv2
def imgread(path):
print("Image:", path.split("/")[-1])
# Read in the image using python opencv (BGR, uint8)
img = cv2.imread(path)
img = img / 255.0
print("Raw Image Shape: ", img.shape)
# Center crop the image to a square with side length short_edge
short_edge = min(img.shape[:2])
cent_w = int((img.shape[1] - short_edge) / 2)
cent_h = int((img.shape[0] - short_edge) / 2)
img_cropped = img[cent_h:cent_h + short_edge, cent_w:cent_w + short_edge]
print("Cropped Image Shape: ", img_cropped.shape)
# Resize the cropped image to 224 by 224 for the VGG16 network
img_resized = cv2.resize(img_cropped, (224, 224), interpolation=cv2.INTER_LINEAR)
print("Resized Image Shape: ", img_resized.shape)
return img_resized
def predicted_labels(score, synset_path):
fi = open(synset_path, "r")
synset = []
for line in fi:
synset.append(line.rstrip().lstrip())
# The predictions, sorted by descending score
pred = np.argsort(score)[::-1]
# Top 1 and Top 5
top1 = synset[pred[0]]
print("\nTop1, Label: {}, score: {}".format(top1, score[pred[0]]))
top5 = [(synset[pred[i]], score[pred[i]]) for i in range(5)]
for i in range(1, 5):
print("Top{}, Label: {}, score: {}".format(i + 1, top5[i][0], top5[i][1]))
return top1, top5
In [27]:
def visualize(image, output, grads, gb_grads):
# Reverse the BGR channel to RGB
gb_grads = gb_grads[...,::-1]
# Initialize the CAM
CAM = np.ones(output.shape[0:2], dtype=np.float32)
# Channel weights: global-average-pool the gradients over the spatial dimensions
cam_w = np.mean(grads, axis=(0, 1))
for i, w in enumerate(cam_w):
CAM += w * output[:, :, i]
# Passing through ReLU
CAM = np.maximum(CAM, 0)
# scale CAM to [0,1]
CAM /= np.max(CAM)
# Resize the CAM to 224 by 224
CAM = cv2.resize(CAM, (224, 224), interpolation=cv2.INTER_LINEAR)
# scale guided backprop gradients to [0,1]
gb_grads -= np.min(gb_grads)
gb_grads /= np.max(gb_grads)
# scale the original to [0,1]
img_toshow = image.astype(float)
img_toshow -= np.min(img_toshow)
img_toshow /= img_toshow.max()
# Render the CAM heatmap
heatmap = cv2.applyColorMap(np.uint8(CAM*255.0), cv2.COLORMAP_JET)
# Grad-CAM
CAM_gb = CAM.copy()
CAM_gb = np.expand_dims(np.squeeze(CAM_gb), axis=-1)
gd_gb = gb_grads * np.tile(CAM_gb, (1,1,3))
# Draw the results figures
fig = plt.figure(figsize=(10,10))
ax1 = plt.subplot(221)
ax2 = plt.subplot(222)
ax3 = plt.subplot(223)
ax4 = plt.subplot(224)
ax1.imshow(img_toshow[...,::-1])
ax1.set_title('Input Image')
ax2.imshow(heatmap)
ax2.set_title('Grad-CAM')
ax3.imshow(gb_grads)
ax3.set_title('guided backpropagation')
ax4.imshow(gd_gb)
ax4.set_title('guided Grad-CAM')
# Show the resulting image
plt.show()
In [28]:
num_classes = 1000
# Read in the image
img1 = imgread("images/corgi.jpg")
img2 = imgread("images/cat_and_dog.jpg")
img3 = imgread("images/cat_and_dog.jpg")
# Expand one dimension to take on the batch dimension
img1 = np.expand_dims(img1, axis=0)
img2 = np.expand_dims(img2, axis=0)
img3 = np.expand_dims(img3, axis=0)
# Define an all-zero vector of length num_classes (1000)
zero_grads = np.zeros(num_classes)
# The indices of the classes are provided for you
class_num1 = 263 # Pembroke, Pembroke Welsh corgi
class_num2 = 254 # Pug, pug-dog
class_num3 = 282 # Tiger cat
# Define a one-hot gradient vector where the only activated gradient
# is of the corresponding indices from above
one_hot_grad1 = zero_grads.copy()
one_hot_grad2 = zero_grads.copy()
one_hot_grad3 = zero_grads.copy()
one_hot_grad1[class_num1] = 1.0
one_hot_grad2[class_num2] = 1.0
one_hot_grad3[class_num3] = 1.0
one_hot_grad1 = np.expand_dims(one_hot_grad1, axis=0)
one_hot_grad2 = np.expand_dims(one_hot_grad2, axis=0)
one_hot_grad3 = np.expand_dims(one_hot_grad3, axis=0)
#############################################################################
# TODO: Construct a minibatch of data and labels (one-hot vectors) of the #
# images using concatenate #
#############################################################################
minibatch = np.concatenate([img1, img2, img3], 0)
one_hot_grads = np.concatenate([one_hot_grad1, one_hot_grad2, one_hot_grad3], 0)
# Define the batch size
batch_size = 3
# Create tensorflow graph for evaluation
graph = tf.Graph()
with graph.as_default():
with graph.gradient_override_map({'Relu': 'GuidedRelu'}):
# Define the VGG16 network and setup
# Please take a look at the lib/tf_models/vgg16.py for more details
# of the VGG16 network
vgg = vgg16.Vgg16()
vgg.load()
vgg.setup()
#############################################################################
# TODO: Define the signal and the loss #
# HINT: To construct the signal, simply extract the final fully connected #
# layer and perform a matrix multiplication on the one-hot vectors #
# The loss is then defined as the average of the signal #
#############################################################################
signal = tf.multiply(vgg.fc8, one_hot_grads)
loss = tf.reduce_mean(signal)
#############################################################################
# TODO: Compute the gradient of pool5 layer for generating Grad-CAM #
#############################################################################
pool5 = vgg.pool5
pool5_grads = tf.gradients(loss, pool5)[0]
#############################################################################
# TODO: Perform a guided backpropagation back to the input layer             #
#############################################################################
gb_grad = tf.gradients(loss, vgg.inputs)[0]
eps = tf.constant(1e-5)
#############################################################################
# TODO: Normalize the gradients, and add a small number epsilon to it #
#############################################################################
pool5_grads_norm = tf.div(pool5_grads,
tf.sqrt(tf.reduce_mean(tf.square(pool5_grads))) +
eps)
#############################################################################
# TODO: Initializer for the tf variables #
#############################################################################
init = tf.global_variables_initializer()
# Run tensorflow
with tf.Session(graph=graph) as sess:
sess.run(init)
#############################################################################
# TODO: Run the session to get guided backpropagation gradients to the #
# input, activation of pool5, normalized pool5 gradients, and the #
# prediction probability #
#############################################################################
prob, gb_grads, pool5_act, pool5_grads = sess.run([vgg.prob, gb_grad, pool5, pool5_grads_norm],
feed_dict = {vgg.inputs : minibatch,
vgg.labels : one_hot_grads})
# Visualize the Grad-CAM
for i in range(batch_size):
top1, top5 = predicted_labels(prob[i], "lib/synset.txt")
visualize(minibatch[i], pool5_act[i], pool5_grads[i], gb_grads[i])
Grad-CAM localizes the given labels accurately. For img1 the focus is on the corgi's face, the most discriminative region for the label 'Pembroke, Pembroke Welsh corgi'. For img2, labeled 'pug, pug-dog', the highlighted region sits on the dog. For img3, the same image labeled 'tiger cat', Grad-CAM highlights both the dog and the cat, but it concentrates on cat-like features such as the stripes, in contrast to the 'pug, pug-dog' map of img2.
In [ ]: