In [ ]:
%matplotlib inline
import warnings
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(1)
In [ ]:
# Load a pre-trained ResNet50.
# We use include_top=False for now,
# as we'll load the output Dense layer's weights later.
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50
base_model = ResNet50(include_top=False)
print(base_model.output_shape)
In [ ]:
#print(base_model.summary())
In [ ]:
res5c = base_model.layers[-1]
type(res5c)
In [ ]:
res5c.output_shape
Out of the res5c residual block, the ResNet outputs a tensor of shape $W \times H \times 2048$. The regular ResNet head after the base model is as follows:
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1000)(x)
x = Softmax()(x)
Here is the full definition of the model: https://github.com/keras-team/keras-applications/blob/master/keras_applications/resnet50.py
We build the following custom layer to apply a softmax only to the last dimension of a tensor:
In [ ]:
from tensorflow.keras import layers

# A custom layer in Keras must implement the following four methods:
class SoftmaxMap(layers.Layer):
    # Init function
    def __init__(self, axis=-1, **kwargs):
        self.axis = axis
        super(SoftmaxMap, self).__init__(**kwargs)

    # There's no parameter, so we don't need this one
    def build(self, input_shape):
        pass

    # This is the operation we're interested in: very similar to the
    # regular softmax, but note that we accept
    # x.shape == (batch_size, w, h, n_classes),
    # which is not the case in Keras by default.
    # Note also that we subtract the maximum of the logits to make
    # the softmax numerically stable.
    def call(self, x, mask=None):
        e = tf.exp(x - tf.math.reduce_max(x, axis=self.axis, keepdims=True))
        s = tf.math.reduce_sum(e, axis=self.axis, keepdims=True)
        return e / s

    # The output shape is the same as the input shape
    def compute_output_shape(self, input_shape):
        return input_shape
Let's check that we can use this layer to normalize the class probabilities of some random spatial predictions:
In [ ]:
n_samples, w, h, n_classes = 10, 3, 4, 5
random_data = np.random.randn(n_samples, w, h, n_classes).astype("float32")
random_data.shape
Because those predictions are random, if we sum across the classes dimension we get random values instead of class probabilities that would need to sum to 1:
In [ ]:
random_data[0].sum(axis=-1)
Let's instantiate a SoftmaxMap layer and use it to process our test data:
In [ ]:
softmaxMap = SoftmaxMap()
softmax_mapped_data = softmaxMap(random_data).numpy()
softmax_mapped_data.shape
All the values are now in the [0, 1] range:
In [ ]:
softmax_mapped_data[0]
The values along the last dimension now approximately sum to one; they can therefore be used as class probabilities (or parameters of a multinoulli distribution):
In [ ]:
softmax_mapped_data[0].sum(axis=-1)
Note that the highest activated channel for each spatial location is still the same before and after the softmax map. The ranking of the activations is preserved as softmax is a monotonic function (when considered element-wise):
In [ ]:
random_data[0].argmax(axis=-1)
In [ ]:
softmax_mapped_data[0].argmax(axis=-1)
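As a cross-check (added for illustration, not part of the original notebook), our layer should match TensorFlow's built-in softmax along the last axis:
In [ ]:
# Sanity check: compare our SoftmaxMap output with tf.nn.softmax
expected = tf.nn.softmax(random_data, axis=-1).numpy()
print(np.allclose(softmax_mapped_data, expected, atol=1e-6))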
Exercise
Build a fully convolutional version of the ResNet: replace the Dense(1000) classification head with a Convolution2D layer that preserves the spatial dimensions, and normalize the result with our SoftmaxMap so that the model outputs per-area class probabilities. You may introspect base_model.layers to find which layer to remove.
In [ ]:
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.models import Model
input = base_model.layers[0].input
# TODO: compute per-area class probabilities
output = input
fully_conv_ResNet = Model(inputs=input, outputs=output)
In [ ]:
# %load solutions/fully_conv.py
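Here is a minimal sketch of one possible solution (not necessarily identical to solutions/fully_conv.py). It assumes the base model ends at the res5c activation; if your Keras version appends a pooling layer, use base_model.layers[-2].output instead of base_model.output:
In [ ]:
# Sketch: a 1x1 convolution acts like a Dense layer applied at every
# spatial location, so it preserves the W x H dimensions.
input = base_model.layers[0].input
x = base_model.output
x = Convolution2D(1000, (1, 1), name="conv_preds")(x)
output = SoftmaxMap(axis=-1)(x)
fully_conv_ResNet = Model(inputs=input, outputs=output)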
You can use the following random data to check that it's possible to run a forward pass on a random RGB image:
In [ ]:
prediction_maps = fully_conv_ResNet(np.random.randn(1, 200, 300, 3).astype("float32")).numpy()
prediction_maps.shape
How do you explain the resulting output shape? (Hint: ResNet50 reduces the spatial dimensions by a factor of 32 overall, rounding up because of padding.)
The class probabilities should sum to one in each area of the output map:
In [ ]:
prediction_maps.sum(axis=-1)
We now load the weights and bias of the original Dense classification layer of ResNet50 (provided in weights_dense.h5), so that we can transfer them to our new convolutional layer:
In [ ]:
import h5py

with h5py.File('weights_dense.h5', 'r') as h5f:
    w = h5f['w'][:]
    b = h5f['b'][:]
In [ ]:
last_layer = fully_conv_ResNet.layers[-2]
print("Loaded weight shape:", w.shape)
print("Last conv layer weights shape:", last_layer.get_weights()[0].shape)
In [ ]:
# Reshape the Dense kernel (2048, 1000) into a 1x1 convolution kernel
# (1, 1, 2048, 1000): the same linear map is applied at every location.
w_reshaped = w.reshape((1, 1, 2048, 1000))
# Set the conv layer weights
last_layer.set_weights([w_reshaped, b])
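To see why this reshape is valid, here is a small illustrative check (not part of the original lab): a Dense layer over 2048 features and the corresponding 1x1 convolution compute the same values at every spatial location:
In [ ]:
# Illustrative check: Dense(2048 -> 1000) applied per location vs 1x1 conv.
x = np.random.randn(1, 2, 2, 2048).astype("float32")
w32, b32 = w.astype("float32"), b.astype("float32")
dense_out = (x.reshape(-1, 2048) @ w32 + b32).reshape(1, 2, 2, 1000)
conv_out = tf.nn.conv2d(x, w32.reshape(1, 1, 2048, 1000),
                        strides=1, padding="VALID") + b32
print(np.allclose(dense_out, conv_out.numpy(), atol=1e-4))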
In [ ]:
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from skimage.io import imread
from skimage.transform import resize
def forward_pass_resize(img_path, img_size):
    img_raw = imread(img_path)
    print("Image shape before resizing: %s" % (img_raw.shape,))
    img = resize(img_raw, img_size, mode='reflect', preserve_range=True)
    img = preprocess_input(img[np.newaxis])
    print("Image batch shape before forward pass:", img.shape)
    prediction_map = fully_conv_ResNet(img).numpy()
    return prediction_map
In [ ]:
output = forward_pass_resize("dog.jpg", (800, 600))
print("prediction map shape", output.shape)
ImageNet uses an ontology of concepts, from which classes are derived. A synset corresponds to a node in the ontology.
For example all species of dogs are children of the synset n02084071 (Dog, domestic dog, Canis familiaris):
In [ ]:
# Helper file for importing synsets from imagenet
import imagenet_tool
synset = "n02084071" # synset corresponding to dogs
ids = imagenet_tool.synset_to_dfs_ids(synset)
print("All dog classes ids (%d):" % len(ids))
print(ids)
In [ ]:
for dog_id in ids[:10]:
    print(imagenet_tool.id_to_words(dog_id))
print('...')
In [ ]:
def build_heatmap(prediction_map, synset):
    class_ids = imagenet_tool.synset_to_dfs_ids(synset)
    class_ids = np.array([id_ for id_ in class_ids if id_ is not None])
    # Indexing a tensor with another array has the following shape effect:
    # (H, W, 1000) indexed by (118,) ==> (118, H, W)
    each_dog_proba_map = prediction_map[0, :, :, class_ids]
    any_dog_proba_map = each_dog_proba_map.sum(axis=0)
    print("size of heatmap: " + str(any_dog_proba_map.shape))
    return any_dog_proba_map
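Here is a tiny standalone demo (added for illustration) of this NumPy advanced-indexing behavior: the integer index and the index array are separated by slices, so the indexed class axis moves to the front of the result:
In [ ]:
a = np.zeros((1, 4, 5, 1000))
idx = np.arange(118)
print(a[0, :, :, idx].shape)  # (118, 4, 5)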
In [ ]:
def display_img_and_heatmap(img_path, heatmap):
    dog = imread(img_path)
    plt.figure(figsize=(12, 8))
    plt.subplot(1, 2, 1)
    plt.imshow(dog)
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(heatmap, interpolation='nearest', cmap="viridis")
    plt.axis('off')
Exercise
"dog.jpg", with the following sizes:(200, 320)(400, 640)(800, 1280)(1600, 2560) (optional, requires a lot of memory)You may plot a heatmap using the above function display_img_and_heatmap. You might also want to reuse forward_pass_resize to compute the class maps them-selves
In [ ]:
# dog synset
s = "n02084071"
# TODO
In [ ]:
# %load solutions/build_heatmaps.py
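Here is a minimal sketch of one possible approach (not necessarily identical to solutions/build_heatmaps.py):
In [ ]:
# Sketch: compute and display a dog heatmap at several input sizes.
for size in [(200, 320), (400, 640), (800, 1280)]:
    probas = forward_pass_resize("dog.jpg", size)
    heatmap = build_heatmap(probas, synset=s)
    display_img_and_heatmap("dog.jpg", heatmap)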
By combining the heatmaps computed at several scales, we get sharper information about the location of the dog. One way is to resize them to a common shape and average them; a geometric average may yield better results:
In [ ]:
from skimage.transform import resize
# TODO
In [ ]:
# %load solutions/geom_avg.py
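A minimal sketch of one way to do this combination (not necessarily what solutions/geom_avg.py does):
In [ ]:
def geometric_average_heatmaps(heatmaps, shape=(50, 80)):
    # Resize all heatmaps to a common shape, then take their geometric
    # mean: a location must score high at every scale to stay high.
    resized = [resize(h, shape, mode='reflect', preserve_range=True)
               for h in heatmaps]
    return np.exp(np.log(np.stack(resized) + 1e-8).mean(axis=0))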
Bonus
Experiment with semantic segmentation. You may train on the COCO dataset: http://mscoco.org/dataset/#overview
To go further, consider open source implementations rather than building your own models from scratch. For instance, FAIR's detection library (in Caffe2) provides many state-of-the-art models: https://github.com/facebookresearch/Detectron
In [ ]: