In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
First let's make sure that we have access to a subset of image files from the PASCAL VOC dataset:
In [2]:
import os.path as op
from zipfile import ZipFile
# Unpack the image archive only when the "images_resize" folder is missing,
# so re-running this cell does not extract the files again.
if not op.exists("images_resize"):
    print('Extracting image files...')
    # The context manager closes the archive even if extraction fails.
    with ZipFile('images_pascalVOC.zip') as zf:
        zf.extractall('.')
In [3]:
from keras.applications.resnet50 import ResNet50
from keras.models import Model
from keras.preprocessing import image
# Instantiate ResNet50 with its classification head (include_top=True) and
# the 'imagenet' weights.
model = ResNet50(include_top=True, weights='imagenet')
In [4]:
# print(model.summary())
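**Exercise**

- Open an image, preprocess it and build a batch of 1 image.
- Use the model to classify this image.
- Decode the predictions using `decode_predictions` from Keras.

Notes:

- You may use `preprocess_input` for preprocessing the image.
- Test your code with `"images_resize/000007.jpg"`.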
In [5]:
from scipy.misc import imread, imresize
from keras.applications.imagenet_utils import preprocess_input
from keras.applications.imagenet_utils import decode_predictions
# Image used for this exercise.
path = "images_resize/000007.jpg"
# TODO: read the image at `path`, build a batch of one preprocessed image,
# run `model` on it and decode the predictions.
In [6]:
# %load solutions/predict_image.py
from scipy.misc import imread, imresize
from keras.applications.imagenet_utils import preprocess_input
from keras.applications.imagenet_utils import decode_predictions
# NOTE(review): `imread` / `imresize` are imported from scipy.misc at the top
# of this cell; SciPy removed them (releases 1.2 / 1.3) -- confirm the pinned
# SciPy version before running.

# Read the example image and show it before any resizing.
path = "images_resize/000007.jpg"
img = imread(path)
plt.imshow(img)

# Resize to 224x224 and convert to float32 before preprocessing.
img = imresize(img, (224, 224)).astype("float32")

# np.newaxis adds a leading axis so the model receives a "batch" of 1 image.
img_batch = preprocess_input(img[np.newaxis])

predictions = model.predict(img_batch)
decoded_predictions = decode_predictions(predictions)

# decoded_predictions[0] holds the decoded entries for the only image of the
# batch; each entry unpacks into three fields, of which the last two are shown.
for class_id, name, score in decoded_predictions[0]:
    print(name, score)
In [7]:
# Build a second model that reuses the layers of `model` but outputs the
# activations of its second-to-last layer instead of the final layer.
# The tensors are deliberately not named `input` / `output`: binding `input`
# at module level would shadow the built-in `input()` for the whole session.
model_input = model.layers[0].input
features_output = model.layers[-2].output
base_model = Model(model_input, features_output)
In [8]:
# Run the truncated model on the single-image batch built earlier.
representation = base_model.predict(img_batch)
print("shape of representation:", representation.shape)

# Share of entries of the first (and only) representation that equal zero.
zero_share = np.mean(representation[0] == 0)
print("proportion of zero valued axis: %0.3f" % zero_share)
Computing the representations of all images can be time consuming. This is usually done in large batches on a GPU for massive performance gains.
For the remaining part, we will use pre-computed representations saved in h5 format.
For those interested, this is done using the `process_images.py` script.
In [9]:
import os
# Alphabetically sorted relative paths of every entry in the image folder.
image_dir = "images_resize/"
paths = [image_dir + name for name in sorted(os.listdir(image_dir))]
In [10]:
import h5py
# Load pre-calculated representations.
# The context manager closes the file even if reading the dataset fails;
# `[:]` copies the whole 'img_emb' dataset into memory, so `out_tensors`
# stays usable after the file is closed.
with h5py.File('img_emb.h5', 'r') as h5f:
    out_tensors = h5f['img_emb'][:]
The representations are dense.
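**Exercise**: compute the proportion of zero values in one representation and across all representations, and explain where these zeros come from.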
In [11]:
# %load solutions/representations.py
# Fraction of entries equal to zero in the first representation.
first_zero_fraction = np.mean(out_tensors[0] == 0.0)
print("proportion of zeros", first_zero_fraction)

# Same fraction computed separately for every representation (mean along
# axis 1), shown as a histogram.
zero_fraction_per_row = np.mean(out_tensors == 0.0, axis=1)
plt.hist(zero_fraction_per_row);

# These 0 values come from the different ReLU units.
# They propagate through the layers, and there can be many.
# If a network has too many of them, a lot of computation
# / memory is wasted.
In [12]:
from sklearn.manifold import TSNE
# Embed the representations with t-SNE. The algorithm is stochastic, so the
# seed is fixed to get the same layout on every run of the notebook.
img_emb_tsne = TSNE(perplexity=30, random_state=42).fit_transform(out_tensors)
In [13]:
import matplotlib.pyplot as plt
# Scatter plot of the first two t-SNE coordinates, with axis ticks removed.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(img_emb_tsne[:, 0], img_emb_tsne[:, 1])
ax.set_xticks(())
ax.set_yticks(())
plt.show()
Let's add thumbnails of the original images at their TSNE locations:
In [14]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from scipy.misc import imread, imresize
def imscatter(x, y, paths, ax=None, zoom=1, linewidth=0):
    """Draw image thumbnails at the given data coordinates.

    Parameters
    ----------
    x, y : scalar or array-like
        Data coordinates of the thumbnails; both are promoted to 1-D arrays.
    paths : iterable
        Image location passed to ``imread`` for each (x, y) pair, in order.
    ax : matplotlib axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    zoom : float, optional
        Zoom factor handed to ``OffsetImage``.
    linewidth : float, optional
        Line width of the red frame drawn around each thumbnail.

    Returns
    -------
    list
        The artists added to ``ax``, one per image that could be read.
    """
    if ax is None:
        ax = plt.gca()
    x, y = np.atleast_1d(x, y)
    artists = []
    for x0, y0, p in zip(x, y, paths):
        try:
            im = imread(p)
        except Exception as exc:
            # Best effort: report the unreadable file and move on. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate so the loop can still be interrupted.
            print("could not read %s: %s" % (p, exc))
            continue
        # Every thumbnail is resized to the same 224x224 size before display.
        im = imresize(im, (224, 224))
        im = OffsetImage(im, zoom=zoom)
        ab = AnnotationBbox(im, (x0, y0), xycoords='data',
                            frameon=True, pad=0.1,
                            bboxprops=dict(edgecolor='red',
                                           linewidth=linewidth))
        artists.append(ax.add_artist(ab))
    # Artists added with add_artist are not taken into account by
    # autoscaling, so the data limits are extended explicitly.
    ax.update_datalim(np.column_stack([x, y]))
    ax.autoscale()
    return artists
In [15]:
# Draw every image at its t-SNE position on a 50x50 figure and save it.
tsne_x, tsne_y = img_emb_tsne[:, 0], img_emb_tsne[:, 1]
fig, ax = plt.subplots(figsize=(50, 50))
imscatter(tsne_x, tsne_y, paths, ax=ax, zoom=0.5)
fig.savefig('tsne.png')
In [16]:
def display(img):
    """Show the image stored at location `img` in a new matplotlib figure.

    `img` is handed to `imread`; the decoded pixels are then drawn with
    `plt.imshow` on a freshly created figure.
    """
    plt.figure()
    pixels = imread(img)
    plt.imshow(pixels)
In [17]:
# Position (row of `out_tensors`, entry of `paths`) of the query image.
idx = 57
def most_similar(idx, top_n=5, tensors=None):
    """Return the indices of the `top_n` rows closest to row `idx`.

    Closeness is the Euclidean distance between rows. The query row has
    distance zero to itself, so it is normally the first index returned.

    Parameters
    ----------
    idx : int
        Index of the query row.
    top_n : int, optional
        Number of indices to return (fewer if there are fewer rows).
    tensors : 2-D array, optional
        Rows to search. Defaults to the notebook-level `out_tensors`, which
        keeps existing calls such as `most_similar(idx)` working unchanged.

    Returns
    -------
    numpy.ndarray
        Row indices sorted by increasing distance to row `idx`.
    """
    if tensors is None:
        tensors = out_tensors
    dists = np.linalg.norm(tensors - tensors[idx], axis=1)
    return np.argsort(dists)[:top_n]
# Indices of the images closest to the query image, then one figure per hit.
sim = most_similar(idx)
# Plain loop: the calls are made for their side effect only, so there is no
# point in building (and then hiding) a list of return values.
for neighbor_idx in sim:
    display(paths[neighbor_idx])
Using these representations, it may be possible to build a nearest neighbor classifier. However, the representations were learnt on ImageNet, whose images are centered on a single object, whereas here we input images from PascalVOC, which are more plausible inputs for a real-world system.
The next section explores this possibility by computing the histogram of similarities between one image and the others.
In [18]:
# L2 norm of each row; keepdims=True keeps a trailing axis of size 1 so the
# division below broadcasts row by row.
out_norms = np.linalg.norm(out_tensors, axis=1, keepdims=True)
# Rows rescaled to unit norm, so the dot product of two rows is their cosine
# similarity. NOTE(review): an all-zero row would produce NaNs here.
normed_out_tensors = out_tensors / out_norms
In [19]:
# Position of the query image for the similarity study.
item_idx = 208

# Euclidean distance from every representation to the query representation.
dists_to_item = np.linalg.norm(out_tensors - out_tensors[item_idx], axis=1)

# Dot product of every normalised representation with the normalised query.
cos_to_item = normed_out_tensors.dot(normed_out_tensors[item_idx])

# Histogram of the similarities, followed by the query image itself.
plt.hist(cos_to_item)
display(paths[item_idx])
Unfortunately there is no clear separation of class boundaries visible in the histogram of similarities alone. We need some supervision to be able to classify images.
With a labeled dataset, even with very few labels per class, one would be able to:

- build a k-nearest-neighbor model,
- build a classification model such as an SVM.

These approximate classifiers are useful in practice.
See the cat vs dog home assignment with GPU for another example of this approach.
In [21]:
# Positions of the images whose similarity to the query exceeds 0.44.
# np.where returns a tuple with one index array per dimension.
items = np.where(cos_to_item > 0.44)
print(items)
# Show at most the first 10 matches, one figure each.
for s in items[0][:10]:
    display(paths[s])
In [ ]: