python build_label_vec.py ilsvrc12_test1k
python build_label_vec.py ilsvrc12_test1k_2hop
This notebook demonstrates image embedding (Image2Vec), label embedding (Synset2Vec) and zero-shot image tagging (ZeroshotTagger).
In [1]:
import sys
sys.path.append('..')
from synset2vec import Synset2Vec
from im2vec import Image2Vec
from tagger import ZeroshotTagger
# Instantiate the three helpers with their default settings; they are used
# as i2v.embedding(...), s2v.explain(...) and tagger.predict(...) below.
i2v = Image2Vec()
s2v = Synset2Vec()
tagger = ZeroshotTagger()
Load the label set $Y_0$:
In [2]:
# Read the label set, one entry per line. A real list is required (rather
# than a lazy map object) because the labels are indexed by position later,
# and the context manager closes the file once it has been read.
with open('../data/synsets_ilsvrc12_test1k.txt') as label_file:
    labels = [line.strip() for line in label_file]
In [3]:
from PIL import Image
import os
import numpy as np
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
In [4]:
# Load a pretrained torchvision classifier. The commented-out lines are
# alternative architectures that can be swapped in for resnet18.
#model = models.alexnet(pretrained=True)
model = models.resnet18(pretrained=True)
#model = models.resnet50(pretrained=True)
model.eval() # set it to the test mode
Out[4]:
In [5]:
# Per-channel mean and standard deviation handed to the normalization step.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=channel_mean, std=channel_std)

# Pipeline applied to every PIL image before it reaches the model:
# resize to 256, center-crop to 224, convert to a tensor, then normalize.
preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
]
preprocess = transforms.Compose(preprocess_steps)
In [6]:
# Open the sample image and convert it to three-channel RGB.
img_fname = 'lemon_shark.jpg'
img_pil = Image.open(img_fname).convert('RGB')
# Bare expression as the last line of a notebook cell: shows the image inline.
img_pil
Out[6]:
Now, predict $Y_0$ using the pretrained model.
In [7]:
# Run the classifier on the sample image and turn its output into a
# probability vector.
img_tensor = preprocess(img_pil)
img_tensor.unsqueeze_(0)  # add a leading batch dimension in place
img_var = Variable(img_tensor)
result = model(img_var)
# Normalize along dim 1 explicitly; leaving `dim` implicit is deprecated in
# PyTorch and emits a warning.
softmax_layer = nn.Softmax(dim=1)
probs = softmax_layer(result)
probs = probs.data.cpu().numpy()
probs = probs[0]  # the batch holds a single image

# Indices ordered by decreasing probability. A stable sort keeps tied
# entries in index order, matching the behaviour of sorted().
sorted_inds = np.argsort(-probs, kind='mergesort')
for index in sorted_inds[:5]:
    print('Probability %0.2f%% => %s [%s]' % (probs[index] * 100, labels[index], s2v.explain(labels[index])))
Predict $Y_1$ using hierse
In [8]:
# Embed the Y_0 probability vector and ask the tagger for its five
# best-scoring labels.
img_vec = i2v.embedding(probs)
res = tagger.predict(img_vec, topk=5)
print ([(label, s2v.explain(label), score) for (label,score) in res])
# Iterate over what the tagger actually returned instead of indexing
# res[0..4], so a shorter result list does not raise IndexError.
for label, score in res[:5]:
    print('Probability %0.2f%% => %s [%s]' % (score * 100, label, s2v.explain(label)))
The following code shows how to perform zero-shot image tagging for the sample set imagenet2hop-random2k. First, download the image data:
cd $HOME/VisualSearch
wget http://lixirong.net/data/sigir2015/imagenet2hop-random2k-images.tar
tar xf imagenet2hop-random2k-images.tar
Read the image ids of imagenet2hop-random2k:
In [9]:
# Locate the image-id list of the test collection under $HOME/VisualSearch.
rootpath = os.path.join(os.environ['HOME'], 'VisualSearch')
collection = 'imagenet2hop-random2k'
imset_fname = os.path.join(rootpath, collection, 'ImageSets', '%s.txt' % collection)
# Read one image id per line into a real list: len() and random.shuffle()
# are applied to it later and neither works on a lazy map object. The
# context manager closes the file once it has been read.
with open(imset_fname) as imset_file:
    imset = [line.strip() for line in imset_file]
print ('nr of test images: %d' % len(imset))
In [12]:
def predict_y0(model, img_fname):
    """Return the softmax probabilities ``model`` assigns to one image file.

    The image is opened as RGB, run through the module-level ``preprocess``
    pipeline and passed to ``model`` as a batch of one.

    Args:
        model: callable applied to the batched input tensor.
        img_fname: path of the image file to classify.

    Returns:
        NumPy array with the softmax of the model output for the image.
    """
    img_pil = Image.open(img_fname).convert('RGB')
    img_tensor = preprocess(img_pil)
    img_tensor.unsqueeze_(0)  # add a leading batch dimension in place
    img_var = Variable(img_tensor)
    result = model(img_var)
    # Normalize along dim 1 explicitly; the implicit-dim form is deprecated.
    softmax_layer = nn.Softmax(dim=1)
    probs = softmax_layer(result)
    probs = probs.data.cpu().numpy()
    return probs[0]  # the batch holds a single image
import random

# Shuffle with a fixed seed so the running hit rates printed below are
# reproducible from run to run.
random.seed(42)
random.shuffle(imset)

# hitN[i] counts the images whose true label is among the top Ns[i]
# predicted labels.
Ns = [1, 2, 5, 10]
hitN = [0] * len(Ns)
nr_metrics = len(Ns)

for progress,imageid in enumerate(imset):
    # The true label is the part of the image id before the first underscore.
    true_label = imageid.split('_',1)[0]
    img_fname = os.path.join(rootpath, collection, 'ImageData', '%s.jpg' % imageid)
    score_y0 = predict_y0(model, img_fname)
    img_vec = i2v.embedding(score_y0)
    res = tagger.predict(img_vec, topk=10)
    for i in range(nr_metrics):
        top = Ns[i]
        pred_set = set([x[0] for x in res[:top]])
        hitN[i] += (true_label in pred_set)
    # Report the running hit rates (in percent) every 100 images.
    if (progress+1)%100 == 0:
        print(progress+1, 'images tagged ->', ' '.join(['%.1f' % (100.0*float(x)/(progress+1)) for x in hitN]))

# Turn the hit counts into fractions of the whole test set and print a
# header row followed by the percentages.
hitN = [float(x)/len(imset) for x in hitN]
print(' '.join(['hit%d' % x for x in Ns]))
print(' '.join(['%.1f' % (x*100) for x in hitN]))