An example showing how to do tag assignment by the TagVote method, using train10k as training data and mirflickr08 as test data.
Download the VGGNet features (the commands below are shell commands, not Python):
cd $HOME/VisualSearch
wget http://lixirong.net/data/csur2016/train10k-vggnet16-fc7relu.tar.gz
wget http://lixirong.net/data/csur2016/mirflickr08-vggnet16-fc7relu.tar.gz
Download tag data of train10k
wget http://lixirong.net/data/csur2016/train10k-tag.tar.gz
Download annotation files of mirflickr08
wget http://lixirong.net/data/csur2016/mirflickr08-anno.tar.gz
In [1]:
# Set up the TagVote tagger on the training collection.
from instance_based.tagvote import TagVoteTagger

# Training collection, its concept annotation file and the visual feature
# used to compare images.
trainCollection = 'train10k'
annotationName = 'concepts130.txt'
feature = 'vgg-verydeep-16-fc7relu'

# The tagger is configured with cosine as its distance.
tagger = TagVoteTagger(
    collection=trainCollection,
    annotationName=annotationName,
    feature=feature,
    distance='cosine'
)
In [2]:
import os

from basic.constant import ROOT_PATH
from util.simpleknn.bigfile import BigFile

# The test features are expected under
# <ROOT_PATH>/<testCollection>/FeatureData/<feature>, where `feature` is the
# same feature name the tagger was built with.
rootpath = ROOT_PATH
testCollection = 'mirflickr08'
feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)

# Open the feature store of the test collection.
feat_file = BigFile(feat_dir)
In [3]:
import random

from basic.util import readImageSet

# Load the image ids of mirflickr08. They are materialised as a list because
# random.sample() needs a sequence with a length; the return type of
# readImageSet is not visible from here, so do not rely on it being a list.
testimset = list(readImageSet(testCollection))

# Evaluate on a random subset of at most 200 images. Capping the sample size
# at the collection size avoids the ValueError random.sample() raises when
# asked for more items than the population holds.
nr_of_test_images = min(200, len(testimset))
testimset = random.sample(testimset, nr_of_test_images)

# Fetch the feature vectors of the sampled images. `renamed` is used below as
# the image ids aligned with `vectors` -- presumably the ids actually found in
# the feature file; confirm against BigFile.read.
renamed, vectors = feat_file.read(testimset)
In [4]:
import time

# Tag every test image, one feature vector at a time, and report how long
# the whole batch took (wall-clock time).
s_time = time.time()
results = []
for vec in vectors:
    results.append(tagger.predict(vec))
timespan = time.time() - s_time

print('processing %d images took %g seconds' % (len(renamed), timespan))
In [5]:
from basic.annotationtable import readConcepts, readAnnotationsFrom

# Ground truth of the test collection.
testAnnotationName = 'conceptsmir14.txt'
concepts = readConcepts(testCollection, testAnnotationName)
nr_of_concepts = len(concepts)

# Two views of the positive annotations:
#   label2imset: concept  -> set of images labelled positive for it
#   im2labelset: image id -> set of concepts it is positive for
# A concept without any positive image gets no entry in label2imset.
label2imset = {}
im2labelset = {}
for concept in concepts:
    names, labels = readAnnotationsFrom(testCollection, testAnnotationName, concept)
    # Only labels greater than zero count as positive.
    pos_set = [name for name, label in zip(names, labels) if label > 0]
    print('%s has %d positives' % (concept, len(pos_set)))
    if pos_set:
        label2imset.setdefault(concept, set()).update(pos_set)
    for im in pos_set:
        im2labelset.setdefault(im, set()).add(concept)
In [6]:
from basic.metric import getScorer

# Concept-centric evaluation: for every test concept, rank the test images by
# their predicted score for that concept and compute the AP of the ranking.

# Regroup the per-image predictions by tag: tag -> [(image id, score), ...].
ranklists = {}
for _id, res in zip(renamed, results):
    for tag, score in res:
        ranklists.setdefault(tag, []).append((_id, score))

scorer = getScorer('AP')
mean_ap = 0.0
for concept in concepts:
    # Use .get() instead of indexing: a concept with no positive image has no
    # entry in label2imset, and a concept the tagger never predicted for the
    # sampled images has no entry in ranklists. Indexing would raise KeyError.
    pos_set = label2imset.get(concept, set())
    ranklist = ranklists.get(concept, [])
    if ranklist:
        # Sort images by score in descending order; ties are broken by image
        # id so the ranking is deterministic.
        ranklist.sort(key=lambda v: (v[1], v[0]), reverse=True)
        # The scorer is fed +1 for a relevant image and -1 for an irrelevant one.
        sorted_labels = [2 * int(x[0] in pos_set) - 1 for x in ranklist]
        perf = scorer.score(sorted_labels)
    else:
        # Nothing was ranked for this concept, so it contributes an AP of zero.
        perf = 0.0
    print('%s %.3f' % (concept, perf))
    mean_ap += perf

# Average over all test concepts; skip the division when there are none.
if concepts:
    mean_ap /= len(concepts)
print('meanAP %.3f' % mean_ap)
In [7]:
# Image-centric evaluation: score the tag ranking of each test image with the
# same scorer as above (iAP), then average over the test images.
miap = 0.0
for image_id, predictions in zip(renamed, results):
    # Some images might be negatives to all the 14 concepts.
    relevant = im2labelset.get(image_id, set())
    sorted_labels = []
    for tag, _score in predictions:
        # Predicted tags without ground truth are left out of the ranking.
        if tag not in label2imset:
            continue
        # +1 marks a relevant tag, -1 an irrelevant one; the order in which
        # the tagger returned the tags is kept.
        sorted_labels.append(1 if tag in relevant else -1)
    miap += scorer.score(sorted_labels)
miap /= len(renamed)
print('miap %.3f' % miap)