In [1]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import scipy.io as sio
plt.rcParams['figure.figsize'] = (10.0, 8.0)
import os
import pickle
import nltk
from nltk.corpus import wordnet as wn
In [14]:
# training set
# dataType='train2014'
# usingSet='10000coco'
# validation set
dataType='val2014'
usingSet='5000coco'
#===================
# NOTE(review): hardcoded absolute local paths; also 'DATA' vs 'Data' differ
# in case between the two roots -- verify both exist on a case-sensitive FS.
dataDir='/media/haoran/DATA/Dataset/COCO/tools'
usingSetDir = '/media/haoran/Data/Dataset/VIPcoco/%s'%usingSet
# COCO annotation files for the chosen split: object instances and captions.
InsFile='%s/annotations/instances_%s.json'%(dataDir,dataType)
CapFile='%s/annotations/captions_%s.json'%(dataDir,dataType)
# COCO API handles for the instance and caption annotation sets.
Ins_coco=COCO(InsFile)
Cap_coco=COCO(CapFile)
In [4]:
# NOTE(review): dead cells kept for provenance -- they originally built the
# SALICON filename/id index from the image directory and dumped it to
# data/5000coco.p, which a later cell loads instead of recomputing.
# SALICON_filename = os.listdir(usingSetDir)
# SALICON_filename.sort()
# SALICON_id = [int(item[0:-4]) for item in SALICON_filename]
In [23]:
# SALICON_filename = os.listdir(usingSetDir)
# SALICON_filename.sort()
# SALICON_id = [int(item[0:-4]) for item in SALICON_filename]
In [24]:
# SALICON = {}
# SALICON['SALICON_filename'] = SALICON_filename
# SALICON['SALICON_id'] = SALICON_id
# pickle.dump(SALICON,open('data/5000coco.p','wb'))
In [3]:
# Load precomputed lookup tables (pickled by earlier cells / offline).
# NOTE(review): pickle.load on untrusted files can execute arbitrary code;
# these are local project artifacts, presumably trusted.
SALICON = pickle.load(open('data/5000coco.p','rb'))  # {'SALICON_filename': [...], 'SALICON_id': [...]}
Ins_ID = pickle.load(open('data/Ins_ID_10k.p','rb'))  # per-image instance category ids -- TODO confirm layout
category = pickle.load(open('data/category.p','rb'))  # official category names
cat_list = Ins_coco.cats#category list (official)
category_idx = pickle.load(open('data/cat_dict_idx.p','rb'))#eg., person -- 1
category_supercategory_idx = pickle.load(open('data/cat_dict_supercat.p','rb')) #eg., person--human
supercategory_idx = pickle.load(open('data/supercate_id.p','rb'))#eg., food--1
In [5]:
def findID(word, im_idd):
    """Map a noun to a COCO category id for image index `im_idd`.

    If `word` is an official category name, return its id directly.
    Otherwise score every category by the maximum Wu-Palmer similarity
    between any of its WordNet noun synsets and any synset of `word`,
    take the two best-scoring categories, and return the first of them
    that is actually present in the image's instance annotations
    (`Ins_ID[im_idd]`). Returns 0 when no candidate matches.

    Relies on module-level `category`, `category_idx`, `Ins_ID`, `wn`.
    """
    if word in category:
        return category_idx[word]
    temp_idlist = {}
    for item in category_idx.keys():
        cat_id = category_idx[item]
        for item1 in wn.synsets(item, wn.NOUN):
            for word1 in wn.synsets(word, wn.NOUN):
                dist = item1.wup_similarity(word1)
                # Keep the best (maximum) similarity per category id.
                # BUGFIX: the original tested `item` (a string) against a
                # dict keyed by integer ids, so the max-update branch was
                # unreachable and the LAST pair's distance won instead.
                # NOTE(review): wup_similarity may return None; under
                # Python 2 `number > None` is True, so None scores are
                # overwritten by any numeric score.
                if cat_id not in temp_idlist or dist > temp_idlist[cat_id]:
                    temp_idlist[cat_id] = dist
    # Two most similar categories, best first (Python 2 iteritems).
    temp_idlist = sorted(temp_idlist.iteritems(), key=lambda d: d[1], reverse=True)
    temp_idlist = temp_idlist[0:2]
    for n in temp_idlist:
        if n[0] in Ins_ID[im_idd]:
            return n[0]
    return 0
In [8]:
# Sanity check: inspect a few entries of the loaded SALICON index.
print SALICON['SALICON_filename'][901]
print SALICON.keys()
print SALICON['SALICON_id'][0]
print SALICON['SALICON_filename'][0]
In [7]:
# Caption nouns stored as a MATLAB cell array; keep only the data variable.
WORDMAT = sio.loadmat('data/word_mat_april10.mat')
WORDMAT = WORDMAT['word_mat']
In [8]:
# First column of the cell array: per-image caption noun lists -- TODO
# confirm the layout against the .mat producer.
WORDMAT_dup = WORDMAT[:,0]
In [26]:
# For every image, map each caption noun to a COCO category id (via findID)
# and collect, per caption, the surviving nouns and their ids.
# Indentation restored from the flattened notebook export.
COCO10k_nounlist = []
COCO10k_nounID = []
for im_id, captions_perim in enumerate(WORDMAT_dup):
    noun_im = []    # per-caption noun lists for this image
    nounID_im = []  # per-caption category-id lists for this image
    for caption in captions_perim:
        noun_perst = []
        nounid_perst = []
        for noun in caption[0]:
            # MATLAB cell entry -> Python string -- assumes a 1-element
            # object array wrapping the word; TODO confirm .mat layout.
            word = (noun.item())[0]
            # Lemmatize to the base noun form; None means not a known noun.
            word = wn.morphy(word, wn.NOUN)
            if word is None:
                continue
            I_ID = findID(word, im_id)
            if I_ID == 0:
                # No matching category present in this image -- drop the noun.
                continue
            noun_perst.append(word)
            nounid_perst.append(I_ID)
        noun_im.append(noun_perst)
        nounID_im.append(nounid_perst)
    COCO10k_nounlist.append(noun_im)
    COCO10k_nounID.append(nounID_im)
    if im_id%100 == 0:
        # break
        print im_id
In [35]:
# Persist the derived noun annotations.
# NOTE(review): Cardi_Noun / Seque_Noun are only defined in a LATER cell
# (execution counts In [35] vs In [29] show out-of-order execution); this
# cell fails on a fresh top-to-bottom run unless that cell runs first.
pickle.dump(COCO10k_nounlist,open('data/10000coco_nounlist.p','wb'))
pickle.dump(COCO10k_nounID,open('data/10000coco_nounID.p','wb'))
pickle.dump(Cardi_Noun,open('data/10000coco_Cardi.p','wb'))
pickle.dump(Seque_Noun,open('data/10000coco_Seque.p','wb'))
In [29]:
# Per image, summarize the caption-derived category ids two ways:
#  - Cardi_Noun: (category_id, mention_count) pairs, most-mentioned first.
#  - Seque_Noun: (category_id, score) pairs where earlier mentions within a
#    caption score higher (n_obj / (position+1), summed over captions).
# Indentation restored from the flattened notebook export.
Cardi_Noun=[]
Seque_Noun=[]
for group in COCO10k_nounID:
    # --- cardinality: how often each unique category id is mentioned ---
    imdict = {}
    cardi=[]
    for item in group:
        if not item:
            continue
        for idx in item:
            cardi.append(idx)
    u_set = list(set(cardi))
    n_obj = len(u_set)
    for uitem in u_set:
        imdict[uitem] = cardi.count(uitem)
    imdict= sorted(imdict.iteritems(), key=lambda d:d[1], reverse = True)
    Cardi_Noun.append(imdict)
    # --- sequence: reward categories mentioned early in each caption ---
    seque={}
    seq = [0]*n_obj
    for iid, item in enumerate(u_set):
        for imseq in group:
            if not imseq or item not in imseq:
                continue
            # list.index always returns an int (first occurrence); the
            # original's `type(wid)==list` branch was unreachable dead code.
            wid = imseq.index(item)
            # NOTE(review): Python 2 integer division, as in the original --
            # confirm truncation is intended rather than float scoring.
            seq[iid]+=n_obj/(wid+1)
        seque[item] = seq[iid]
    seque= sorted(seque.iteritems(), key=lambda d:d[1], reverse = True)
    Seque_Noun.append(seque)