In [1]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import scipy.io as sio
plt.rcParams['figure.figsize'] = (10.0, 8.0)
import os
import pickle
import nltk
from nltk.corpus import wordnet as wn  # used by findID below; needs nltk.download('wordnet')
In [14]:
# training set
# dataType='train2014'
# usingSet='10000coco'
# validation set
dataType='val2014'
usingSet='5000coco'
#===================
dataDir='H:/SG_code/Dataset/COCO/tools'
usingSetDir = 'H:/SG_code/VIPcoco/%s'%usingSet
InsFile='%s/annotations/instances_%s.json'%(dataDir,dataType)
CapFile='%s/annotations/captions_%s.json'%(dataDir,dataType)
Ins_coco=COCO(InsFile)
Cap_coco=COCO(CapFile)
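A quick sanity check on what the API just indexed (a sketch, not part of the original run):
In [ ]:
# counts come straight from the pycocotools index
print(len(Ins_coco.getImgIds()))  # images in this split
print(len(Ins_coco.getCatIds()))  # 80 instance categories
print(len(Cap_coco.getAnnIds()))  # caption annotations, roughly 5 per image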
In [21]:
# for the validation set: filenames are bare image ids, e.g. '123456.jpg'
SALICON_filename = os.listdir(usingSetDir)
SALICON_filename.sort()
SALICON_id = [int(item[0:-4]) for item in SALICON_filename]  # strip '.jpg'
In [11]:
# for the training set: filenames carry the 15-character 'COCO_train2014_' prefix
SALICON_filename = os.listdir(usingSetDir)
SALICON_filename.sort()
SALICON_id = [int(item[15:-4]) for item in SALICON_filename]  # strip prefix and '.jpg'
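The two cells above differ only in the filename prefix they strip; a toy check with hypothetical filenames:
In [ ]:
print(int('123456.jpg'[0:-4]))                        # val-style name -> 123456
print(int('COCO_train2014_000000123456.jpg'[15:-4]))  # train-style name -> 123456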
SALICON = {}
SALICON['SALICON_filename'] = SALICON_filename
SALICON['SALICON_id'] = SALICON_id
In [19]:
# save the image list under the name of the current set
pickle.dump(SALICON,open('data/%s.p'%usingSet,'wb'))
In [8]:
cat_list = Ins_coco.cats  # dict: category id -> {'id', 'name', 'supercategory'}
In [3]:
# load the COCO category name list from MATLAB and decode each cell
cat_data=sio.loadmat('data/category.mat')
cat_data = cat_data['category']
category = []
for i in range(0, cat_data.size):
    cat_item = cat_data[i].item()
    cat_item = np.array2string(cat_item)
    cat_item = cat_item[3:-2]  # strip the [u'...'] wrapper left by array2string
    category.append(cat_item)
# replace multi-word COCO names with single-word heads so WordNet lookup works,
# e.g. 'stop sign' -> 'sign', 'baseball bat' -> 'bat'
category[12] = 'sign'
category[27] = 'umbrella'
category[9] = 'light'
category[13] = 'meter'
category[38] = 'bat'
category[39] = 'glove'
category[42] = 'racket'
category[45] = 'glass'
category[63] = 'plant'
category[66] = 'table'
category[76] = 'phone'
category[87] = 'teddy'
category[88] = 'drier'
In [9]:
# name -> category id, e.g. 'person' -> 1
cat_dict_idx = {}
for idx in cat_list.keys():
    cat_dict_idx[category[idx-1]] = idx
# name -> supercategory name
cat_dict_supercat = {}
for idx in cat_list.keys():
    cat_dict_supercat[category[idx-1]] = cat_list[idx]['supercategory']
# supercategory name -> numeric index
supercate_id = {}
spcat = []
for idx in cat_list.keys():
    spcat.append(cat_list[idx]['supercategory'])
spcat = list(set(spcat))
for idx,item in enumerate(spcat):
    supercate_id[item] = idx
pickle.dump(category,open('data/category.p','wb'))
pickle.dump(cat_dict_idx,open('data/cat_dict_idx.p','wb'))
pickle.dump(cat_dict_supercat,open('data/cat_dict_supercat.p','wb'))
pickle.dump(supercate_id,open('data/supercate_id.p','wb'))
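To see the shape of the lookup tables just pickled (a sketch; exact ids depend on the annotation release):
In [ ]:
print(cat_dict_idx['person'])       # category id for 'person' (1 in COCO)
print(cat_dict_supercat['pizza'])   # -> 'food'
print(sorted(supercate_id.keys()))  # the supercategory names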
In [13]:
# per-image list of the distinct instance category ids annotated in COCO
AnID_list = []
for im_id in range(len(SALICON['SALICON_id'])):
    annIds = Ins_coco.getAnnIds(SALICON['SALICON_id'][im_id])
    anns = Ins_coco.loadAnns(annIds)
    AnID_list_item = []
    for item in anns:
        AnID_list_item.append(item['category_id'])
    AnID_list.append(list(set(AnID_list_item)))
pickle.dump(AnID_list,open('data/Ins_ID_10000coco.p','wb'))
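The stored ids can be mapped back to names with the same id-1 indexing used above (a sketch):
In [ ]:
print(AnID_list[0])                               # distinct category ids in the first image
print([category[cid-1] for cid in AnID_list[0]])  # their shortened names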
In [3]:
SALICON = pickle.load(open('data/5000coco.p','rb'))
Ins_ID = pickle.load(open('data/Ins_ID_10k.p','rb'))
category = pickle.load(open('data/category.p','rb'))
cat_list = Ins_coco.cats  # official category list
category_idx = pickle.load(open('data/cat_dict_idx.p','rb'))  # e.g. 'person' -> 1
category_supercategory_idx = pickle.load(open('data/cat_dict_supercat.p','rb'))  # e.g. 'pizza' -> 'food'
supercategory_idx = pickle.load(open('data/supercate_id.p','rb'))  # e.g. 'food' -> its index
In [5]:
def findID(word, im_idd):
    # exact match against the COCO category names
    if word in category:
        return category_idx[word]
    # otherwise score every category by its best Wu-Palmer similarity
    # to the word, taken over all pairs of noun synsets
    temp_idlist = {}
    for item in category_idx.keys():
        for item1 in wn.synsets(item, wn.NOUN):
            for word1 in wn.synsets(word, wn.NOUN):
                dist = item1.wup_similarity(word1)
                if dist is None:
                    continue
                cid = category_idx[item]
                if cid not in temp_idlist or dist > temp_idlist[cid]:
                    temp_idlist[cid] = dist
    # keep the two most similar categories; accept one only if that
    # category is actually annotated in this image
    temp_idlist = sorted(temp_idlist.iteritems(), key=lambda d: d[1], reverse = True)
    for n in temp_idlist[0:2]:
        if n[0] in Ins_ID[im_idd]:
            return n[0]
    return 0
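findID returns a category id directly on an exact name match and falls back to Wu-Palmer similarity otherwise; a small illustration with hypothetical inputs (assumes the cells above have run):
In [ ]:
print(findID('person', 0))  # exact hit: 'person' is a category name
# the fallback ranks categories by WordNet similarity, e.g. 'puppy' vs 'dog'
d = wn.synsets('dog', wn.NOUN)[0]
p = wn.synsets('puppy', wn.NOUN)[0]
print(d.wup_similarity(p))  # high, close to 1.0
print(findID('puppy', 0))   # dog's id if a dog is annotated in image 0, else 0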
In [8]:
# sanity checks on the loaded image list
print SALICON['SALICON_filename'][901]
print SALICON.keys()
print SALICON['SALICON_id'][0]
print SALICON['SALICON_filename'][0]
In [7]:
# word_mat: per-image cell arrays of nouns parsed from the COCO captions (from MATLAB)
WORDMAT = sio.loadmat('data/word_mat_april10.mat')
WORDMAT = WORDMAT['word_mat']
In [8]:
WORDMAT_dup = WORDMAT[:,0]  # use the first column of the cell array
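wn.morphy, used below, lemmatizes a word and filters by part of speech; two illustrative calls:
In [ ]:
print(wn.morphy('dogs', wn.NOUN))  # -> 'dog'
print(wn.morphy('the', wn.NOUN))   # -> None: no noun reading, so the word is skipped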
In [26]:
# map every caption noun to a COCO category id; drop words that are not
# nouns in WordNet or that findID cannot match to an annotated category
COCO10k_nounlist = []
COCO10k_nounID = []
for im_id, captions_perim in enumerate(WORDMAT_dup):
    noun_im = []
    nounID_im = []
    for caption in captions_perim:
        noun_perst = []
        nounid_perst = []
        for noun in caption[0]:
            word = (noun.item())[0]
            word = wn.morphy(word, wn.NOUN)  # lemmatize; None if not a noun
            if word is None:
                continue
            I_ID = findID(word, im_id)
            if I_ID == 0:
                continue
            noun_perst.append(word)
            nounid_perst.append(I_ID)
        noun_im.append(noun_perst)
        nounID_im.append(nounid_perst)
    COCO10k_nounlist.append(noun_im)
    COCO10k_nounID.append(nounID_im)
    if im_id%100 == 0:
        print im_id
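The result is one list per image with one sub-list per caption (a sketch):
In [ ]:
print(COCO10k_nounlist[0])  # nouns kept, one sub-list per caption
print(COCO10k_nounID[0])    # the matching COCO category ids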
In [35]:
# note: Cardi_Noun and Seque_Noun are built in the next cell (In [29]), which was executed first
pickle.dump(COCO10k_nounlist,open('data/10000coco_nounlist.p','wb'))
pickle.dump(COCO10k_nounID,open('data/10000coco_nounID.p','wb'))
pickle.dump(Cardi_Noun,open('data/10000coco_Cardi.p','wb'))
pickle.dump(Seque_Noun,open('data/10000coco_Seque.p','wb'))
In [29]:
# Cardi_Noun: per image, how many times each category id is mentioned
# across the captions (cardinality), sorted descending.
# Seque_Noun: per image, a rank score rewarding ids mentioned early: each
# caption containing the id adds n_obj/(wid+1), where wid is the position
# of its first mention in that caption's noun list (integer division).
Cardi_Noun = []
Seque_Noun = []
for group in COCO10k_nounID:
    imdict = {}
    cardi = []
    for item in group:
        if not item:
            continue
        for idx in item:
            cardi.append(idx)
    u_set = list(set(cardi))
    n_obj = len(u_set)
    for uitem in u_set:
        imdict[uitem] = cardi.count(uitem)
    imdict = sorted(imdict.iteritems(), key=lambda d: d[1], reverse = True)
    Cardi_Noun.append(imdict)
    seque = {}
    seq = [0]*n_obj
    for iid, item in enumerate(u_set):
        for imseq in group:
            if not imseq or item not in imseq:
                continue
            wid = imseq.index(item)  # position of the first mention only
            seq[iid] += n_obj/(wid+1)
        seque[item] = seq[iid]
    seque = sorted(seque.iteritems(), key=lambda d: d[1], reverse = True)
    Seque_Noun.append(seque)
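A worked toy example of both scores (hypothetical ids; integer division as in the Python 2 loop above):
In [ ]:
group = [[1, 19], [19], [1, 19]]  # category ids per caption for one image
cardi = [i for cap in group for i in cap]
u_set = list(set(cardi))
n_obj = len(u_set)
print([(u, cardi.count(u)) for u in u_set])  # cardinality: 19 -> 3, 1 -> 2
print([(u, sum(n_obj/(cap.index(u)+1) for cap in group if u in cap))
       for u in u_set])  # sequence score: both ids score 4 here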