In [2]:
%matplotlib inline

from pycocotools.coco import COCO
import os
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pickle
from nltk.corpus import wordnet as wn
plt.rcParams['figure.figsize'] = (10.0, 8.0)
from CaptionSaliency import CaptionSaliency as CS


/home/haoran/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-2-dd4da6e2f53c> in <module>()
      1 get_ipython().magic(u'matplotlib inline')
      2 
----> 3 from pycocotools.coco import COCO
      4 import os
      5 import numpy as np

ImportError: No module named pycocotools.coco

In [3]:
dataType='val2014'
usingSet='5000coco'
dataDir='H:/SG_code/Dataset/COCO/tools'   #<====coco path
savefileDir = 'data'

CapSal_train = CS(dataType,usingSet,dataDir,savefileDir)


loading annotations into memory...
0:00:08.189000
creating index...
index created!
loading annotations into memory...
0:00:00.646000
creating index...
index created!

In [4]:
CapSal_train.compute_distance()


begin to compute distance...
this may take a couple of hours...
progress will be printed after an interval of 1000 images
0
1000
2000
3000
4000
4:51:08.831000
saving data...!
caption saliency value computed...!

In [2]:
import scipy.io as sio 

a = [1,2,3,4]

sio.savemat('data/a.mat',{'a' : a})

test TFIDF


In [6]:
from sklearn.feature_extraction.text import TfidfTransformer

In [7]:
transformer = TfidfTransformer()

In [6]:
counts = [['men','kid','boy'],
...           ['men', 'boy'],
...           ['kid', 'men', 'kid','kid'],
...           ['woman', 'boy', 'girl'],
...           ['baby', 'men', 'woman','boy'],
...           ['kid']]

In [15]:
# counts2 = [[1,1,1],[1,0,1],[1,3,0],[0,0,1],[1,0,1],[0,1,0]]
counts2 = [[4,1],[3,1]]
tfidf = transformer.fit_transform(counts2)

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [9]:
vectorizer = TfidfVectorizer(min_df=1)

In [10]:
vectorizer.fit_transform(counts)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-10-b653fc3f6ee2> in <module>()
----> 1 vectorizer.fit_transform(counts)

F:\Anaconda2\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y)
   1303             Tf-idf-weighted document-term matrix.
   1304         """
-> 1305         X = super(TfidfVectorizer, self).fit_transform(raw_documents)
   1306         self._tfidf.fit(X)
   1307         # X is already a transformed view of raw_documents so

F:\Anaconda2\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y)
    815 
    816         vocabulary, X = self._count_vocab(raw_documents,
--> 817                                           self.fixed_vocabulary_)
    818 
    819         if self.binary:

F:\Anaconda2\lib\site-packages\sklearn\feature_extraction\text.pyc in _count_vocab(self, raw_documents, fixed_vocab)
    750         indptr.append(0)
    751         for doc in raw_documents:
--> 752             for feature in analyze(doc):
    753                 try:
    754                     j_indices.append(vocabulary[feature])

F:\Anaconda2\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(doc)
    236 
    237             return lambda doc: self._word_ngrams(
--> 238                 tokenize(preprocess(self.decode(doc))), stop_words)
    239 
    240         else:

F:\Anaconda2\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(x)
    202 
    203         if self.lowercase:
--> 204             return lambda x: strip_accents(x.lower())
    205         else:
    206             return strip_accents

AttributeError: 'list' object has no attribute 'lower'

In [1]:
import nltk
nltk.download()


showing info http://www.nltk.org/nltk_data/
Out[1]:
True

In [ ]: