In [1]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before
# each cell executes) -- presumably so edits to the local word2vec package are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import word2vec

In [3]:
# Train on the text8 file and write the result to text8.bin (loaded later
# with kind='bin'). verbose=True produces the progress log shown below.
# size=100 is presumably the vector dimensionality -- confirm against the
# word2vec package.
word2vec.word2vec(
    '/Users/danielfrg/Downloads/text8',
    '/Users/danielfrg/Downloads/text8.bin',
    size=100,
    verbose=True
)


Starting training using file /Users/danielfrg/Downloads/text8
Vocab size: 71291
Words in train file: 16718843
Alpha: 0.000002  Progress: 100.03%  Words/thread/sec: 276.90k  

In [4]:
# Second training run on the same text8 file, this time writing text8.txt
# (loaded later with kind='txt'). binary=0 presumably selects the plain-text
# output format -- confirm against the word2vec package.
word2vec.word2vec(
    '/Users/danielfrg/Downloads/text8',
    '/Users/danielfrg/Downloads/text8.txt',
    size=100,
    binary=0,
    verbose=True
)


Starting training using file /Users/danielfrg/Downloads/text8
Vocab size: 71291
Words in train file: 16718843
Alpha: 0.000002  Progress: 100.03%  Words/thread/sec: 277.29k  

In [4]:


In [2]:
import word2vec

In [11]:
# Load the model stored in text8.bin via the 'bin' loader.
binary = word2vec.load(
    '/Users/danielfrg/Downloads/text8.bin',
    kind='bin'
)

In [15]:
# Load the model stored in text8.txt via the 'txt' loader.
text = word2vec.load(
    '/Users/danielfrg/Downloads/text8.txt',
    kind='txt'
)

In [16]:
# Inspect the vocabulary array of the model loaded from the binary file.
binary.vocab


Out[16]:
array([u'</s>', u'the', u'of', ..., u'bredon', u'skirting', u'santamaria'], 
      dtype='<U78')

In [17]:
# Inspect the vocabulary array of the model loaded from the text file, to
# compare against the binary-loaded one.
text.vocab


Out[17]:
array([u'</s>', u'the', u'of', ..., u'bredon', u'skirting', u'santamaria'], 
      dtype='<U78')

In [18]:
# Query the binary-loaded model for 'dog'. Judging by the output, this returns
# a pair of arrays: integer indices and scores in descending order --
# presumably vocab indices and cosine similarities of the closest words
# (confirm against the word2vec package).
binary.cosine('dog')


Out[18]:
(array([ 2600, 11158, 13739,  4850,  9586,  8185,  2971,  9102,  4140, 11877]),
 array([ 0.83075334,  0.77584524,  0.77085914,  0.76748576,  0.76346954,
         0.74456394,  0.73442467,  0.73413262,  0.73061051,  0.71777623]))

In [19]:
# Same query against the text-loaded model. The two files were written by
# separate training calls, which likely explains small differences from the
# binary-loaded result.
text.cosine('dog')


Out[19]:
(array([ 2600, 11158,  9586, 13739,  4850,  8185,  9102,  4140,  2971,  8664]),
 array([ 0.81635393,  0.7741477 ,  0.7587773 ,  0.75698403,  0.75485644,
         0.73609652,  0.73227508,  0.72156542,  0.71674759,  0.71594169]))

Original load timings (baseline)


In [19]:
%%timeit -n1 -r1
# Time one load of the binary file (-n1 -r1: a single loop, a single repeat).
word2vec.load('/Users/danielfrg/Downloads/text8.bin', kind='bin')


1 loops, best of 1: 2.99 s per loop

In [20]:
%%timeit -n1 -r1
# Time one load of the text file (-n1 -r1: a single loop, a single repeat).
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt')


1 loops, best of 1: 12.3 s per loop

In [22]:
%%timeit -n1 -r1
# Time one load of the text file with secure=True.
# NOTE(review): what secure=True changes is not visible in this notebook --
# check the word2vec.load implementation.
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt', secure=True)


1 loops, best of 1: 4.4 s per loop

New load timings


In [21]:
%%timeit -n3 -r3
# Time the text-file load with 3 loops per repeat and 3 repeats (best is
# reported) -- presumably measured after the loader was changed; confirm.
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt')


3 loops, best of 3: 4.23 s per loop

In [23]:
%%timeit -n3 -r3
# Second run of the same text-file load measurement (3 loops, 3 repeats),
# giving another sample of the timing.
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt')


3 loops, best of 3: 4.29 s per loop

In [ ]: