In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell is executed.
# NOTE(review): presumably here so local edits to the word2vec package are
# picked up without restarting the kernel -- confirm it is still needed.
%load_ext autoreload
%autoreload 2

In [2]:
import os

import word2vec

In [3]:
import numpy as np

In [4]:
# Load the binary word2vec model that every benchmark cell below queries.
# The location is expressed relative to the current user's home directory
# ("~/Downloads/text8.bin") instead of a machine-specific absolute path, so the
# notebook can be re-run by other users who keep the file in the same place.
# expanduser() returns a plain str, matching the argument type used originally.
model = word2vec.load(os.path.expanduser('~/Downloads/text8.bin'))

Cosine


In [5]:
%%timeit
indexes, metrics = model.cosine('word', n=10)


10 loops, best of 3: 22.9 ms per loop

In [6]:
%%timeit
indexes, metrics = model.cosine('socks', n=10)
model.generate_response(indexes, metrics)


10 loops, best of 3: 22.8 ms per loop

In [7]:
%%timeit
indexes, metrics = model.cosine('word', n=5000)


10 loops, best of 3: 26.2 ms per loop

In [8]:
%%timeit
indexes, metrics = model.cosine('word', n=5000)
model.generate_response(indexes, metrics)


10 loops, best of 3: 26.4 ms per loop

Analogy


In [9]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=10)


10 loops, best of 3: 27.5 ms per loop

In [10]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=10)
model.generate_response(indexes, metrics)


10 loops, best of 3: 31.9 ms per loop

In [11]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)


10 loops, best of 3: 28.6 ms per loop

In [12]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)
model.generate_response(indexes, metrics)


10 loops, best of 3: 29.7 ms per loop

In [13]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)
model.generate_response(indexes, metrics).tolist()


10 loops, best of 3: 26.7 ms per loop

Cluster


In [14]:
# Load the cluster file that accompanies the text8 model.
# The location is expressed relative to the current user's home directory
# ("~/Downloads/text8-clusters.txt") instead of a machine-specific absolute
# path, so the notebook can be re-run by other users.
clusters = word2vec.load_clusters(os.path.expanduser('~/Downloads/text8-clusters.txt'))

In [15]:
# Attach the loaded clusters to the model object.
# This mutates `model` in place: any cell executed after this one -- including
# re-runs of the Cosine/Analogy cells above -- operates on a model that has
# clusters set, so run the notebook top to bottom when comparing timings.
# NOTE(review): presumably generate_response adds cluster information to its
# output once this attribute is set -- confirm against the word2vec package.
model.clusters = clusters

In [16]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=10)
model.generate_response(indexes, metrics)


10 loops, best of 3: 26.3 ms per loop

In [17]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)
model.generate_response(indexes, metrics)


10 loops, best of 3: 28.5 ms per loop

In [18]:
%%timeit
indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)
model.generate_response(indexes, metrics).tolist()


10 loops, best of 3: 28.7 ms per loop

In [ ]: