In [2]:
import word2vec
# https://github.com/danielfrg/word2vec
In [3]:
# Note -- you need to unzip text8.zip on the command-line before you start!
# Step 1: run word2phrase on the unzipped 'text8' file, naming 'text8-phrases'
# as the second argument (presumably the output path, since the next cell
# passes 'text8-phrases' as its input -- confirm against the word2vec docs).
# `%time` reports how long the call took; verbose=False is passed to the call.
%time word2vec.word2phrase('text8', 'text8-phrases', verbose=False)
In [4]:
# Step 2: run word2vec.word2vec on 'text8-phrases' with 'text8.bin' as the
# second argument; 'text8.bin' is the file a later cell loads as the model.
# NOTE(review): size=100 is presumably the embedding dimensionality -- confirm
# against the word2vec package documentation.
%time word2vec.word2vec('text8-phrases', 'text8.bin', size=100, verbose=False)
In [5]:
# Step 3: run word2clusters, naming 'text8-clusters.txt' as the second argument;
# that file is what load_clusters() reads later in this notebook.
# Note the input here is the raw 'text8' file, not 'text8-phrases'.
# NOTE(review): the positional 100 is presumably the number of clusters --
# confirm against the word2vec package documentation.
%time word2vec.word2clusters('text8', 'text8-clusters.txt', 100, verbose=False)
In [8]:
# Load 'text8.bin' (named in the word2vec.word2vec cell above) and bind the
# result to `model`; `%time` reports how long the load took.
# NOTE(review): the execution counter jumps from In [5] to In [8] here --
# confirm the notebook still runs top-to-bottom on a fresh kernel.
%time model = word2vec.load('text8.bin')
In [9]:
# Display the shape of the model's `vectors` attribute.
# Presumably (vocabulary size, vector size) -- the output is not captured in
# this export, so verify by running the cell.
model.vectors.shape
Out[9]:
In [10]:
# Look up 'dog' by indexing the model and display the shape of the result
# (presumably that word's embedding vector -- TODO confirm).
model['dog'].shape
Out[10]:
In [11]:
# Query the model for 'dog'; the call returns two values, unpacked here as
# `indexes` and `metrics` and inspected in the next two cells.
# Presumably these are vocabulary indexes of the most similar words and their
# cosine similarity scores -- confirm against the word2vec docs.
indexes, metrics = model.cosine('dog')
In [12]:
# Index the model's vocabulary with `indexes` from the previous cell to show
# the corresponding entries (presumably the words themselves).
model.vocab[indexes]
Out[12]:
In [13]:
# Display `metrics`, the second value returned by model.cosine('dog').
metrics
Out[13]:
In [14]:
# Analogy query with 'king' and 'woman' as positive terms and 'man' as the
# negative term (the classic "king - man + woman" example).
# This rebinds `indexes` and `metrics`, replacing the values from the 'dog'
# query above. NOTE(review): n=10 is presumably the number of results returned.
indexes, metrics = model.analogy(pos=['king', 'woman'], neg=['man'], n=10)
In [15]:
# Index the vocabulary with `indexes` from the analogy query in the previous
# cell to show the corresponding entries.
model.vocab[indexes]
Out[15]:
In [19]:
# Load 'text8-clusters.txt' (named in the word2clusters cell above) and bind
# the result to `clusters`.
# NOTE(review): the execution counter jumps from In [15] to In [19] here --
# confirm no deleted cell is needed for this to run on a fresh kernel.
clusters = word2vec.load_clusters('text8-clusters.txt')
In [20]:
# Display the result of get_words_on_cluster for cluster id 90
# (presumably the words assigned to that cluster -- TODO confirm).
clusters.get_words_on_cluster(90)
Out[20]:
In [ ]: