In [1]:
from gensim.models import Word2Vec
import os
In [2]:
# Location of the trained lemma-level Word2Vec model inside the user's home
# directory; the leading "~" is expanded to an absolute path before loading.
model_file = '~/cltk_data/user_data/word2vec_tlg_lemma.model'
model_path = os.path.expanduser(model_file)

# Deserialize the previously saved model from disk.
model = Word2Vec.load(model_path)
In [3]:
# Grab the model's vocabulary container; later cells iterate over it and take
# its length, using each entry as a query word.
# NOTE(review): newer gensim releases appear to expose the vocabulary under
# `model.wv` instead of directly on the model — confirm against the installed
# gensim version before upgrading.
vocab = model.vocab
In [ ]:
# Build a tab-separated listing of near neighbours: for every vocabulary entry,
# keep the words returned by model.most_similar() whose similarity score is
# above the threshold, then write one "<word>\t<list of neighbours>" line per
# entry that has at least one such neighbour.
SIMILARITY_THRESHOLD = 0.50  # neighbours scoring at or below this are dropped
PROGRESS_EVERY = 1000        # print a progress line every N vocabulary entries

vocab_len = len(vocab)
final_list = []
for counter, x in enumerate(vocab, start=1):
    if counter % PROGRESS_EVERY == 0:
        print(counter, '/', vocab_len)

    # most_similar() returns (word, score) pairs; keep only the words whose
    # score clears the threshold.
    syn_list = [k for k, v in model.most_similar(x) if v > SIMILARITY_THRESHOLD]

    # Entries with no sufficiently similar neighbour are omitted from the output.
    if syn_list:
        line = x + '\t' + str(syn_list)
        final_list.append(line)

path = os.path.expanduser('~/cltk_data/user_data/greek_word2vec_most_sims.txt')
final_str = '\n'.join(final_list)

# Mode 'w' creates the file or truncates an existing one, so no separate
# "empty the file first" step is needed. The encoding is pinned to UTF-8
# because the vocabulary is non-ASCII text (presumably Greek, per the file
# name) and the platform default encoding may not be able to represent it.
with open(path, 'w', encoding='utf-8') as fo:
    fo.write(final_str)
In [ ]: