In [1]:
from gensim.models import Word2Vec
import os

In [2]:
# Resolve the saved model file inside the user's home directory ('~' is
# expanded by os.path.expanduser) and load it as a gensim Word2Vec model.
model_path = os.path.expanduser(
    '~/cltk_data/user_data/word2vec_tlg_lemma.model'
)
model = Word2Vec.load(model_path)

In [3]:
# Grab the model's vocabulary mapping (iterating it yields the words).
# The attribute moved between gensim releases, so probe from newest to oldest:
#   gensim >= 4.0 : model.wv.key_to_index
#   gensim 1.x-3.x: model.wv.vocab
#   older releases: model.vocab (no `wv` attribute on the model)
_keyed_vectors = getattr(model, 'wv', model)
vocab = getattr(_keyed_vectors, 'key_to_index', None)
if vocab is None:
    vocab = _keyed_vectors.vocab

In [ ]:
# For every word in `vocab`, collect the neighbours returned by
# `most_similar` whose score is strictly above SIMILARITY_THRESHOLD, then
# write one line per word that has at least one such neighbour:
#     <word>\t<Python list repr of neighbours>
SIMILARITY_THRESHOLD = 0.50
PROGRESS_EVERY = 1000  # print a progress line every N words

# Newer gensim releases expose similarity queries on `model.wv`; fall back to
# the model itself when that attribute is absent (older releases).
similarity_index = getattr(model, 'wv', model)

vocab_len = len(vocab)
counter = 0  # stays defined (as 0) even when the vocabulary is empty
final_list = []
for counter, x in enumerate(vocab, start=1):
    if counter % PROGRESS_EVERY == 0:
        print(counter, '/', vocab_len)

    # most_similar yields (word, score) pairs; keep only the words that pass.
    syn_list = [
        k for k, v in similarity_index.most_similar(x)
        if v > SIMILARITY_THRESHOLD
    ]

    if syn_list:
        final_list.append(x + '\t' + str(syn_list))

path = os.path.expanduser('~/cltk_data/user_data/greek_word2vec_most_sims.txt')
final_str = '\n'.join(final_list)

# Mode 'w' creates the file or truncates an existing one, so no separate
# "empty the file first" step is needed. The encoding is pinned to UTF-8
# because the words are expected to be non-ASCII (the output file name
# suggests Greek) and the locale default encoding may not be able to
# represent them.
with open(path, 'w', encoding='utf-8') as fo:
    fo.write(final_str)

In [ ]: