notebook.community

Edit and run



In [1]:

    
from gensim.models import Word2Vec
import os



In [2]:

    
# Location of the previously trained lemma-level Word2Vec model, with the
# leading '~' expanded to the current user's home directory.
model_path = os.path.expanduser(
    '~/cltk_data/user_data/word2vec_tlg_lemma.model'
)

# Read the saved model back from disk.
model = Word2Vec.load(model_path)



In [3]:

    
# Resolve the vocabulary mapping in a way that works across gensim releases:
#   * gensim >= 4.0 : model.wv.key_to_index
#   * gensim 1.x-3.x: model.wv.vocab
#   * gensim < 1.0  : model.vocab (the model has no `wv` attribute)
# Only the keys (the words) and the length of `vocab` are used later on, so
# either mapping type is acceptable.
_keyed_vectors = getattr(model, 'wv', model)
vocab = getattr(_keyed_vectors, 'key_to_index', None)
if vocab is None:
    vocab = _keyed_vectors.vocab



In [ ]:

    
# For every word in the vocabulary, collect the neighbours whose similarity
# score exceeds a threshold and write one "word<TAB>[neighbours]" line per
# word to a text file in the user's CLTK data directory.

# A neighbour is kept only if its similarity score is strictly above this.
SIMILARITY_THRESHOLD = 0.50

# Similarity queries live on `model.wv` in newer gensim releases; fall back to
# the model object itself when that attribute does not exist.
keyed_vectors = getattr(model, 'wv', model)

vocab_len = len(vocab)
counter = 0  # stays 0 if the vocabulary is empty
final_list = []
for counter, x in enumerate(vocab, start=1):
    # Progress report every 1000 words; the full pass can take a while.
    if counter % 1000 == 0:
        print(counter, '/', vocab_len)

    # most_similar() yields (word, score) pairs; keep only the words whose
    # score clears the threshold.
    syn_list = [k for k, v in keyed_vectors.most_similar(x)
                if v > SIMILARITY_THRESHOLD]

    # Words without any sufficiently similar neighbour are skipped entirely.
    if syn_list:
        final_list.append(x + '\t' + str(syn_list))

path = os.path.expanduser('~/cltk_data/user_data/greek_word2vec_most_sims.txt')
final_str = '\n'.join(final_list)

# Mode 'w' creates the file or truncates an existing one, so no separate
# "empty the file first" step is needed. The encoding is pinned to UTF-8
# because the content is Greek text and the platform default encoding may not
# be able to represent it.
with open(path, 'w', encoding='utf-8') as fo:
    fo.write(final_str)



In [ ]: