notebook.community

Edit and run



In [8]:

    
import logging
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
import os
# Route log records at INFO level and above to the root logger, each prefixed
# with a timestamp and its severity, so progress messages show up in the cell output.
logging.basicConfig(
    format='%(asctime)s : %(levelname)s : %(message)s',
    level=logging.INFO,
)



In [4]:

    
class InputSentences(object):
    """Re-iterable corpus that streams whitespace-split lines from a directory.

    Every entry of ``dirname`` is opened as a text file and each of its lines
    is yielded as the list of tokens produced by ``str.split()``. The files are
    read lazily inside ``__iter__``, so each new iteration starts from the
    first file again.
    """

    def __init__(self, dirname):
        """Remember the directory to read from.

        Args:
            dirname: Path of the directory whose entries are read on iteration.
        """
        self.dirname = dirname

    def __iter__(self):
        """Yield one token list per line, file by file.

        Yields:
            list of str: ``line.split()`` for each line; a blank line gives ``[]``.

        Raises:
            OSError: If the directory cannot be listed or an entry cannot be
                opened as a file (for example, a sub-directory).
        """
        # Sorted so the sentence order is the same on every pass;
        # os.listdir() returns entries in arbitrary order.
        for fname in sorted(os.listdir(self.dirname)):
            # The context manager closes each file as soon as it has been read
            # (or when the generator is closed early) instead of leaving the
            # handle open until garbage collection.
            with open(os.path.join(self.dirname, fname)) as handle:
                for line in handle:
                    yield line.split()



In [9]:

    
# A deliberately tiny use case: two "sentences", each already split into tokens.
sentences = [['Rome', 'Italy'], ['Beijing', 'China']]
# Train word2vec on the two sentences. min_count=1 keeps every token, since
# each one occurs only once. The seed is pinned and workers=1 removes
# thread-scheduling jitter so the learned vectors are repeatable; on Python 3,
# also set PYTHONHASHSEED for repeatability across interpreter launches.
model = Word2Vec(sentences, min_count=1, seed=1, workers=1)



In [7]:

    
# Nearest neighbour of 'Rome' in the trained vector space. Query through the
# model's KeyedVectors (`model.wv`): calling most_similar directly on the
# Word2Vec model was deprecated and then removed in gensim 4.0. The getattr
# fallback keeps the cell working on very old releases that have no `wv`.
getattr(model, 'wv', model).most_similar(positive=['Rome'], topn=1)









    Out[7]:





[('Beijing', 0.12166289240121841)]