In [8]:
import logging
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
import os
# Configure the root logger: emit INFO-and-above records, each rendered as
# "<timestamp> : <level name> : <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s'
)

In [4]:
class InputSentences(object):
    """Stream whitespace-tokenized sentences from the files in a directory.

    Every line of every regular file directly inside ``dirname`` is treated as
    one sentence and yielded as a list of tokens. The files are re-read on each
    call to ``__iter__``, so an instance can be iterated more than once without
    keeping the whole corpus in memory.
    """

    def __init__(self, dirname):
        """Remember the directory to read from.

        Args:
            dirname: Path of the directory that contains the text files.
        """
        self.dirname = dirname

    def __iter__(self):
        """Yield one list of tokens per line, file by file.

        Files are visited in the order returned by ``os.listdir``. Entries that
        are not regular files (for example the ``.ipynb_checkpoints`` directory
        that Jupyter creates) are skipped instead of making ``open`` raise.

        Yields:
            list of str: The result of ``line.split()`` for each line; a blank
            line therefore yields an empty list.
        """
        for fname in os.listdir(self.dirname):
            path = os.path.join(self.dirname, fname)
            if not os.path.isfile(path):
                continue
            # The context manager closes each file once it is exhausted (or
            # when the generator is closed early) instead of leaking the handle.
            with open(path) as handle:
                for line in handle:
                    yield line.split()

In [9]:
# A deliberately tiny corpus: two sentences, each pairing a capital with its country.
sentences = [['Rome', 'Italy'], ['Beijing', 'China']]
# Train word2vec on the two sentences.
# - min_count=1 keeps every word, even though each one occurs only once.
# - seed=1 with workers=1 makes training repeatable: with several worker threads
#   the OS scheduling order changes the resulting vectors from run to run.
#   (On Python 3, identical results across interpreter launches additionally
#   require a fixed PYTHONHASHSEED.)
model = Word2Vec(sentences, min_count=1, seed=1, workers=1)

In [7]:
# Ask for the single word (topn=1) whose vector is closest to 'Rome'.
# The query goes through the word vectors in `model.wv`: calling most_similar()
# directly on the Word2Vec model is deprecated and was removed in gensim 4.
model.wv.most_similar(positive=['Rome'], topn=1)


Out[7]:
[('Beijing', 0.12166289240121841)]