This file trains word2vec word vectors.
About word2vec
In [5]:
import logging
import os.path
import sys
import multiprocessing
#import gensim.models.word2vec
from gensim.models.word2vec import Word2Vec
from gensim.models.word2vec import LineSentence
In [6]:
if __name__ == '__main__':
    # Train a word2vec model on the pre-processed review corpus and persist it
    # twice: as a native gensim model and as a plain-text word2vec vector file.
    # NOTE(review): written against the gensim >= 4.0 API -- confirm the
    # installed version.
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    # Pass the argument lazily so the message is only formatted when emitted.
    logger.info("running %s", ' '.join(sys.argv))

    # Input corpus and output locations.
    file_path = "./dummy/processed_reviews.txt"
    model_path = "./dummy/model.model"
    vector_path = "./dummy/vector.txt"

    # Use one training worker per available CPU core.
    count = multiprocessing.cpu_count()

    # gensim 4 renamed the `size` keyword to `vector_size`.
    model = Word2Vec(
        LineSentence(file_path),
        vector_size=200,
        window=10,
        min_count=5,
        workers=count,
    )
    model.save(model_path)
    # The word2vec-format export lives on the model's KeyedVectors (`model.wv`);
    # the method of the same name on Word2Vec itself was deprecated and removed.
    model.wv.save_word2vec_format(vector_path, binary=False)
In [10]:
"""
Testing word2vec vector
"""
# Imported in this cell so it can be run without editing the import cell.
from gensim.models import KeyedVectors

# Word2Vec.load_word2vec_format was deprecated and later removed from gensim;
# a word2vec-format file is loaded as KeyedVectors, which offers most_similar
# directly, so the queries in the following cells keep working on `model`.
model = KeyedVectors.load_word2vec_format('./dummy/vector.txt', binary=False)
model.most_similar('king')
Out[10]:
In [15]:
# Query with both attracting ('pool', 'gym') and repelling ('swim') words.
model.most_similar(
    negative=['swim'],
    positive=['pool', 'gym'],
)
Out[15]:
In [17]:
# Query with several positive words at once.
model.most_similar(positive=['spa', 'internet', 'pool', 'gym'])
Out[17]:
In [ ]: