In [1]:
from gensim.models.word2vec import Word2Vec
In [2]:
ls
In [3]:
model = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)
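The call above matches the older gensim API; in gensim 1.0+ the word2vec-format loader moved to KeyedVectors. A minimal sketch, assuming a recent gensim is installed (the loaded object answers the same most_similar / similarity / doesnt_match queries used below):
In [ ]:
# Equivalent load under newer gensim (>= 4.0); same pretrained GoogleNews
# binary as above.
from gensim.models import KeyedVectors
model = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz',
                                           binary=True)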
In [4]:
model.most_similar(positive=['woman', 'king'], negative=['man'])
Out[4]:
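Under the hood, most_similar does vector arithmetic: it unit-normalizes each input vector, adds the positives, subtracts the negatives, and ranks the vocabulary by cosine similarity to the result (excluding the query words themselves). A rough sketch of the 'king' example, scored against a hand-picked candidate list rather than the full vocabulary:
In [ ]:
import numpy as np

def unit(v):
    return v / np.linalg.norm(v)

# king - man + woman, built from unit-normalized vectors
target = unit(model['king']) - unit(model['man']) + unit(model['woman'])

# rank a few illustrative candidates by cosine similarity to that direction
candidates = ['queen', 'monarch', 'princess', 'prince', 'throne']
sorted(((w, float(np.dot(unit(model[w]), unit(target)))) for w in candidates),
       key=lambda pair: -pair[1])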
In [9]:
model.most_similar(positive=['kitten', 'dog'], negative=['cat'])
Out[9]:
In [11]:
model.most_similar(positive=['biggest', 'small'], negative=['big'])
Out[11]:
In [12]:
model.most_similar(positive=['Greece', 'Oslo'], negative=['Athens'])
Out[12]:
In [14]:
model.most_similar(positive=['sister', 'grandson'], negative=['brother'])
Out[14]:
In [17]:
model.most_similar(positive=['Paris', 'Italy'], negative=['France'])
Out[17]:
In [18]:
model.most_similar(positive=['Paris', 'Japan'], negative=['France'])
Out[18]:
In [19]:
model.most_similar(positive=['Paris', 'Florida'], negative=['France'])
Out[19]:
In [20]:
model.most_similar(positive=['scientist', 'Messi'], negative=['Einstein'])
Out[20]:
In [21]:
model.most_similar(positive=['scientist', 'Mozart'], negative=['Einstein'])
Out[21]:
In [22]:
model.most_similar(positive=['scientist', 'Picasso'], negative=['Einstein'])
Out[22]:
In [23]:
model.most_similar(positive=['sushi', 'Germany'], negative=['Japan'])
Out[23]:
In [24]:
model.most_similar(positive=['sushi', 'France'], negative=['Japan'])
Out[24]:
In [25]:
model.most_similar(positive=['sushi', 'USA'], negative=['Japan'])
Out[25]:
In [29]:
model.most_similar(positive=['Beijing', 'Russia'], negative=['China'])
Out[29]:
In [30]:
model.most_similar(positive=['Beijing', 'Japan'], negative=['China'])
Out[30]:
In [32]:
model.most_similar(positive=['Beijing', 'Poland'], negative=['China'])
Out[32]:
In [33]:
model.most_similar(positive=['Beijing', 'Germany'], negative=['China'])
Out[33]:
In [34]:
model.most_similar(positive=['Beijing', 'France'], negative=['China'])
Out[34]:
In [35]:
model.most_similar(positive=['Beijing', 'Italy'], negative=['China'])
Out[35]:
In [36]:
model.most_similar(positive=['Beijing', 'Greece'], negative=['China'])
Out[36]:
In [37]:
model.most_similar(positive=['Beijing', 'Spain'], negative=['China'])
Out[37]:
In [44]:
model.most_similar(positive=['Steve_Ballmer', 'Apple'], negative=['Microsoft'])
Out[44]:
In [45]:
model.most_similar(positive=['Steve_Ballmer', 'Google'], negative=['Microsoft'])
Out[45]:
In [46]:
model.most_similar(positive=['Steve_Ballmer', 'IBM'], negative=['Microsoft'])
Out[46]:
In [47]:
model.most_similar(positive=['Steve_Ballmer', 'Amazon'], negative=['Microsoft'])
Out[47]:
In [48]:
model.most_similar(positive=['Czech', 'currency'])
Out[48]:
In [49]:
model.most_similar(positive=['Vietnam', 'capital'])
Out[49]:
In [50]:
model.most_similar(positive=['German', 'airlines'])
Out[50]:
In [63]:
model.most_similar(positive=['woman', 'emperor'], negative=['man'])
Out[63]:
In [65]:
model.most_similar(positive=['woman', 'brother'], negative=['man'])
Out[65]:
In [ ]:
# Athens Greece Oslo Norway
# Chicago Illinois Stockton California
# brother sister grandson granddaughter
# apparent apparently rapid rapidly
# possibly impossibly ethical unethical
# great greater tough tougher
# easy easiest lucky luckiest
# think thinking read reading
# walking walked swimming swam
# mouse mice dollar dollars
# France - Paris Italy: Rome Japan: Tokyo Florida: Tallahassee
# big - bigger small: larger cold: colder quick: quicker
# Miami - Florida Baltimore: Maryland Dallas: Texas Kona: Hawaii
# Einstein - scientist Messi: midfielder Mozart: violinist Picasso: painter
# Sarkozy - France Berlusconi: Italy Merkel: Germany Koizumi: Japan
# copper - Cu zinc: Zn gold: Au uranium: plutonium
# Berlusconi - Silvio Sarkozy: Nicolas Putin: Medvedev Obama: Barack
# Microsoft - Windows Google: Android IBM: Linux Apple: iPhone
# Microsoft - Ballmer Google: Yahoo IBM: McNealy Apple: Jobs
# Japan - sushi Germany: bratwurst France: tapas USA: pizza
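The notes above can be checked programmatically: for an analogy a : b :: c : ?, ask for the top word near b - a + c. A small sketch; the triples are hand-picked examples from the notes, not a systematic evaluation:
In [ ]:
# For each a : b :: c : ?, print the model's top-ranked answer and its score.
analogies = [
    ('Athens', 'Greece', 'Oslo'),
    ('brother', 'sister', 'grandson'),
    ('big', 'biggest', 'small'),
    ('France', 'Paris', 'Italy'),
]
for a, b, c in analogies:
    best, score = model.most_similar(positive=[b, c], negative=[a], topn=1)[0]
    print('%s : %s :: %s : %s (%.3f)' % (a, b, c, best, score))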
In [5]:
model.doesnt_match("breakfast cereal dinner lunch".split())
Out[5]:
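doesnt_match works on the same vectors: it averages the unit-normalized word vectors and returns the word least similar to that mean. A rough re-implementation for the same word list:
In [ ]:
import numpy as np
words = "breakfast cereal dinner lunch".split()
vecs = np.array([model[w] / np.linalg.norm(model[w]) for w in words])
mean = vecs.mean(axis=0)
# the word whose vector is least similar to the mean is the odd one out
words[int(np.argmin(vecs.dot(mean)))]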
In [6]:
model.similarity('woman', 'man')
Out[6]:
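similarity() is the cosine similarity of the two word vectors; the same number can be reproduced with numpy directly:
In [ ]:
import numpy as np
v1, v2 = model['woman'], model['man']
float(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))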