Download the GloVe pre-trained embeddings (glove.6B.zip) from: https://nlp.stanford.edu/projects/glove/
In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
In [4]:
# Load the whole GloVe embedding file into memory as {word: vector}.
# Each line of the file is: <word> <d1> <d2> ... <d200>.
# Use a context manager so the handle is closed even if parsing raises,
# and decode explicitly as UTF-8 (the GloVe files are UTF-8 encoded).
embeddings = dict()
with open('glove.6B.200d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        # Remaining tokens are the 200 float components of the vector.
        embedding = np.array(values[1:], dtype='float32')
        embeddings[word] = embedding
print('Loaded %s word vectors.' % len(embeddings))
In [21]:
# Embedding vector for "was" — the left-hand term of the analogy.
x = embeddings["was"]
In [22]:
# Embedding vector for "is" — subtracted to strip the present-tense sense.
y = embeddings["is"]
In [23]:
# Embedding vector for "are" — added to re-introduce the plural sense.
z = embeddings["are"]
In [24]:
# Solve the analogy "was" - "is" + "are" ≈ ? (expected: "were") by searching
# the vocabulary for the vector most similar to the combined query vector.
x = np.subtract(x, y)
x = np.add(x, z)
# Cosine similarity (dot product of unit vectors) instead of a raw dot
# product: raw dot product is biased toward large-magnitude vectors, which
# in GloVe correlate with word frequency rather than semantic closeness.
x_norm = np.linalg.norm(x)

predict = None
# Start at -inf, not 0: cosine similarity can be negative, and starting at 0
# would leave `predict` as None if every candidate were anti-correlated.
max_similarity = -np.inf
for word, embedding in embeddings.items():
    # Exclude the query words themselves — otherwise one of them ("are")
    # trivially dominates the search, as is standard in analogy evaluation.
    if word in ('was', 'is', 'are'):
        continue
    denom = x_norm * np.linalg.norm(embedding)
    if denom == 0:
        continue  # skip degenerate zero vectors
    similarity = np.dot(x, embedding) / denom
    if similarity > max_similarity:
        predict = word
        # Log each new best candidate as the search progresses.
        print("Similarity: ", similarity, "Prediction: ", word)
        max_similarity = similarity
In [ ]: