GloVe embeddings

Download the pre-trained GloVe embeddings glove.6B.zip from https://nlp.stanford.edu/projects/glove/ and unzip it; this notebook uses the 200-dimensional vectors (glove.6B.200d.txt).
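
To fetch and unpack the file programmatically, something like the following works (a minimal sketch, assuming Python 3 and that the archive is still served from nlp.stanford.edu/data/; the download is roughly 800 MB, so grabbing it by hand from the project page is just as good):

In [ ]:
import os
import urllib.request
import zipfile

# download the archive once and extract only the 200d vectors used below
if not os.path.exists('glove.6B.200d.txt'):
    urllib.request.urlretrieve('https://nlp.stanford.edu/data/glove.6B.zip',
                               'glove.6B.zip')
    with zipfile.ZipFile('glove.6B.zip') as zf:
        zf.extract('glove.6B.200d.txt')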


In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

In [4]:
# load the whole embedding into memory: each line of the file is a word
# followed by its 200 vector components
embeddings = dict()
with open('glove.6B.200d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        embedding = np.array(values[1:], dtype='float32')
        embeddings[word] = embedding
print('Loaded %s word vectors.' % len(embeddings))


Loaded 400000 word vectors.
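
Raw dot products favor words whose vectors have large norms, so cosine similarity is the more common way to compare GloVe vectors. A small helper for later use (a sketch, not part of the original notebook):

In [ ]:
def cosine_similarity(a, b):
    # cosine similarity: the dot product of the two vectors divided by
    # the product of their L2 norms
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))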

In [21]:
x = embeddings['was']
# print(x)

In [22]:
y = embeddings['is']
# print(y)

In [23]:
z = embeddings['are']
# print(z)

In [24]:
# analogy: 'is' is to 'was' as 'are' is to ?
# vector arithmetic: was - is + are should land near the vector for 'were'
x = np.subtract(x, y)
x = np.add(x, z)
predict = None
max_similarity = 0
for word, embedding in embeddings.items():
    # rank every vocabulary word by its dot product with the analogy
    # vector, printing each new best candidate along the way
    similarity = np.dot(x, embedding)
    if similarity > max_similarity:
        predict = word
        print("Similarity: ", similarity, "Prediction: ", word)
        max_similarity = similarity


Similarity:  20.964674 Prediction:  the
Similarity:  21.173893 Prediction:  to
Similarity:  23.63844 Prediction:  and
Similarity:  29.421112 Prediction:  was
Similarity:  29.430035 Prediction:  be
Similarity:  30.773361 Prediction:  are
Similarity:  33.251774 Prediction:  have
Similarity:  44.052734 Prediction:  were
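
Note how frequent words like 'the', 'to', and 'and' appear early in the trace above: the raw dot product is biased toward large-norm vectors, and the search can also return one of the query words. A common refinement is to rank by cosine similarity and skip the query words. A sketch using the cosine_similarity helper defined earlier (the variable x still holds was - is + are; output not shown):

In [ ]:
best_word, best_sim = None, -1.0
for word, embedding in embeddings.items():
    if word in ('was', 'is', 'are'):
        continue  # the analogy inputs themselves are not valid answers
    sim = cosine_similarity(x, embedding)
    if sim > best_sim:
        best_word, best_sim = word, sim
print(best_word, best_sim)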
