In [40]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
from gensim.models.word2vec import Vocab  # Vocab class must be importable so pickle can restore the vocabulary
In [41]:
vocab = pickle.load(open('./pydslm/myvocab.pkl', 'rb'))
embeddings = pickle.load(open('./pydslm/myembeddings.pkl', 'rb'))
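Before using the pickles, it is worth a quick look at their structure. A minimal check, assuming `embeddings` is a dict holding a shared "MAIN" matrix plus one delta matrix per locale, all numpy arrays (consistent with how they are indexed below):
In [ ]:
# Quick inspection of the pickled objects (assumption: dict of numpy arrays).
print(sorted(embeddings.keys()))
print(len(vocab), embeddings["MAIN"].shape)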
In [42]:
def get_embedding_word(word, loc):
    """Return the embedding of `word` for locale `loc`: the shared
    MAIN vector plus the locale-specific delta vector."""
    index = vocab[word].index
    main_emb = embeddings["MAIN"][index]
    delta_emb = embeddings[loc][index]
    return main_emb + delta_emb
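As a quick sanity check of the lookup, one can compare a word's two locale-specific vectors directly. A minimal sketch (the choice of word is arbitrary; any key in `vocab` works):
In [ ]:
# Pick an arbitrary vocabulary word and compare its US and GB vectors.
w = sorted(vocab.keys())[0]
us_v = get_embedding_word(w, "us")
gb_v = get_embedding_word(w, "gb")
# Cosine similarity between the two locale-specific vectors.
cos = np.dot(us_v, gb_v) / (np.linalg.norm(us_v) * np.linalg.norm(gb_v))
print(w, cos)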
In [43]:
locs = ["us", "gb"]  # the locales for which the model stores delta embeddings
In [44]:
words = sorted(vocab.keys())
In [45]:
us_emb = np.array([get_embedding_word(w, "us") for w in words])
In [46]:
gb_emb = np.array([get_embedding_word(w, "gb") for w in words])
In [47]:
print(words)
In [52]:
us_txt = words
gb_txt = words
fig, ax = plt.subplots()
# US vectors in blue, GB vectors in red; each point labelled with its word.
ax.scatter(us_emb[:, 0], us_emb[:, 1], c='b')
for i, txt in enumerate(us_txt):
    ax.annotate(txt, (us_emb[i, 0], us_emb[i, 1]))
ax.scatter(gb_emb[:, 0], gb_emb[:, 1], c='r')
for i, txt in enumerate(gb_txt):
    ax.annotate(txt, (gb_emb[i, 0], gb_emb[i, 1]))
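The scatter plot above uses the first two raw embedding dimensions, which is only meaningful if the model was trained with 2-dimensional vectors. For higher-dimensional embeddings, a shared 2-D projection gives a more faithful picture; a minimal sketch, assuming scikit-learn is available:
In [ ]:
from sklearn.decomposition import PCA

# Fit one PCA on both locales so the projected points stay comparable.
proj = PCA(n_components=2).fit(np.vstack([us_emb, gb_emb]))
us_2d = proj.transform(us_emb)
gb_2d = proj.transform(gb_emb)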
In [54]:
print(embeddings["MAIN"])
In [55]:
print(embeddings["gb"])
In [57]:
print(embeddings["us"])
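Printing the raw matrices is hard to interpret directly; a more telling view is to rank words by how far their US and GB vectors diverge. A minimal sketch using per-word cosine distance over the arrays built above:
In [ ]:
# Rank words by cosine distance between their US and GB vectors.
norms = np.linalg.norm(us_emb, axis=1) * np.linalg.norm(gb_emb, axis=1)
cos_sim = np.sum(us_emb * gb_emb, axis=1) / norms
shift = sorted(zip(words, 1 - cos_sim), key=lambda x: -x[1])
print(shift[:10])  # the ten most locale-dependent words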