In [40]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
from gensim.models.word2vec import Vocab  # needed so pickle can resolve the Vocab objects

In [41]:
vocab = pickle.load(open('./pydslm/myvocab.pkl', 'rb'))
embeddings = pickle.load(open('./pydslm/myembeddings.pkl', 'rb'))

In [42]:
def get_embedding_word(word, loc):
    """Return the embedding of `word` for region `loc`: the shared
    MAIN vector plus the region-specific delta."""
    index = vocab[word].index
    main_emb = embeddings["MAIN"][index]
    delta_emb = embeddings[loc][index]
    return main_emb + delta_emb
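
A quick usage check (a sketch, assuming 'happy' is in the loaded vocabulary; the word list printed below confirms it):

In [ ]:
# Example usage: a region's vector for a word is the shared MAIN vector
# plus that region's delta, as returned by get_embedding_word.
print get_embedding_word('happy', 'us')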

In [43]:
locs = ["us", "gb"]  # the two regions compared below

In [44]:
words = sorted(vocab.keys())

In [45]:
us_emb = np.array([get_embedding_word(w, "us") for w in words])

In [46]:
gb_emb = np.array([get_embedding_word(w, "gb") for w in words])

In [47]:
print words


[u'and', u'gay', u'happy', u'lesbian', u'sad']

In [52]:
fig, ax = plt.subplots()
ax.scatter(us_emb[:, 0], us_emb[:, 1], c='b')  # US embeddings in blue
for i, txt in enumerate(words):
    ax.annotate(txt, (us_emb[i, 0], us_emb[i, 1]))

ax.scatter(gb_emb[:, 0], gb_emb[:, 1], c='r')  # GB embeddings in red
for i, txt in enumerate(words):
    ax.annotate(txt, (gb_emb[i, 0], gb_emb[i, 1]))

[scatter plot: US words in blue, GB words in red, each point labeled with its word]

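To put numbers on the drift the plot shows, a minimal sketch (not in the original notebook) using only the arrays built above:

In [ ]:
# Euclidean distance between each word's US and GB embeddings; larger
# values mean the word's representation differs more between regions.
for w, u, g in zip(words, us_emb, gb_emb):
    print w, np.linalg.norm(u - g)
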
In [54]:
print embeddings["MAIN"]


[[-1.08998859  1.00217354]
 [-4.02917671 -3.84636998]
 [ 3.39260459 -3.315027  ]
 [ 0.98617351 -3.91857505]
 [ 0.55363989 -1.04641557]]

In [55]:
print embeddings["gb"]


[[-0.20267276 -0.02359975]
 [-1.89993203 -2.02385378]
 [ 0.          0.        ]
 [-5.77808523 -0.84161723]
 [ 0.4856236  -0.53701729]]

In [57]:
print embeddings["us"]


[[-0.89458281  0.904239  ]
 [-1.89917827 -2.02278471]
 [ 3.23652887 -3.54219031]
 [ 6.91423368 -2.96366215]
 [ 0.0899322  -0.48022962]]
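
As a cross-check (a sketch; it relies only on the fact that get_embedding_word indexes these matrices by vocab[word].index), the reconstructed US matrix should equal MAIN plus the "us" deltas:

In [ ]:
# Rebuild the US embeddings in vocab-index order, then compare with
# MAIN + the "us" delta matrix; prints True if the five vocabulary
# indices cover the matrix rows, as the 5x2 matrices above suggest.
us_by_index = np.empty_like(us_emb)
for row, w in zip(us_emb, words):
    us_by_index[vocab[w].index] = row
print np.allclose(us_by_index, embeddings["MAIN"] + embeddings["us"])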
