In [40]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
from gensim.models.word2vec import Vocab  # Vocab class must be importable so pickle can restore the vocabulary
In [41]:
vocab = pickle.load(open('./pydslm/myvocab.pkl', 'rb'))
embeddings = pickle.load(open('./pydslm/myembeddings.pkl', 'rb'))
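Before using the pickles, it is worth a quick look at their structure. A minimal check, assuming `embeddings` is a dict holding a shared "MAIN" matrix plus one delta matrix per locale, all numpy arrays (consistent with how they are indexed below):
In [ ]:
# Quick inspection of the pickled objects (assumption: dict of numpy arrays).
print(sorted(embeddings.keys()))
print(len(vocab), embeddings["MAIN"].shape)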
In [42]:
def get_embedding_word(word, loc):
    """Return the embedding of `word` for locale `loc`: the shared
    MAIN vector plus the locale-specific delta vector."""
    index = vocab[word].index
    main_emb = embeddings["MAIN"][index]
    delta_emb = embeddings[loc][index]
    return main_emb + delta_emb
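As a quick sanity check of the lookup, one can compare a word's two locale-specific vectors directly. A minimal sketch (the choice of word is arbitrary; any key in `vocab` works):
In [ ]:
# Pick an arbitrary vocabulary word and compare its US and GB vectors.
w = sorted(vocab.keys())[0]
us_v = get_embedding_word(w, "us")
gb_v = get_embedding_word(w, "gb")
# Cosine similarity between the two locale-specific vectors.
cos = np.dot(us_v, gb_v) / (np.linalg.norm(us_v) * np.linalg.norm(gb_v))
print(w, cos)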
In [43]:
locs = ["us", "gb"]  # the locales for which the model stores delta embeddings
In [44]:
words = sorted(vocab.keys())
In [45]:
us_emb = np.array([get_embedding_word(w, "us") for w in words])
In [46]:
gb_emb = np.array([get_embedding_word(w, "gb") for w in words])
In [47]:
print(words)
In [52]:
us_txt = words
gb_txt = words
fig, ax = plt.subplots()
# US vectors in blue, GB vectors in red; each point labelled with its word.
ax.scatter(us_emb[:, 0], us_emb[:, 1], c='b')
for i, txt in enumerate(us_txt):
    ax.annotate(txt, (us_emb[i, 0], us_emb[i, 1]))
ax.scatter(gb_emb[:, 0], gb_emb[:, 1], c='r')
for i, txt in enumerate(gb_txt):
    ax.annotate(txt, (gb_emb[i, 0], gb_emb[i, 1]))
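The scatter plot above uses the first two raw embedding dimensions, which is only meaningful if the model was trained with 2-dimensional vectors. For higher-dimensional embeddings, a shared 2-D projection gives a more faithful picture; a minimal sketch, assuming scikit-learn is available:
In [ ]:
from sklearn.decomposition import PCA

# Fit one PCA on both locales so the projected points stay comparable.
proj = PCA(n_components=2).fit(np.vstack([us_emb, gb_emb]))
us_2d = proj.transform(us_emb)
gb_2d = proj.transform(gb_emb)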
In [54]:
print(embeddings["MAIN"])
In [55]:
print(embeddings["gb"])
In [57]:
print(embeddings["us"])
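Printing the raw matrices is hard to interpret directly; a more telling view is to rank words by how far their US and GB vectors diverge. A minimal sketch using per-word cosine distance over the arrays built above:
In [ ]:
# Rank words by cosine distance between their US and GB vectors.
norms = np.linalg.norm(us_emb, axis=1) * np.linalg.norm(gb_emb, axis=1)
cos_sim = np.sum(us_emb * gb_emb, axis=1) / norms
shift = sorted(zip(words, 1 - cos_sim), key=lambda x: -x[1])
print(shift[:10])  # the ten most locale-dependent words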