notebook.community

Edit and run



In [1]:

    
from gensim.models import Word2Vec
import numpy as np
from sklearn.manifold import TSNE
import pandas as pd
from bokeh.charts import Scatter, show, output_file
from bokeh.models import LabelSet, ColumnDataSource



In [2]:

    
# Show numpy floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

# Load the saved Word2Vec model from disk.
model = Word2Vec.load("300features_40minwords_10context_AnnouncementTitle")

# t-SNE reducer used later to lower the dimension of each word vector;
# random_state is pinned so repeated runs start from the same seed.
tsne = TSNE(random_state=0)



In [3]:

    
# Display the shape of the model's syn0 array
# (presumably the word-vector matrix, words x dimensions — TODO confirm).
model.syn0.shape









    Out[3]:





(602, 100)



In [3]:

    
# Vocabulary words, in the order their vectors are stacked below; row k of
# `vec` belongs to dic[k], which is what the plot labels rely on later.
dic = list(model.vocab.keys())

# Build the matrix in one allocation instead of calling np.vstack once per
# word (that re-copies the whole growing array on every iteration).
# The width comes from the model itself rather than a hard-coded 100, and the
# reshape keeps a well-formed (0, width) array even for an empty vocabulary.
vec = np.array([model[word] for word in dic], dtype=float).reshape(
    len(dic), model.syn0.shape[1]
)

# Reduce the word vectors with the t-SNE instance configured earlier.
low_dim = tsne.fit_transform(vec)



In [ ]:

    
# Word graph: one point per row of the t-SNE output, annotated with its word.
df = pd.DataFrame(low_dim, columns=['x', 'y']).assign(vocab=dic)

# Data source feeding the text labels (coordinates plus the word to print).
source = ColumnDataSource(
    data={
        'x': df['x'],
        'y': df['y'],
        'vocab': dic,
    }
)

# Scatter of the projected coordinates.
p = Scatter(df, x='x', y='y', title="vocab in hypterdimention")

# Draw each word slightly offset from its point so text and marker don't overlap.
labels = LabelSet(
    x='x',
    y='y',
    text='vocab',
    level='glyph',
    x_offset=5,
    y_offset=5,
    source=source,
    render_mode='canvas',
)
p.add_layout(labels)

# Optional: write the plot to a standalone HTML file as well.
# output_file("word2vec.html", title="word2vec")
show(p)