In [ ]:
from gensim.models import AuthorTopicModel
# Load a previously saved author-topic model from the file 'model.atmodel'
# (path is relative to the notebook's working directory).
# NOTE(review): gensim's load is presumably pickle-based — only load model
# files that come from a trusted source; confirm against the gensim docs.
model = AuthorTopicModel.load('model.atmodel')
In [ ]:
# Display the model's topics, requesting up to 100 of them; as the last
# expression in the cell, the result is rendered as the cell output.
model.show_topics(num_topics=100)
In [ ]:
# Map every author listed in model.id2author to the result of
# model.get_author_topics() for that author.
# Only the values of id2author (the authors) are needed, so iterate over
# them directly instead of unpacking unused ids from .items().
aut_top = {
    author: model.get_author_topics(author)
    for author in model.id2author.values()
}
In [ ]:
# Show the author -> topics mapping built in the previous cell.
aut_top
In [ ]:
%%time
from sklearn.manifold import TSNE

# Two-component t-SNE with a fixed random_state.
tsne = TSNE(n_components=2, random_state=0)

# Authors with fewer documents than this threshold are left out.
smallest_author = 0

# Collect the ids of all authors that meet the document-count threshold.
authors = [
    author_id
    for author_name, author_id in model.author2id.items()
    if len(model.author2doc[author_name]) >= smallest_author
]

# Fit on the selected rows of model.state.gamma. The returned array is
# discarded here; the result is read back later from tsne.embedding_.
_ = tsne.fit_transform(model.state.gamma[authors, :])
In [ ]:
from bokeh.io import output_file
# Configure bokeh to write its output to the HTML file 'grafica.html'.
output_file('grafica.html')
In [ ]:
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

# Coordinates of each author: the two columns of the t-SNE embedding.
x = tsne.embedding_[:, 0]
y = tsne.embedding_[:, 1]

# Names of the embedded authors, in the same order as the embedding rows.
author_names = [model.id2author[author_id] for author_id in authors]

# Marker radius is the author's document count multiplied by `scale`.
scale = 0.01
author_sizes = [len(model.author2doc[name]) for name in author_names]
radii = [scale * doc_count for doc_count in author_sizes]

# Column data shared by the glyphs and the hover tooltips.
source = ColumnDataSource(
    data={
        'x': x,
        'y': y,
        'author_names': author_names,
        'author_sizes': author_sizes,
        'radii': radii,
    }
)

# Tooltip showing the author's name and document count on mouse-over.
hover = HoverTool(
    tooltips=[
        ("author", "@author_names"),
        ("size", "@author_sizes"),
    ]
)

# The hover tool object is combined with a comma-separated list of tool names.
plot_tools = [hover, 'crosshair,pan,wheel_zoom,box_zoom,reset,save,lasso_select']
p = figure(tools=plot_tools)
p.scatter(
    'x',
    'y',
    radius='radii',
    source=source,
    fill_alpha=0.6,
    line_color=None,
)
show(p)
In [ ]: