In [384]:
# Scatter the points (xs, ys), one colour per cluster index found in k_fit,
# and write each cluster's index next to the first point that belongs to it.
figure(figsize=(15,15))
plt.xkcd()

# NOTE(review): x, yys and labels are not read anywhere in this cell; they are
# kept only because other cells may rely on these notebook-level names.
x = np.arange(10)
yys = [i+x+(i*x)**2 for i in range(optimal_n_clusters)]
labels = ['cluster{0}'.format(i) for i in range(optimal_n_clusters)]

# One evenly spaced colour per cluster, sampled from the nipy_spectral colormap.
colors = cm.nipy_spectral(np.linspace(0, 1, optimal_n_clusters))

for idx in range(optimal_n_clusters):
    # Look the members of this cluster up once instead of four times.
    members = np.where(k_fit == idx)
    cluster_xs = xs[members]
    cluster_ys = ys[members]
    # Pass the colour as a one-row 2-D array: a bare RGBA row is ambiguous
    # with "four values to colour-map" when a cluster has exactly four points.
    plt.scatter(cluster_xs, cluster_ys, s=100, label=str(idx), c=colors[idx:idx + 1])
    # Annotate the first dot in the list; a cluster without any points has
    # no first dot, so skip it instead of raising IndexError.
    if len(cluster_xs) > 0:
        plt.annotate('{0}'.format(idx), (cluster_xs[0], cluster_ys[0]))
plt.legend()
# Report the number of clusters actually plotted rather than a hard-coded 24.
plt.title('Topics clustered with cluster number = {0}'.format(optimal_n_clusters))
Out[384]:
In [383]:
# Scatter the points (xs, ys) grouped by run: each entry of topic_run pairs a
# run label with the indices of the points drawn for that run.
figure(figsize=(15,15))
plt.xkcd()
# get_run_name(topic_run, 1)

# NOTE(review): x, yys, colors and labels are not read in this cell (the
# scatter below takes its colours from colors_run); they are kept only
# because other cells may rely on these notebook-level names.
x = np.arange(10)
yys = [i+x+(i*x)**2 for i in range(optimal_n_clusters)]
colors = cm.nipy_spectral(np.linspace(0, 1, optimal_n_clusters))
labels = ['cluster{0}'.format(i) for i in range(optimal_n_clusters)]

# enumerate() yields the same position that topic_run.keys().index(run_number)
# would find, without a linear search per run, and .items() works on both
# Python 2 and Python 3 dictionaries.
for position, (run_number, id_list) in enumerate(topic_run.items()):
    plt.scatter(xs[id_list], ys[id_list], s=100, label=run_number, c=colors_run[position])
    # plt.annotate('{0}'.format(get_run_name(topic_run, idx)), (xs[numpy.where(k_fit==idx)][0], ys[numpy.where(k_fit==idx)][0]))
plt.legend()
plt.title('Clusters by run')
Out[383]:
In [421]:
from wordcloud import WordCloud
# Render one word cloud per entry of topic_weights, stacked top to bottom in
# the left-hand column of a two-column subplot grid.
plt.xkcd()
figure(figsize=(20,50))
n_rows = len(topic_weights)
for idx, topic in enumerate(topic_weights):
    # A fresh white-background cloud is built for every topic.
    cloud = WordCloud(background_color="white")
    # The cloud is fed the topic's (word, weight) pairs as a list.
    frequencies = list(topic.iteritems())
    rendered = cloud.generate_from_frequencies(frequencies)
    # Odd subplot numbers select the left cell of row idx.
    subplot(n_rows, 2, 2 * idx + 1)
    imshow(rendered)
    axis('off')
    # Disabled right-hand column (even subplot numbers), kept for reference:
    # subplot(nTopics,2,2*idx+2)
    # plot(topicsByAge[:,idx])
    # axis([10, 100, 0, 1.0])
    # title('Topic #%2d'%(idx))