notebook.community

Edit and run



In [384]:

    
# Scatter plot of the (xs, ys) points, coloured by their cluster label in k_fit,
# with one legend entry per cluster.
plt.figure(figsize=(15, 15))
plt.xkcd()
# One colour per cluster, evenly spaced along the nipy_spectral colormap.
colors = cm.nipy_spectral(np.linspace(0, 1, optimal_n_clusters))
for idx in range(optimal_n_clusters):
    # Select the members of this cluster once and reuse the selection.
    members = np.where(k_fit == idx)
    cluster_xs, cluster_ys = xs[members], ys[members]
    # Use `color=` rather than `c=`: a single RGBA row passed as `c` cannot be
    # told apart from four scalar values that should be colormapped.
    plt.scatter(cluster_xs, cluster_ys, s=100, label=str(idx), color=colors[idx])
    # Annotate the first dot of the cluster; an empty cluster has no dot to label.
    if len(cluster_xs) > 0:
        plt.annotate('{0}'.format(idx), (cluster_xs[0], cluster_ys[0]))
plt.legend()
# Report the cluster count actually used instead of a hard-coded number.
plt.title('Topics clustered with cluster number = {0}'.format(optimal_n_clusters))









    Out[384]:





<matplotlib.text.Text at 0x7f67c9a8ef10>



In [383]:

    
# Scatter plot of the (xs, ys) points grouped by run, with one legend entry per run.
plt.figure(figsize=(15, 15))
plt.xkcd()
# topic_run is iterated as run_number -> id_list; the position of each entry in
# that iteration order picks its colour from colors_run. enumerate() yields the
# same index as topic_run.keys().index(run_number) without a linear search per
# run, and .items() works on both Python 2 and Python 3 dicts.
for run_idx, (run_number, id_list) in enumerate(topic_run.items()):
    # `color=` sets one colour for the whole group of points.
    plt.scatter(xs[id_list], ys[id_list], s=100, label=run_number, color=colors_run[run_idx])
plt.legend()
plt.title('Clusters by run')









    Out[383]:





<matplotlib.text.Text at 0x7f67c9be5790>



In [421]:

    
from wordcloud import WordCloud
# Draw one word cloud per entry of topic_weights, stacked vertically.
plt.xkcd()
plt.figure(figsize=(20, 50))
n_topics = len(topic_weights)
for idx, topic in enumerate(topic_weights):
    wc = WordCloud(background_color="white")
    # (word, weight) pairs; .items() works on both Python 2 and Python 3 dicts.
    # NOTE(review): newer wordcloud releases appear to expect a dict here
    # rather than a list of pairs; confirm against the installed version.
    ww = list(topic.items())
    img = wc.generate_from_frequencies(ww)
    # The grid is n_topics rows by 2 columns; only the left cell of each row is
    # drawn (a per-topic line plot for the right cell is currently disabled).
    plt.subplot(n_topics, 2, 2 * idx + 1)
    plt.imshow(img)
    plt.axis('off')



In [423]:

    
from wordcloud import WordCloud
# Draw one word cloud per entry of topic_weights, stacked vertically.
plt.xkcd()
plt.figure(figsize=(20, 50))
n_topics = len(topic_weights)
for idx, topic in enumerate(topic_weights):
    wc = WordCloud(background_color="white")
    # (word, weight) pairs; .items() works on both Python 2 and Python 3 dicts.
    # NOTE(review): newer wordcloud releases appear to expect a dict here
    # rather than a list of pairs; confirm against the installed version.
    ww = list(topic.items())
    img = wc.generate_from_frequencies(ww)
    # The grid is n_topics rows by 2 columns; only the left cell of each row is
    # drawn (a per-topic line plot for the right cell is currently disabled).
    plt.subplot(n_topics, 2, 2 * idx + 1)
    plt.imshow(img)
    plt.axis('off')

The biggest "cluster", dispersed (one word cloud per topic below):



In [426]:

    
from wordcloud import WordCloud
# Draw one word cloud per entry of topic_weights, stacked vertically.
plt.xkcd()
plt.figure(figsize=(20, 50))
n_topics = len(topic_weights)
for idx, topic in enumerate(topic_weights):
    wc = WordCloud(background_color="white")
    # (word, weight) pairs; .items() works on both Python 2 and Python 3 dicts.
    # NOTE(review): newer wordcloud releases appear to expect a dict here
    # rather than a list of pairs; confirm against the installed version.
    ww = list(topic.items())
    img = wc.generate_from_frequencies(ww)
    # The grid is n_topics rows by 2 columns; only the left cell of each row is
    # drawn (a per-topic line plot for the right cell is currently disabled).
    plt.subplot(n_topics, 2, 2 * idx + 1)
    plt.imshow(img)
    plt.axis('off')