In [103]:
from sklearn.externals import joblib
In [104]:
# To get the topic words from the model
def get_topics(ldamodel, num_topics, num_words, multiplier=1000):
    """Build one weighted "bag of words" string per topic of an LDA model.

    Each word of a topic is repeated ``int(multiplier * weight)`` times and
    all repetitions are joined with single spaces, so that the relative
    frequency of a word in the string reflects its weight in the topic.

    Parameters
    ----------
    ldamodel : object
        Any object exposing ``show_topics(num_topics=..., num_words=...,
        formatted=False)`` that yields ``(topic_id, [(word, weight), ...])``
        pairs (presumably a gensim LDA model -- confirm against the pickle).
    num_topics : int
        Forwarded to ``show_topics`` as ``num_topics``.
    num_words : int
        Forwarded to ``show_topics`` as ``num_words``.
    multiplier : int or float, optional
        Scale factor applied to each weight before truncating to an integer
        repetition count. Defaults to 1000.

    Returns
    -------
    list of str
        One string per topic, in the order returned by ``show_topics``.
        Words whose scaled weight truncates to zero (or below) are omitted;
        a topic with no surviving words yields an empty string.
    """
    topics = []
    for _topic_id, topic in ldamodel.show_topics(num_topics=num_topics, num_words=num_words, formatted=False):
        repeated_words = []
        for word, weight in topic:
            # Repeat the whole word (not its characters) as a list so the
            # final join puts exactly one space between every occurrence.
            repeated_words.extend([word] * int(multiplier * weight))
        topics.append(' '.join(repeated_words))
    return topics
In [105]:
## word clouds
from os import path
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Settings for this run: which pickled LDA model to read and how much of
# each topic to extract from it.
num_topics, num_words = 14, 30
fname = "data/eos/lda/LDAmodel_2014_01_K_14.pkl"

# joblib.load unpickles the file, so only point it at trusted model dumps.
ldamodel = joblib.load(fname)

# Collect the topic strings from the loaded model and show them.
topics = get_topics(ldamodel, num_topics=num_topics, num_words=num_words)
print(topics)
# def terms_to_wordcounts(topics, multiplier=1000):
# return " ".join([" ".join(int(multiplier*i[0]) * [i[1]]) for i in topics])
# print (terms_to_wordcounts(topics))
# wordcloud = WordCloud(font_path="Impact_Label.ttf", background_color="black").generate(terms_to_wordcounts(terms), 1000)
# plt.imshow(wordcloud)
# plt.axis("off")
# plt.savefig("terms1")
# plt.close()
In [ ]:
In [ ]: