In [1]:
from gensim import corpora, models
# Load the AP corpus from disk. BleiCorpus reads a data file plus a
# separate vocabulary file; both paths are relative to the notebook.
corpus = corpora.BleiCorpus(
    fname='./data/ap/ap.dat',
    fname_vocab='./data/ap/vocab.txt',
)
In [2]:
# Fit a 100-topic LDA model on the corpus, passing the corpus' own
# id2word mapping to the model.
model = models.ldamodel.LdaModel(
    corpus=corpus,
    num_topics=100,
    id2word=corpus.id2word,
)

# Read the document stored at offset 0 of the corpus and ask the fitted
# model for that document's topics.
doc = corpus.docbyoffset(0)
topics = model[doc]
In [4]:
import matplotlib.pyplot as plt
# For every document in the corpus, count how many topics the model
# returns for it, then draw a histogram of those counts.
num_topics_used = [
    len(model[doc])
    for doc in corpus
]
plt.hist(num_topics_used)
Out[4]:
In [ ]:
# Fit a second 100-topic LDA model on the same corpus, this time with an
# explicit `alpha` of 1 instead of the library default.
# NOTE: the keyword must be spelled `alpha`; LdaModel does not accept
# arbitrary keyword arguments, so a misspelling raises TypeError.
model1 = models.ldamodel.LdaModel(
    corpus,
    num_topics=100,
    id2word=corpus.id2word,
    alpha=1,
)