In [1]:
from gensim import corpora, models
# Load the AP corpus: the document data file plus its vocabulary file.
corpus = corpora.BleiCorpus(
    fname='./data/ap/ap.dat',
    fname_vocab='./data/ap/vocab.txt',
)

In [2]:
# Fit an LDA model with 100 topics on the corpus, passing the corpus's own
# `id2word` attribute so the model shares its vocabulary mapping.
model = models.ldamodel.LdaModel(
    corpus=corpus,
    id2word=corpus.id2word,
    num_topics=100,
)

# Take the document at offset 0 and index the trained model with it.
doc = corpus.docbyoffset(0)
topics = model[doc]


WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy

In [4]:
import matplotlib.pyplot as plt
# For every document in the corpus, index the model with it and record the
# length of the result (one integer per document).
num_topics_used = [len(model[doc]) for doc in corpus]
# Histogram of those per-document counts. The call is left as the cell's last
# expression, so its return value (counts, bin edges, patches) is what the
# notebook shows as the cell output.
plt.hist(num_topics_used)


Out[4]:
(array([ 180.,  293.,  392.,  427.,  359.,  389.,  117.,   57.,   25.,    7.]),
 array([  1. ,   3.2,   5.4,   7.6,   9.8,  12. ,  14.2,  16.4,  18.6,
         20.8,  23. ]),
 <a list of 10 Patch objects>)

In [ ]:
# Fit a second 100-topic LDA model on the same corpus, this time passing
# alpha=1 explicitly instead of relying on the default used for `model`.
# The keyword must be spelled `alpha`: LdaModel does not accept arbitrary
# keyword arguments, so a misspelled name raises TypeError.
model1 = models.ldamodel.LdaModel(corpus, num_topics=100, id2word=corpus.id2word, alpha=1)