Following the tutorial "Topic Modeling for Fun and Profit".

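Launch Jupyter with `jupyter notebook --NotebookApp.iopub_data_rate_limit=10000000`; otherwise the larger pyLDAvis outputs are dropped with an "IOPub data rate exceeded" error.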


In [1]:
import itertools
import logging
import os
import pickle
import time

from cltk.stop.greek.stops import STOPS_LIST
import gensim
from gensim.corpora.mmcorpus import MmCorpus
from gensim.utils import simple_preprocess
import pyLDAvis.gensim

In [2]:
# Switch pyLDAvis to notebook mode so the prepared visualizations below are
# rendered inline as cell output (see the pyLDAvis documentation).
pyLDAvis.enable_notebook()

In [3]:
# Print log records as "<LEVEL> : <message>" at INFO and above, so the INFO
# messages emitted while loading the corpus, dictionary and models are visible.
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
# ipython sometimes messes up the logging setup; restore the root level.
# Go through setLevel() rather than assigning `logging.root.level` directly:
# on Python 3.7+ setLevel() also invalidates the loggers' cached
# isEnabledFor() results, which a bare attribute assignment leaves stale.
logging.getLogger().setLevel(logging.INFO)

In [4]:
# Directory that holds the saved BoW corpus, dictionary and LDA model files
# loaded by the cells below.
user_dir = os.path.expanduser('~/cltk_data/user_data/lda_tlg/')
# exist_ok=True keeps this cell re-runnable without a try/except, and still
# raises if the path exists but is not a directory (previously swallowed).
os.makedirs(user_dir, exist_ok=True)

In [5]:
# Load the saved bag-of-words corpus (a gensim MmCorpus file) from user_dir.
bow_name = (
    'gensim_bow_tlg_nobelow20_noabove0.1_tokmin3_tokmax20'
    '_docmin50_deaccentFalse.mm'
)
bow_path = os.path.join(user_dir, bow_name)
corpus_bow_tlg = gensim.corpora.MmCorpus(bow_path)


INFO : loaded corpus index from /home/kyle/cltk_data/user_data/lda_tlg/gensim_bow_tlg_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.mm.index
INFO : initializing corpus reader from /home/kyle/cltk_data/user_data/lda_tlg/gensim_bow_tlg_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.mm
INFO : accepted corpus with 1516 documents, 88039 features, 4131789 non-zero entries

In [6]:
# All of these values are embedded in the dictionary/model file names built
# below, so they must match the settings the saved files were created with.

# Token / document length limits.
TOK_MIN = 3  # rm words shorter than
TOK_MAX = 20  # rm words longer than
DOC_MIN = 50  # drop docs shorter than
PREPROCESS_DEACCENT = False

# Vocabulary thresholds (presumably gensim `filter_extremes` arguments -- TODO confirm).
no_below = 20
no_above = 0.1

# One saved LDA model exists per topic count; PASSES is part of each file name.
NUM_TOPICS_LIST = [5, 10, 20, 40, 60, 120]
PASSES = 100

In [7]:
# Rebuild the dictionary's file name from the settings in the config cell;
# the fields are listed in the order the name template expects them.
dict_name_fields = (no_below, no_above, TOK_MIN, TOK_MAX, DOC_MIN, PREPROCESS_DEACCENT)
dict_name = 'gensim_dict_id2word_tlg_nobelow{}_noabove{}_tokmin{}_tokmax{}_docmin{}_deaccent{}.dict'.format(*dict_name_fields)
dict_path = os.path.join(user_dir, dict_name)

# Load the saved gensim Dictionary (id -> word mapping) from disk.
id2word_tlg = gensim.corpora.dictionary.Dictionary.load(dict_path)


INFO : loading Dictionary object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_dict_id2word_tlg_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.dict
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_dict_id2word_tlg_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.dict

In [8]:
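# # Examples of how to use a loaded model (left commented out: `lda_model` is only
# # loaded in later cells, and `tokenize`/`doc` are not defined in the cells shown here)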
# lda_model.print_topics(-1)  # print a few most important words for each LDA topic
# # transform text into the bag-of-words space
# bow_vector = id2word_tlg.doc2bow(tokenize(doc))
# print([(id2word_tlg[id], count) for id, count in bow_vector])

# # transform into LDA space
# lda_vector = lda_model[bow_vector]
# print(lda_vector)

# # print the document's single most prominent LDA topic
# print(lda_model.print_topic(max(lda_vector, key=lambda item: item[1])[0]))

In [9]:
# Build the path of every saved LDA model, one per entry in NUM_TOPICS_LIST.
# Only the topic count varies between models; the remaining name fields are shared.
shared_name_fields = (PASSES, no_below, no_above, TOK_MIN, TOK_MAX, DOC_MIN, PREPROCESS_DEACCENT)
all_paths = []
for num_topics in NUM_TOPICS_LIST:
    lda_model_name = 'gensim_lda_model_tlg_numtopics{}_numpasses{}_nobelow{}_noabove{}_tokmin{}_tokmax{}_docmin{}_deaccent{}.model'.format(num_topics, *shared_name_fields)
    path_lda = os.path.join(user_dir, lda_model_name)
    all_paths.append(path_lda)

In [10]:
def load_lda_model(path_lda):
    """Load a previously saved gensim ``LdaMulticore`` model.

    Args:
        path_lda: Path of the saved ``.model`` file.

    Returns:
        Whatever ``gensim.models.LdaMulticore.load`` returns for that path.
    """
    return gensim.models.LdaMulticore.load(path_lda)

In [11]:
# Display the generated model paths for inspection.
all_paths


Out[11]:
['/home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics5_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model',
 '/home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics10_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model',
 '/home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics20_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model',
 '/home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics40_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model',
 '/home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics60_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model',
 '/home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics120_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model']

5 topics


In [12]:
# Take the 5-topic model's path from all_paths (built from the config cell)
# instead of hard-coding an absolute /home/... path, so the notebook also runs
# on other machines. Raises ValueError if 5 is not in NUM_TOPICS_LIST.
lda_path5 = all_paths[NUM_TOPICS_LIST.index(5)]

In [13]:
# Load the 5-topic model and display the top-weighted words of its topics.
lda_model = load_lda_model(lda_path5)
lda_model.show_topics()


INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics5_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading expElogbeta from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics5_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.expElogbeta.npy with mmap=None
INFO : setting ignored attribute dispatcher to None
INFO : setting ignored attribute id2word to None
INFO : setting ignored attribute state to None
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics5_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics5_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics5_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
Out[13]:
[(0,
  '0.002*"πλα" + 0.002*"επι" + 0.002*"πλεονασμῷ" + 0.002*"διφθόγγου" + 0.001*"τροπῇ" + 0.001*"ἐθνικὸν" + 0.001*"σημείωσαι" + 0.001*"ἀρίσταρχος" + 0.001*"ὀξύνεται" + 0.001*"αἰτιατικῇ"'),
 (1,
  '0.002*"συμπέρασμα" + 0.001*"χρῶ" + 0.001*"χυμῶν" + 0.001*"προτάσεις" + 0.001*"συμβεβηκός" + 0.001*"προτάσεων" + 0.001*"ὄξει" + 0.001*"κηροῦ" + 0.001*"ὑποκειμένῳ" + 0.001*"κατηγορεῖται"'),
 (2,
  '0.001*"καῖσαρ" + 0.001*"σφίσι" + 0.001*"μιχαὴλ" + 0.001*"κωνσταντίνου" + 0.001*"ξὺν" + 0.001*"ἱππεῖς" + 0.001*"σώκρατες" + 0.000*"τριήρεις" + 0.000*"καίσαρα" + 0.000*"αὐτοκράτωρ"'),
 (3,
  '0.003*"τᾶς" + 0.003*"μοιρῶν" + 0.003*"αβγ" + 0.003*"γωνία" + 0.003*"ἴσαι" + 0.002*"τριγώνου" + 0.002*"διάμετρον" + 0.002*"περιφέρεια" + 0.002*"περιφερείας" + 0.002*"περιφέρειαν"'),
 (4,
  '0.002*"ἰσραὴλ" + 0.001*"ἐπίσκοπος" + 0.001*"ἀπόστολος" + 0.001*"ἰησοῦν" + 0.001*"ἀβραὰμ" + 0.001*"χριστός" + 0.001*"ἀγάπης" + 0.001*"ἱερουσαλὴμ" + 0.001*"ἀδὰμ" + 0.001*"θεότητος"')]

In [14]:
# Build the pyLDAvis view for the model loaded in the previous cell, using the
# BoW corpus and dictionary loaded at the top of the notebook.
pyLDAvis.gensim.prepare(lda_model, corpus_bow_tlg, id2word_tlg)


/home/kyle/venv3/lib/python3.6/site-packages/pyLDAvis/_prepare.py:387: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]
Out[14]:

10 topics


In [15]:
# Take the 10-topic model's path from all_paths (built from the config cell)
# instead of hard-coding an absolute /home/... path, so the notebook also runs
# on other machines. Raises ValueError if 10 is not in NUM_TOPICS_LIST.
lda_path10 = all_paths[NUM_TOPICS_LIST.index(10)]
lda_model = load_lda_model(lda_path10)
# Display the top-weighted words of the model's topics.
lda_model.show_topics()


INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics10_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading expElogbeta from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics10_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.expElogbeta.npy with mmap=None
INFO : setting ignored attribute id2word to None
INFO : setting ignored attribute state to None
INFO : setting ignored attribute dispatcher to None
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics10_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics10_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics10_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
Out[15]:
[(0,
  '0.001*"ἱππεῖς" + 0.001*"δικασταί" + 0.001*"θηβαίων" + 0.001*"συμμάχων" + 0.000*"θεόπομπος" + 0.000*"τριήρεις" + 0.000*"θηβαῖοι" + 0.000*"ψήφισμα" + 0.000*"δημοσθένους" + 0.000*"σφίσι"'),
 (1,
  '0.003*"χρῶ" + 0.002*"χυμῶν" + 0.002*"ὄξει" + 0.002*"κηροῦ" + 0.002*"μυῶν" + 0.002*"ὄξους" + 0.002*"ἱπποκράτης" + 0.002*"ἀρτηρίας" + 0.002*"νεύρων" + 0.002*"σμύρνης"'),
 (2,
  '0.002*"ἰσραὴλ" + 0.002*"ἀπόστολος" + 0.001*"ἰησοῦν" + 0.001*"χριστός" + 0.001*"ἀβραὰμ" + 0.001*"ἀγάπης" + 0.001*"θεότητος" + 0.001*"ἰσραήλ" + 0.001*"ἀδὰμ" + 0.001*"ἱερουσαλὴμ"'),
 (3,
  '0.002*"σώκρατες" + 0.001*"κεφαλαίων" + 0.001*"οὐθὲν" + 0.001*"χρύσιππος" + 0.001*"εὐριπίδου" + 0.001*"φεύγοντος" + 0.001*"ὅρῳ" + 0.001*"ζήτημα" + 0.001*"ἕξις" + 0.001*"ῥητορικὴ"'),
 (4,
  '0.021*"πλα" + 0.010*"αβγ" + 0.009*"γωνία" + 0.008*"μοιρῶν" + 0.007*"ἴσαι" + 0.007*"περιφέρεια" + 0.006*"περιφερείας" + 0.006*"περιφέρειαν" + 0.006*"ὀρθὰς" + 0.005*"διάμετρον"'),
 (5,
  '0.002*"νοητῶν" + 0.002*"νοητὸν" + 0.002*"ἕνωσιν" + 0.002*"μονάδος" + 0.001*"στάδιοι" + 0.001*"μονάδα" + 0.001*"ζῳδίων" + 0.001*"νοερῶν" + 0.001*"ζῴδιον" + 0.001*"μονὰς"'),
 (6,
  '0.004*"συμπέρασμα" + 0.003*"ὑποκειμένῳ" + 0.003*"συμβεβηκός" + 0.003*"προτάσεις" + 0.003*"προτάσεων" + 0.002*"κατηγορεῖται" + 0.002*"κινοῦν" + 0.002*"ὁρισμὸν" + 0.002*"ὁρισμὸς" + 0.002*"οὐσίαι"'),
 (7,
  '0.004*"ἐπίσκοπος" + 0.002*"καῖσαρ" + 0.002*"κωνσταντίνου" + 0.002*"μιχαὴλ" + 0.002*"ἐπισκόπου" + 0.001*"σφίσι" + 0.001*"κωνσταντῖνος" + 0.001*"ὑπέγραψα" + 0.001*"ξὺν" + 0.001*"ἰωάννην"'),
 (8,
  '0.005*"πλεονασμῷ" + 0.005*"διφθόγγου" + 0.004*"τροπῇ" + 0.004*"ἐθνικὸν" + 0.003*"ὀξύνεται" + 0.003*"αἰτιατικῇ" + 0.003*"βαρύνεται" + 0.003*"κλίνεται" + 0.003*"δίφθογγον" + 0.003*"πληθυντικῶν"'),
 (9,
  '0.002*"σημείωσαι" + 0.001*"τρώων" + 0.001*"ἀρίσταρχος" + 0.001*"ἕκτωρ" + 0.001*"ὄφρα" + 0.001*"ὀδυσσέα" + 0.001*"ζηνόδοτος" + 0.001*"ἕκτορος" + 0.001*"αἴας" + 0.001*"ἕκτορα"')]

In [15]:
# Build the pyLDAvis view for the model loaded in the previous cell, using the
# BoW corpus and dictionary loaded at the top of the notebook.
pyLDAvis.gensim.prepare(lda_model, corpus_bow_tlg, id2word_tlg)


/home/kyle/venv3/lib/python3.6/site-packages/pyLDAvis/_prepare.py:387: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]
Out[15]:

20 topics


In [16]:
# Take the 20-topic model's path from all_paths (built from the config cell)
# instead of hard-coding an absolute /home/... path, so the notebook also runs
# on other machines. Raises ValueError if 20 is not in NUM_TOPICS_LIST.
lda_path20 = all_paths[NUM_TOPICS_LIST.index(20)]
lda_model = load_lda_model(lda_path20)
# Display the top-weighted words for a selection of the model's topics.
lda_model.show_topics()


INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics20_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading expElogbeta from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics20_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.expElogbeta.npy with mmap=None
INFO : setting ignored attribute dispatcher to None
INFO : setting ignored attribute id2word to None
INFO : setting ignored attribute state to None
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics20_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics20_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics20_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
Out[16]:
[(5,
  '0.001*"χριστός" + 0.001*"διαβόλου" + 0.001*"ἀμήν" + 0.001*"ἀβραὰμ" + 0.001*"παύλου" + 0.001*"ἀγάπης" + 0.001*"ἀδὰμ" + 0.001*"ἰουδαῖοι" + 0.001*"ἀπόστολος" + 0.001*"ἀγάπην"'),
 (16,
  '0.013*"ἐθνικὸν" + 0.006*"στάδιοι" + 0.005*"ἑκαταῖος" + 0.004*"θεόπομπος" + 0.003*"ἑλλάνικος" + 0.002*"ὄφρα" + 0.002*"καρίας" + 0.002*"φερεκύδης" + 0.002*"ὅττι" + 0.002*"στράβων"'),
 (9,
  '0.010*"ἱερώνυμος" + 0.003*"καρύστιος" + 0.003*"ἱστορικοῖς" + 0.002*"ὑπομνήμασιν" + 0.002*"ἐφέσιοι" + 0.002*"περγαμηνὸς" + 0.001*"βάτων" + 0.001*"ἡράκλειτον" + 0.001*"ἱερωνύμου" + 0.001*"ῥόδιος"'),
 (11,
  '0.016*"αβγ" + 0.014*"τᾶς" + 0.012*"ἴσαι" + 0.012*"γωνία" + 0.009*"ὀρθὰς" + 0.008*"τριγώνου" + 0.008*"κώνου" + 0.008*"ἐπιπέδῳ" + 0.007*"βάσις" + 0.007*"ἐλάσσων"'),
 (13,
  '0.243*"πλα" + 0.183*"επι" + 0.017*"γεν" + 0.015*"τρι" + 0.015*"παναγίας" + 0.014*"ὁσίου" + 0.014*"σύρου" + 0.014*"ἐφραίμ" + 0.010*"κον" + 0.008*"τερ"'),
 (17,
  '0.002*"καῖσαρ" + 0.002*"μιχαὴλ" + 0.002*"κωνσταντίνου" + 0.001*"κωνσταντῖνος" + 0.001*"καίσαρα" + 0.001*"πατριάρχης" + 0.001*"αὐτοκράτωρ" + 0.001*"σφίσι" + 0.001*"ἰωάννην" + 0.001*"συγκλήτου"'),
 (6,
  '0.002*"αἰτιατικῇ" + 0.001*"ἀττικοί" + 0.001*"κώλων" + 0.001*"μεταφορᾶς" + 0.001*"ἀττικοὶ" + 0.001*"μένανδρος" + 0.001*"αἰσχύλος" + 0.001*"ἀκατάληκτον" + 0.001*"δοτικῇ" + 0.001*"κρατῖνος"'),
 (12,
  '0.005*"ἰσραὴλ" + 0.003*"ἱερουσαλὴμ" + 0.003*"ἰούδα" + 0.003*"ἰακὼβ" + 0.002*"ἰσραήλ" + 0.002*"ἀβραὰμ" + 0.002*"δαβὶδ" + 0.002*"σύμμαχος" + 0.002*"μωυσῆς" + 0.002*"ἀπόστολος"'),
 (7,
  '0.008*"ἐπίσκοπος" + 0.006*"ἰσραὴλ" + 0.004*"ἐπισκόπου" + 0.003*"ἁγίοις" + 0.003*"χριστός" + 0.003*"ὑπέγραψα" + 0.003*"θεότητος" + 0.003*"θεσπέσιος" + 0.002*"ἰησοῦν" + 0.002*"ἐπισκόπων"'),
 (15,
  '0.003*"ξὺν" + 0.002*"σφίσι" + 0.002*"ἱππεῖς" + 0.002*"τους" + 0.001*"ἰνδῶν" + 0.001*"σφι" + 0.001*"πελοπόννησον" + 0.001*"δαρεῖος" + 0.001*"στρατῷ" + 0.001*"ὡσὰν"')]

In [17]:
# Build the pyLDAvis view for the model loaded in the previous cell, using the
# BoW corpus and dictionary loaded at the top of the notebook.
pyLDAvis.gensim.prepare(lda_model, corpus_bow_tlg, id2word_tlg)


/home/kyle/venv3/lib/python3.6/site-packages/pyLDAvis/_prepare.py:387: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]
Out[17]:

40 topics


In [18]:
# Take the 40-topic model's path from all_paths (built from the config cell)
# instead of hard-coding an absolute /home/... path, so the notebook also runs
# on other machines. Raises ValueError if 40 is not in NUM_TOPICS_LIST.
lda_path40 = all_paths[NUM_TOPICS_LIST.index(40)]
lda_model = load_lda_model(lda_path40)
# Display the top-weighted words for a selection of the model's topics.
lda_model.show_topics()


INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics40_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading expElogbeta from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics40_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.expElogbeta.npy with mmap=None
INFO : setting ignored attribute dispatcher to None
INFO : setting ignored attribute id2word to None
INFO : setting ignored attribute state to None
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics40_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics40_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics40_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
Out[18]:
[(20,
  '0.003*"ἰσραὴλ" + 0.003*"ἀπόστολος" + 0.002*"δαβὶδ" + 0.002*"ἱερουσαλὴμ" + 0.001*"ἀβραὰμ" + 0.001*"ἰησοῦν" + 0.001*"θεότητος" + 0.001*"ἰσραήλ" + 0.001*"σύμμαχος" + 0.001*"ἰούδα"'),
 (0,
  '0.007*"ἄρατος" + 0.006*"δύνει" + 0.005*"ἀνατέλλει" + 0.005*"νότος" + 0.005*"εὔδοξος" + 0.004*"πνεῖ" + 0.004*"καρκίνου" + 0.003*"δύεται" + 0.003*"σκορπίου" + 0.003*"βόρεια"'),
 (5,
  '0.009*"ἴδον" + 0.006*"ἐπιμενίδης" + 0.006*"εἶπέν" + 0.004*"οὐαὶ" + 0.003*"ἐπιμενίδην" + 0.003*"κρὴς" + 0.002*"ἐνώπιον" + 0.002*"γενεὰς" + 0.002*"ὁράσει" + 0.002*"κρῆτες"'),
 (30,
  '0.008*"τοῖσιν" + 0.007*"τῇσι" + 0.006*"ξὺν" + 0.006*"σφι" + 0.005*"τουτέων" + 0.005*"ἐπὴν" + 0.005*"ὅκως" + 0.005*"ὀδύνη" + 0.004*"πυρετὸς" + 0.004*"οἷσι"'),
 (13,
  '0.005*"πλεονασμῷ" + 0.005*"αἰτιατικῇ" + 0.003*"τροπῇ" + 0.003*"γενικῇ" + 0.003*"δοτικῇ" + 0.003*"ἀττικοί" + 0.002*"ἀττικοὶ" + 0.002*"παράγωγον" + 0.002*"ἐπίρρημα" + 0.001*"συγκοπῇ"'),
 (31,
  '0.025*"χριστιανός" + 0.014*"ἔπαρχος" + 0.009*"ἰουστῖνος" + 0.007*"χριστιανὸς" + 0.003*"ἐζεκίας" + 0.002*"ἡσαΐας" + 0.002*"κροτωνιάτης" + 0.002*"ἱέραξ" + 0.002*"μανασσῆ" + 0.002*"ἱερουσαλὴμ"'),
 (34,
  '0.002*"ἐνώπιον" + 0.002*"ἀπόστολος" + 0.001*"ἀδὰμ" + 0.001*"ἰησοῦν" + 0.001*"δαυῒδ" + 0.001*"θέλημα" + 0.001*"ἀγάπην" + 0.001*"ἐλάλησεν" + 0.001*"ἀββᾶ" + 0.001*"διάβολος"'),
 (35,
  '0.001*"ἱππεῖς" + 0.001*"δικασταί" + 0.001*"συμμάχων" + 0.001*"θηβαίων" + 0.001*"τριήρεις" + 0.001*"καρχηδονίων" + 0.001*"θηβαῖοι" + 0.001*"ἡγεμονίας" + 0.001*"ψήφισμα" + 0.001*"θηβαίους"'),
 (4,
  '0.008*"ἰσραὴλ" + 0.003*"θεσπέσιος" + 0.003*"ἰσραήλ" + 0.003*"μονονουχὶ" + 0.002*"χριστός" + 0.002*"ἱερουσαλὴμ" + 0.002*"ἁγίοις" + 0.002*"μωσέως" + 0.002*"ἰησοῦν" + 0.002*"θεότητος"'),
 (21,
  '0.002*"τους" + 0.002*"νιν" + 0.001*"ἀλέξανδρε" + 0.001*"ὡσὰν" + 0.001*"φεῦ" + 0.001*"αἰσχύλος" + 0.001*"τᾶς" + 0.001*"βροτοῖς" + 0.001*"δὲν" + 0.001*"νυν"')]

In [19]:
# Build the pyLDAvis view for the model loaded in the previous cell, using the
# BoW corpus and dictionary loaded at the top of the notebook.
pyLDAvis.gensim.prepare(lda_model, corpus_bow_tlg, id2word_tlg)


/home/kyle/venv3/lib/python3.6/site-packages/pyLDAvis/_prepare.py:387: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]
Out[19]:

60 topics


In [22]:
# Take the 60-topic model's path from all_paths (built from the config cell)
# instead of hard-coding an absolute /home/... path, so the notebook also runs
# on other machines. Raises ValueError if 60 is not in NUM_TOPICS_LIST.
lda_path60 = all_paths[NUM_TOPICS_LIST.index(60)]
lda_model = load_lda_model(lda_path60)
# Display the top-weighted words for a selection of the model's topics.
lda_model.show_topics()


INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics60_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading expElogbeta from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics60_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.expElogbeta.npy with mmap=None
INFO : setting ignored attribute dispatcher to None
INFO : setting ignored attribute id2word to None
INFO : setting ignored attribute state to None
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics60_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics60_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics60_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
Out[22]:
[(48,
  '0.003*"ἀπόστολος" + 0.002*"ἰσραὴλ" + 0.002*"δαβὶδ" + 0.002*"θεότητος" + 0.002*"ἀδὰμ" + 0.002*"ἀγάπης" + 0.001*"χριστός" + 0.001*"ἀβραὰμ" + 0.001*"ἀγάπην" + 0.001*"θέλημα"'),
 (11,
  '0.008*"χρῶ" + 0.006*"ὄξει" + 0.004*"κηροῦ" + 0.004*"ὄξους" + 0.003*"χυλοῦ" + 0.003*"κρόκου" + 0.003*"ἀφέψημα" + 0.003*"νίτρου" + 0.003*"ἰᾶται" + 0.003*"χυλὸς"'),
 (52,
  '0.007*"σώκρατες" + 0.004*"εὐριπίδου" + 0.003*"τᾶς" + 0.002*"μενάνδρου" + 0.001*"τοτὲ" + 0.001*"πατριάρχου" + 0.001*"γιγνόμενα" + 0.001*"φήσομεν" + 0.001*"σοφοκλέους" + 0.001*"σμικρὸν"'),
 (8,
  '0.035*"ἁβραὰμ" + 0.013*"μιχαὴλ" + 0.008*"θέων" + 0.007*"ἀρχιστράτηγος" + 0.007*"ἰσαὰκ" + 0.007*"μιχαήλ" + 0.006*"δίκαιε" + 0.005*"σωσικράτης" + 0.005*"μιλήσιος" + 0.005*"κύριέ"'),
 (17,
  '0.010*"αἴσωπος" + 0.008*"ξάνθος" + 0.005*"εὐριπίδου" + 0.005*"ἄλεξις" + 0.004*"ἀντιφάνης" + 0.004*"κλέαρχος" + 0.004*"σῦκα" + 0.004*"ἐστ" + 0.004*"χοροῦ" + 0.004*"δῆτ"'),
 (43,
  '0.023*"στωικοὶ" + 0.017*"ὑδράργυρον" + 0.017*"σύνθεμα" + 0.015*"ξανθὸν" + 0.015*"στράτων" + 0.014*"ἐμπεδοκλῆς" + 0.012*"ἀναξαγόρας" + 0.010*"μητρόδωρος" + 0.010*"ζώσιμος" + 0.010*"θαλῆς"'),
 (18,
  '0.004*"κῶλα" + 0.003*"κῶλον" + 0.003*"ἰδεῶν" + 0.003*"ῥητορικὴ" + 0.003*"κώλων" + 0.002*"μέθοδος" + 0.002*"ῥητορικὴν" + 0.002*"μέθοδον" + 0.002*"δεινότητος" + 0.002*"προοίμια"'),
 (30,
  '0.004*"καῖσαρ" + 0.003*"σφίσι" + 0.002*"ἱππεῖς" + 0.002*"καίσαρα" + 0.002*"ξὺν" + 0.002*"καρχηδονίων" + 0.001*"καίσαρι" + 0.001*"ἀντώνιος" + 0.001*"ἡγεμονίας" + 0.001*"ἀντωνίου"'),
 (54,
  '0.008*"ἀββᾶ" + 0.004*"ἀγάπην" + 0.004*"ἐνώπιον" + 0.004*"ἀδελφέ" + 0.004*"ἀποκριθεὶς" + 0.004*"ἀπεκρίθη" + 0.003*"μοναχὸς" + 0.003*"ἀββᾶν" + 0.003*"κελλίον" + 0.002*"ἀγαπητοί"'),
 (27,
  '0.007*"φεύγοντος" + 0.006*"κεφαλαίων" + 0.006*"κατηγόρου" + 0.006*"ζήτημα" + 0.005*"ὅρῳ" + 0.005*"ῥητοῦ" + 0.004*"ἀντίθεσις" + 0.004*"ἀδίκημα" + 0.004*"στάσεων" + 0.004*"στοχασμῷ"')]

In [23]:
# Build the pyLDAvis view for the model loaded in the previous cell.
# NOTE: in the saved run this cell's output was dropped with "IOPub data rate
# exceeded"; start Jupyter with a higher --NotebookApp.iopub_data_rate_limit
# to see the visualization.
pyLDAvis.gensim.prepare(lda_model, corpus_bow_tlg, id2word_tlg)


/home/kyle/venv3/lib/python3.6/site-packages/pyLDAvis/_prepare.py:387: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]
IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

120 topics


In [23]:
# Take the 120-topic model's path from all_paths (built from the config cell)
# instead of hard-coding an absolute /home/... path, so the notebook also runs
# on other machines. Raises ValueError if 120 is not in NUM_TOPICS_LIST.
lda_path120 = all_paths[NUM_TOPICS_LIST.index(120)]
lda_model = load_lda_model(lda_path120)
# Display the top-weighted words for a selection of the model's topics.
lda_model.show_topics()


INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics120_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading expElogbeta from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics120_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.expElogbeta.npy with mmap=None
INFO : setting ignored attribute id2word to None
INFO : setting ignored attribute state to None
INFO : setting ignored attribute dispatcher to None
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics120_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model
INFO : loading LdaMulticore object from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics120_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
INFO : loading sstats from /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics120_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state.sstats.npy with mmap=None
INFO : loaded /home/kyle/cltk_data/user_data/lda_tlg/gensim_lda_model_tlg_numtopics120_numpasses100_nobelow20_noabove0.1_tokmin3_tokmax20_docmin50_deaccentFalse.model.state
Out[23]:
[(103,
  '0.006*"ἐνώπιον" + 0.004*"ἐλάλησεν" + 0.003*"μωυσῆς" + 0.003*"εἶπαν" + 0.003*"ἐγενήθη" + 0.002*"ἐπορεύθη" + 0.002*"ἔναντι" + 0.002*"θυσιαστήριον" + 0.002*"λαόν" + 0.002*"λαῷ"'),
 (116,
  '0.010*"καῖσαρ" + 0.005*"καίσαρα" + 0.004*"σφίσι" + 0.004*"ἀντώνιος" + 0.003*"καίσαρι" + 0.003*"ἀντωνίου" + 0.003*"ἀντώνιον" + 0.002*"πομπήιος" + 0.002*"σφισι" + 0.002*"αὐτοκράτορα"'),
 (105,
  '0.009*"σημαίνουσιν" + 0.007*"βλέφαρα" + 0.007*"βακχεῖος" + 0.004*"ὀφρύες" + 0.003*"τράχηλος" + 0.003*"μικροὶ" + 0.003*"βλέμμα" + 0.003*"κοῖλοι" + 0.003*"σημαίνουσι" + 0.003*"ἄρθρα"'),
 (23,
  '0.079*"αἰτιατικῇ" + 0.041*"γενικῇ" + 0.040*"δοτικῇ" + 0.009*"πολύβιος" + 0.007*"νεφέλαις" + 0.006*"συνέσιος" + 0.005*"αἰτιατική" + 0.005*"θεολόγος" + 0.005*"δίων" + 0.005*"δοτική"'),
 (114,
  '0.019*"ξὺν" + 0.008*"σφίσι" + 0.006*"στρατῷ" + 0.006*"οὐδαμῆ" + 0.004*"περιβόλου" + 0.004*"ἰουστινιανὸς" + 0.004*"γότθων" + 0.004*"γότθοι" + 0.004*"χοσρόης" + 0.003*"ὅπη"'),
 (94,
  '0.011*"ἀντίοχος" + 0.009*"φίλωνος" + 0.004*"φίλωνα" + 0.004*"ἐπικρατήσῃ" + 0.004*"ἀκρωρείαις" + 0.003*"φθαρῇ" + 0.003*"συγγράμματι" + 0.003*"δειλῶν" + 0.002*"ξηροῦ" + 0.002*"ἀγένητος"'),
 (110,
  '0.071*"σημείωσαι" + 0.048*"ἀριστείδης" + 0.018*"ἐπιστολῇ" + 0.015*"ἀττικοὶ" + 0.015*"λιβάνιος" + 0.013*"λουκιανὸς" + 0.011*"συνέσιος" + 0.008*"παναθηναϊκῷ" + 0.006*"ἴων" + 0.006*"γοργίᾳ"'),
 (54,
  '0.034*"σάτυρος" + 0.031*"ἱερώνυμος" + 0.024*"φαληρεὺς" + 0.022*"παιάν" + 0.006*"ἀρίστωνα" + 0.005*"σατύρου" + 0.005*"ῥόδιος" + 0.005*"ὑπόκρισιν" + 0.004*"ἀλθαίας" + 0.004*"φαληρεύς"'),
 (55,
  '0.008*"ἄρατος" + 0.006*"δύνει" + 0.005*"εὔδοξος" + 0.005*"ἀνατέλλει" + 0.005*"σόφισμα" + 0.004*"ἐλέγχου" + 0.004*"καρκίνου" + 0.004*"σιγῶντα" + 0.003*"ζῳδιακὸς" + 0.003*"ἀμφιβολίαν"'),
 (112,
  '0.021*"μοιρῶν" + 0.009*"ζῳδιακοῦ" + 0.009*"ἰσημερινοῦ" + 0.008*"περιφέρεια" + 0.007*"γωνία" + 0.007*"περιφερείας" + 0.007*"ζῳδίων" + 0.006*"ὁρίζοντος" + 0.006*"ἀνωμαλίας" + 0.006*"ὀρθαὶ"')]

In [24]:
# Build the pyLDAvis view for the model loaded in the previous cell, using the
# BoW corpus and dictionary loaded at the top of the notebook.
pyLDAvis.gensim.prepare(lda_model, corpus_bow_tlg, id2word_tlg)


/home/kyle/cltk/venv_pypi/lib/python3.6/site-packages/pyLDAvis/_prepare.py:387: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]
Out[24]: