Relational Topic Model


In [1]:
import pickle
import logging

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from ptm import RelationalTopicModel
from ptm.utils import convert_cnt_to_list, get_top_words

# Fetch the logger registered under the name 'RelationalTopicModel' and stop
# its records from being passed on to the handlers of ancestor loggers.
logger = logging.getLogger('RelationalTopicModel')
logger.propagate = False

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

Load CORA dataset


In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in binary mode inside a `with` block so the handle is
    closed as soon as the object has been read, even if unpickling raises.
    """
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


# NOTE: unpickling can execute arbitrary code -- only load these files from a
# trusted source.
doc_ids = _load_pickle('../data/cora/doc_ids.pkl')
doc_cnt = _load_pickle('../data/cora/doc_cnt.pkl')
doc_links = _load_pickle('../data/cora/doc_links_sym.pkl')
voca = _load_pickle('../data/cora/voca.pkl')

In [3]:
# Sizes derived from the loaded data: number of documents and vocabulary size.
n_doc, n_voca = len(doc_ids), len(voca)

# Settings chosen by hand: number of topics and number of fitting iterations.
n_topic, max_iter = 10, 50

Fit model


In [4]:
# Seed NumPy's global RNG before building the model so that a fresh
# "Restart & Run All" starts from the same random state every time.
# NOTE(review): assumes ptm draws its random initial values from np.random --
# confirm against the ptm source.
np.random.seed(0)

# Build the relational topic model from the sizes defined above, then fit it
# on the documents and their links for `max_iter` iterations.
# verbose=True presumably enables the per-iteration log lines (elapsed time,
# ELBO) recorded in this cell's output -- verify against ptm.
model = RelationalTopicModel(n_topic, n_doc, n_voca, verbose=True)
model.fit(doc_ids, doc_cnt, doc_links, max_iter=max_iter)


2016-02-18 09:53:07 INFO:RelationalTopicModel:Initialize RTM: num_voca:17059, num_topic:10, num_doc:13147
2016-02-18 09:53:14 INFO:RelationalTopicModel:[ITER]   0,	Elapsed time: 5.057	ELBO: -7558267.416
2016-02-18 09:53:19 INFO:RelationalTopicModel:[ITER]   1,	Elapsed time: 5.066	ELBO: -7555930.907
2016-02-18 09:53:24 INFO:RelationalTopicModel:[ITER]   2,	Elapsed time: 5.277	ELBO: -7553953.242
2016-02-18 09:53:30 INFO:RelationalTopicModel:[ITER]   3,	Elapsed time: 5.819	ELBO: -7551585.770
2016-02-18 09:53:35 INFO:RelationalTopicModel:[ITER]   4,	Elapsed time: 5.233	ELBO: -7547984.223
2016-02-18 09:53:41 INFO:RelationalTopicModel:[ITER]   5,	Elapsed time: 5.650	ELBO: -7542205.711
2016-02-18 09:53:46 INFO:RelationalTopicModel:[ITER]   6,	Elapsed time: 5.062	ELBO: -7533498.895
2016-02-18 09:53:52 INFO:RelationalTopicModel:[ITER]   7,	Elapsed time: 5.678	ELBO: -7521541.780
2016-02-18 09:53:57 INFO:RelationalTopicModel:[ITER]   8,	Elapsed time: 5.574	ELBO: -7506539.192
2016-02-18 09:54:03 INFO:RelationalTopicModel:[ITER]   9,	Elapsed time: 5.747	ELBO: -7489166.663
2016-02-18 09:54:08 INFO:RelationalTopicModel:[ITER]  10,	Elapsed time: 5.443	ELBO: -7470531.973
2016-02-18 09:54:14 INFO:RelationalTopicModel:[ITER]  11,	Elapsed time: 5.647	ELBO: -7452060.141
2016-02-18 09:54:20 INFO:RelationalTopicModel:[ITER]  12,	Elapsed time: 5.465	ELBO: -7434975.754
2016-02-18 09:54:25 INFO:RelationalTopicModel:[ITER]  13,	Elapsed time: 5.545	ELBO: -7419959.908
2016-02-18 09:54:30 INFO:RelationalTopicModel:[ITER]  14,	Elapsed time: 5.240	ELBO: -7407361.898
2016-02-18 09:54:36 INFO:RelationalTopicModel:[ITER]  15,	Elapsed time: 5.247	ELBO: -7397150.490
2016-02-18 09:54:41 INFO:RelationalTopicModel:[ITER]  16,	Elapsed time: 5.160	ELBO: -7389101.911
2016-02-18 09:54:46 INFO:RelationalTopicModel:[ITER]  17,	Elapsed time: 5.285	ELBO: -7382819.209
2016-02-18 09:54:51 INFO:RelationalTopicModel:[ITER]  18,	Elapsed time: 5.447	ELBO: -7377980.009
2016-02-18 09:54:57 INFO:RelationalTopicModel:[ITER]  19,	Elapsed time: 5.430	ELBO: -7374338.836
2016-02-18 09:55:03 INFO:RelationalTopicModel:[ITER]  20,	Elapsed time: 5.659	ELBO: -7371643.396
2016-02-18 09:55:08 INFO:RelationalTopicModel:[ITER]  21,	Elapsed time: 5.646	ELBO: -7369688.903
2016-02-18 09:55:13 INFO:RelationalTopicModel:[ITER]  22,	Elapsed time: 4.985	ELBO: -7368338.195
2016-02-18 09:55:19 INFO:RelationalTopicModel:[ITER]  23,	Elapsed time: 5.319	ELBO: -7367446.391
2016-02-18 09:55:24 INFO:RelationalTopicModel:[ITER]  24,	Elapsed time: 5.260	ELBO: -7366919.001
2016-02-18 09:55:29 INFO:RelationalTopicModel:[ITER]  25,	Elapsed time: 5.182	ELBO: -7366663.674
2016-02-18 09:55:34 INFO:RelationalTopicModel:[ITER]  26,	Elapsed time: 5.156	ELBO: -7366624.061
2016-02-18 09:55:40 INFO:RelationalTopicModel:[ITER]  27,	Elapsed time: 5.892	ELBO: -7366759.507
2016-02-18 09:55:45 INFO:RelationalTopicModel:[ITER]  28,	Elapsed time: 5.379	ELBO: -7367038.713
2016-02-18 09:55:51 INFO:RelationalTopicModel:[ITER]  29,	Elapsed time: 5.349	ELBO: -7367430.856
2016-02-18 09:55:56 INFO:RelationalTopicModel:[ITER]  30,	Elapsed time: 5.617	ELBO: -7367911.771
2016-02-18 09:56:02 INFO:RelationalTopicModel:[ITER]  31,	Elapsed time: 5.716	ELBO: -7368443.710
2016-02-18 09:56:08 INFO:RelationalTopicModel:[ITER]  32,	Elapsed time: 5.658	ELBO: -7368992.404
2016-02-18 09:56:13 INFO:RelationalTopicModel:[ITER]  33,	Elapsed time: 5.613	ELBO: -7369555.693
2016-02-18 09:56:19 INFO:RelationalTopicModel:[ITER]  34,	Elapsed time: 5.816	ELBO: -7370132.692
2016-02-18 09:56:25 INFO:RelationalTopicModel:[ITER]  35,	Elapsed time: 5.670	ELBO: -7370722.740
2016-02-18 09:56:30 INFO:RelationalTopicModel:[ITER]  36,	Elapsed time: 5.491	ELBO: -7371328.853
2016-02-18 09:56:36 INFO:RelationalTopicModel:[ITER]  37,	Elapsed time: 5.453	ELBO: -7371934.003
2016-02-18 09:56:41 INFO:RelationalTopicModel:[ITER]  38,	Elapsed time: 5.514	ELBO: -7372531.831
2016-02-18 09:56:47 INFO:RelationalTopicModel:[ITER]  39,	Elapsed time: 5.513	ELBO: -7373139.290
2016-02-18 09:56:52 INFO:RelationalTopicModel:[ITER]  40,	Elapsed time: 5.691	ELBO: -7373752.452
2016-02-18 09:56:58 INFO:RelationalTopicModel:[ITER]  41,	Elapsed time: 5.561	ELBO: -7374371.438
2016-02-18 09:57:04 INFO:RelationalTopicModel:[ITER]  42,	Elapsed time: 5.556	ELBO: -7374985.691
2016-02-18 09:57:09 INFO:RelationalTopicModel:[ITER]  43,	Elapsed time: 5.587	ELBO: -7375587.696
2016-02-18 09:57:15 INFO:RelationalTopicModel:[ITER]  44,	Elapsed time: 5.522	ELBO: -7376178.072
2016-02-18 09:57:20 INFO:RelationalTopicModel:[ITER]  45,	Elapsed time: 5.597	ELBO: -7376748.420
2016-02-18 09:57:26 INFO:RelationalTopicModel:[ITER]  46,	Elapsed time: 5.610	ELBO: -7377299.373
2016-02-18 09:57:31 INFO:RelationalTopicModel:[ITER]  47,	Elapsed time: 5.431	ELBO: -7377820.927
2016-02-18 09:57:37 INFO:RelationalTopicModel:[ITER]  48,	Elapsed time: 5.701	ELBO: -7378326.535
2016-02-18 09:57:43 INFO:RelationalTopicModel:[ITER]  49,	Elapsed time: 5.459	ELBO: -7378829.569

In [5]:
# Print one line per topic: the topic index followed by a comma-separated
# list of the words returned by get_top_words for that topic.
n_top_words = 10
for topic_idx in range(n_topic):
    words = get_top_words(model.beta, voca, topic_idx, n_top_words)
    print('Topic', topic_idx, ':', ','.join(words))


Topic 0 : program,data,language,programming,parallel,analysis,code,paper,compiler,optimization
Topic 1 : network,service,protocol,performance,paper,routing,communication,application,traffic,control
Topic 2 : system,object,data,database,file,information,distributed,server,access,user
Topic 3 : algorithm,problem,time,show,bound,result,number,graph,solution,paper
Topic 4 : model,algorithm,method,network,function,learning,problem,result,neural,set
Topic 5 : agent,research,science,system,part,grant,paper,work,supported,computer
Topic 6 : system,query,planning,language,rule,paper,information,approach,problem,plan
Topic 7 : system,design,software,paper,model,application,specification,component,protocol,user
Topic 8 : learning,model,system,image,data,algorithm,approach,using,task,method
Topic 9 : system,performance,memory,application,parallel,communication,processor,cache,paper,architecture

In [ ]: