In [1]:
import os
import sys
import pickle
import numpy as np
from scipy.io.matlab import loadmat
from scipy.sparse import csr_matrix
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
import itertools
from almc.bayesian_rescal import PFBayesianRescal, compute_regret
%matplotlib inline
In [2]:
def load_dataset(dataset):
    if dataset == 'umls':
        mat = loadmat('../data/%s/uml.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'nation':
        mat = loadmat('../data/%s/dnations.mat' % (dataset))
        T = np.array(mat['R'], np.float32)
    elif dataset == 'kinship':
        mat = loadmat('../data/%s/alyawarradata.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'wordnet':
        T = pickle.load(open('../data/%s/reduced_wordnet.pkl' % (dataset), 'rb'))
    elif dataset == 'freebase':
        T, _, _ = pickle.load(open('../data/freebase/subset_5000.pkl', 'rb'))
    else:
        raise ValueError('There is no such dataset: %s' % dataset)
    if dataset in ('umls', 'nation', 'kinship'):
        T = np.swapaxes(T, 1, 2)
        T = np.swapaxes(T, 0, 1)  # [relation, entity, entity]
    T[np.isnan(T)] = 0
    return T
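# Quick sanity check (a minimal sketch, assuming the ../data files referenced
# above are available locally): each tensor should come back with shape
# [relation, entity, entity] and no remaining NaNs.
for name in ['nation', 'kinship', 'umls']:
    _T = load_dataset(name)
    print('%s: shape %s, positive entries %d' % (name, _T.shape, int(_T.sum())))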
In [3]:
color = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
(44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
(148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
(227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
(188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]
# Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.
for i in range(len(color)):
    r, g, b = color[i]
    color[i] = (r / 255., g / 255., b / 255.)
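# Note: these RGB triples are the Tableau 20 palette. On matplotlib >= 2.0 the same
# colours could be pulled from the built-in colormap instead, e.g.
# color = [plt.cm.tab20(i)[:3] for i in range(20)]; the hard-coded list is kept so
# the notebook also runs on older matplotlib versions.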
In [4]:
model_var_comp = dict()
# model_var_comp[('nation', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_mul')] = 1.00 # for the additional mcmc steps
model_var_comp[('kinship', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_add')] = 10000.00
model_var_comp[('kinship', 'bcomp_add')] = 10000.00
# model_var_comp[('umls', 'bcomp_mul')] = 1.00
model_var_comp[('umls', 'bcomp_mul')] = 10.00 # for the additional mcmc steps
model_var_comp[('umls', 'bcomp_add')] = 1.00
dataset_limit = dict()
dataset_limit['nation'] = 2000
dataset_limit['kinship'] = 10000
dataset_limit['umls'] = 10000
model_colors = {'brescal':color[0], 'amdc_pop':color[8], 'amdc_pred':color[7],
'bcomp_mul':color[2], 'bcomp_add':color[3], 'logit':color[5],
'rescal':'grey',
'brescal_passive':'grey', 'bcomp_mul_10': color[8], 'bcomp_add_10':color[9],
'bcomp_mul_comp_10':color[11], 'bcomp_add_comp_10':color[13],
'bcomp_mul_var_1_comp_10': color[15], 'bcomp_add_var_1_comp_10': color[17]}
model_names = {'rescal':'rescal', 'brescal':'pnormal-ts', 'amdc_pop':'amdc-pop', 'amdc_pred':'amdc-pred',
'bcomp_mul':'pcomp-mul-ts', 'bcomp_add':'pcomp-add-ts', 'logit':'plogit-ts',
'brescal_passive':'pnormal'}
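# model_var_comp stores the compositional-variance hyperparameter that appears in the
# result-file names used by the plotting cells below; dataset_limit caps the number of
# queries shown per dataset. A small sketch of the naming convention (bcomp_mul_eval_file
# is a hypothetical helper, not used by the cells below):
def bcomp_mul_eval_file(dataset, nt):
    var_comp = model_var_comp[(dataset, 'bcomp_mul')]
    return '../result/%s/bcomp_mul/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' % (dataset, var_comp, nt)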
In [5]:
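# Quick visual check of the scaled palette: one unit-height bar per colour.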
for i in range(len(color)):
    plt.bar(i, 1, color=color[i])
In [8]:
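# For each dataset: average the cumulative gain and ROC-AUC curves over the n_test runs
# of the six query strategies in `models`, plot the two panels, and save the figure
# under ../paper/cikm2016/images/.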
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', 's', 'p', '*', 'h', 'H', 'D', 'd', '8')
vertical=True
datasets = ['nation', 'kinship', 'umls']
#models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive', 'bcomp_mul', 'bcomp_add']
models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
for dataset in datasets:
if dataset == 'nation':
marker_gap = 500
width_gap = 100
line_marker_gap = 100
else:
marker_gap = 2500
width_gap = 500
line_marker_gap = 500
T = load_dataset(dataset)
n_test = 5
x_lim = dataset_limit[dataset]
summary = dict()
auc_summary = dict()
min_len = dict()
for model in models:
summary[model] = np.zeros([n_test, x_lim])
auc_summary[model] = np.zeros([n_test, x_lim])
min_len[model] = x_lim
for nt in range(n_test):
for model in models:
if model == 'brescal':
auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
elif model == 'bcomp_mul':
var_comp = model_var_comp[(dataset,model)]
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
# for the additional mcmc steps
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)
elif model == 'bcomp_add':
var_comp = model_var_comp[(dataset,model)]
if dataset == 'umls':
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
else:
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)
elif model == 'amdc_pop':
auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'amdc_pred':
auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'brescal_passive':
auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)
elif model == 'logit':
auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)
else:
raise ValueError('There is no such model')
seq = [[int(i) for i in line.split(',')] for line in open(query_file)]
if not model.startswith('amdc'):
# query files are assumed to hold comma-separated (relation, entity, entity) index triples
cum_sum = np.cumsum([T[tuple(s)] for s in seq])
else:
# AMDC query files hold (entity, entity, relation) triples
cum_sum = np.cumsum([T[s[2], s[0], s[1]] for s in seq])
x_min = min(len(cum_sum), x_lim)
summary[model][nt, :x_min] = cum_sum[:x_min]
auc_sum = [float(x) for x in open(auc_file).readlines()]
x_min = min(x_min, len(auc_sum))
auc_summary[model][nt, :x_min] = auc_sum[:x_min]
if min_len[model] > x_min:
min_len[model] = x_min
if vertical:
fig = plt.figure(figsize=(4,6))
else:
fig = plt.figure(figsize=(12,3))
# plt.suptitle(dataset.upper())
for model in models:
cum_sum = np.mean(summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(cum_sum) + 1)
tmp[:len(cum_sum)] = cum_sum
tmp[len(cum_sum)] = cum_sum[-1]
cum_sum = tmp
_color = model_colors[model]
model_name = model_names[model].upper()
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
width = (marker_gap-width_gap) / float(len(models))
#line plot
plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
#bar plot
# plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('Cumulative gain', size=title_size)
plt.xlim([0,x_lim])
# plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if vertical:
plt.subplot(2, 1, 2)
else:
plt.subplot(1, 2, 2)
auc = np.mean(auc_summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(auc) + 1)
tmp[:len(auc)] = auc
tmp[len(auc)] = auc[-1]
auc = tmp
# plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('ROC-AUC score', size=title_size)
# plt.xlim([0,x_lim])
plt.ylim([0.4, 1])
plt.xlabel('# of Queries', size=10)
plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if dataset == 'kinship':
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
if vertical:
plt.savefig('../paper/cikm2016/images/thompson_%s_mcmc_vertical_line_poster.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
else:
plt.savefig('../paper/cikm2016/images/thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
In [30]:
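# Same plotting routine as the previous cell, now comparing the active strategies against
# the passive pnormal baseline (brescal_passive); saved as the present_vs_passive figure.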
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')
vertical=True
datasets = ['nation', 'kinship', 'umls']
models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive']
#models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
for dataset in datasets:
if dataset == 'nation':
marker_gap = 500
width_gap = 100
line_marker_gap = 100
else:
marker_gap = 2500
width_gap = 500
line_marker_gap = 500
T = load_dataset(dataset)
n_test = 5
x_lim = dataset_limit[dataset]
summary = dict()
auc_summary = dict()
min_len = dict()
for model in models:
summary[model] = np.zeros([n_test, x_lim])
auc_summary[model] = np.zeros([n_test, x_lim])
min_len[model] = x_lim
for nt in range(n_test):
for model in models:
if model == 'brescal':
auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
elif model == 'bcomp_mul':
var_comp = model_var_comp[(dataset,model)]
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
# for the additional mcmc steps
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)
elif model == 'bcomp_add':
var_comp = model_var_comp[(dataset,model)]
if dataset == 'umls':
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
else:
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)
elif model == 'amdc_pop':
auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'amdc_pred':
auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'brescal_passive':
auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)
elif model == 'logit':
auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)
else:
raise ValueError('There is no such model')
seq = [[int(i) for i in line.split(',')] for line in open(query_file)]
if not model.startswith('amdc'):
# query files are assumed to hold comma-separated (relation, entity, entity) index triples
cum_sum = np.cumsum([T[tuple(s)] for s in seq])
else:
# AMDC query files hold (entity, entity, relation) triples
cum_sum = np.cumsum([T[s[2], s[0], s[1]] for s in seq])
x_min = min(len(cum_sum), x_lim)
summary[model][nt, :x_min] = cum_sum[:x_min]
auc_sum = [float(x) for x in open(auc_file).readlines()]
x_min = min(x_min, len(auc_sum))
auc_summary[model][nt, :x_min] = auc_sum[:x_min]
if min_len[model] > x_min:
min_len[model] = x_min
if vertical:
fig = plt.figure(figsize=(4,6))
else:
fig = plt.figure(figsize=(12,3))
# plt.suptitle(dataset.upper())
for model in models:
cum_sum = np.mean(summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(cum_sum) + 1)
tmp[:len(cum_sum)] = cum_sum
tmp[len(cum_sum)] = cum_sum[-1]
cum_sum = tmp
_color = model_colors[model]
model_name = model_names[model].upper()
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
width = (marker_gap-width_gap) / float(len(models))
#line plot
plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
#bar plot
# plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('Cumulative gain', size=title_size)
plt.xlim([0,x_lim])
# plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if vertical:
plt.subplot(2, 1, 2)
else:
plt.subplot(1, 2, 2)
auc = np.mean(auc_summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(auc) + 1)
tmp[:len(auc)] = auc
tmp[len(auc)] = auc[-1]
auc = tmp
# plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('ROC-AUC score', size=title_size)
# plt.xlim([0,x_lim])
plt.ylim([0.4, 1])
plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if dataset == 'kinship':
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
if vertical:
plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
else:
plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
In [17]:
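# Same plotting routine, restricted to brescal and the two compositional models
# (bcomp_mul, bcomp_add); the output path below reuses the present_vs_passive file names.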
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')
vertical=True
datasets = ['nation', 'kinship', 'umls']
#models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive']
models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
for dataset in datasets:
if dataset == 'nation':
marker_gap = 500
width_gap = 100
line_marker_gap = 100
else:
marker_gap = 2500
width_gap = 500
line_marker_gap = 500
T = load_dataset(dataset)
n_test = 5
x_lim = dataset_limit[dataset]
summary = dict()
auc_summary = dict()
min_len = dict()
for model in models:
summary[model] = np.zeros([n_test, x_lim])
auc_summary[model] = np.zeros([n_test, x_lim])
min_len[model] = x_lim
for nt in range(n_test):
for model in models:
if model == 'brescal':
auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
elif model == 'bcomp_mul':
var_comp = model_var_comp[(dataset,model)]
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
# for the additional mcmc steps
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)
elif model == 'bcomp_add':
var_comp = model_var_comp[(dataset,model)]
if dataset == 'umls':
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
else:
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)
elif model == 'amdc_pop':
auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'amdc_pred':
auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'brescal_passive':
auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)
elif model == 'logit':
auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)
else:
raise ValueError('There is no such model')
seq = [[int(i) for i in line.split(',')] for line in open(query_file)]
if not model.startswith('amdc'):
# query files are assumed to hold comma-separated (relation, entity, entity) index triples
cum_sum = np.cumsum([T[tuple(s)] for s in seq])
else:
# AMDC query files hold (entity, entity, relation) triples
cum_sum = np.cumsum([T[s[2], s[0], s[1]] for s in seq])
x_min = min(len(cum_sum), x_lim)
summary[model][nt, :x_min] = cum_sum[:x_min]
auc_sum = [float(x) for x in open(auc_file).readlines()]
x_min = min(x_min, len(auc_sum))
auc_summary[model][nt, :x_min] = auc_sum[:x_min]
if min_len[model] > x_min:
min_len[model] = x_min
if vertical:
fig = plt.figure(figsize=(4,6))
else:
fig = plt.figure(figsize=(12,3))
# plt.suptitle(dataset.upper())
for model in models:
cum_sum = np.mean(summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(cum_sum) + 1)
tmp[:len(cum_sum)] = cum_sum
tmp[len(cum_sum)] = cum_sum[-1]
cum_sum = tmp
_color = model_colors[model]
model_name = model_names[model].upper()
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
width = (marker_gap-width_gap) / float(len(models))
#line plot
plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
#bar plot
# plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('Cumulative gain', size=title_size)
plt.xlim([0,x_lim])
# plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if vertical:
plt.subplot(2, 1, 2)
else:
plt.subplot(1, 2, 2)
auc = np.mean(auc_summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(auc) + 1)
tmp[:len(auc)] = auc
tmp[len(auc)] = auc[-1]
auc = tmp
# plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('ROC-AUC score', size=title_size)
# plt.xlim([0,x_lim])
plt.ylim([0.4, 1])
plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if dataset == 'kinship':
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
if vertical:
plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
else:
plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
In [7]:
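# Re-declare the configuration from the earlier cell with presentation-friendly display
# names (prescal-ts, compositional-ts, ...) for the slide versions of the figures.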
model_var_comp = dict()
# model_var_comp[('nation', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_mul')] = 1.00 # for the additional mcmc steps
model_var_comp[('kinship', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_add')] = 10000.00
model_var_comp[('kinship', 'bcomp_add')] = 10000.00
# model_var_comp[('umls', 'bcomp_mul')] = 1.00
model_var_comp[('umls', 'bcomp_mul')] = 10.00 # for the additional mcmc steps
model_var_comp[('umls', 'bcomp_add')] = 1.00
dataset_limit = dict()
dataset_limit['nation'] = 2000
dataset_limit['kinship'] = 10000
dataset_limit['umls'] = 10000
model_colors = {'brescal':color[0], 'amdc_pop':color[8], 'amdc_pred':color[7],
'bcomp_mul':color[2], 'bcomp_add':color[3], 'logit':color[5],
'rescal':'grey',
'brescal_passive':'grey', 'bcomp_mul_10': color[8], 'bcomp_add_10':color[9],
'bcomp_mul_comp_10':color[11], 'bcomp_add_comp_10':color[13],
'bcomp_mul_var_1_comp_10': color[15], 'bcomp_add_var_1_comp_10': color[17]}
model_names = {'rescal':'rescal', 'brescal':'prescal-ts', 'amdc_pop':'amdc-pop', 'amdc_pred':'amdc-pred',
'bcomp_mul':'compositional-ts', 'bcomp_add':'pcomp-add-ts', 'logit':'plogit-ts',
'brescal_passive':'prescal-without-ts'}
In [6]:
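# Presentation figure: brescal (prescal-ts) against the two AMDC baselines, saved as
# presentation_thompson_<dataset>_*.pdf.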
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')
vertical=True
datasets = ['nation', 'kinship', 'umls']
models = ['brescal', 'amdc_pop', 'amdc_pred']
#models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
for dataset in datasets:
if dataset == 'nation':
marker_gap = 500
width_gap = 100
line_marker_gap = 100
else:
marker_gap = 2500
width_gap = 500
line_marker_gap = 500
T = load_dataset(dataset)
n_test = 5
x_lim = dataset_limit[dataset]
summary = dict()
auc_summary = dict()
min_len = dict()
for model in models:
summary[model] = np.zeros([n_test, x_lim])
auc_summary[model] = np.zeros([n_test, x_lim])
min_len[model] = x_lim
for nt in range(n_test):
for model in models:
if model == 'brescal':
auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
elif model == 'bcomp_mul':
var_comp = model_var_comp[(dataset,model)]
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
# for the additional mcmc steps
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)
elif model == 'bcomp_add':
var_comp = model_var_comp[(dataset,model)]
if dataset == 'umls':
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
else:
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)
elif model == 'amdc_pop':
auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'amdc_pred':
auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'brescal_passive':
auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)
elif model == 'logit':
auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)
else:
raise ValueError('There is no such model')
seq = [[int(i) for i in line.split(',')] for line in open(query_file)]
if not model.startswith('amdc'):
# query files are assumed to hold comma-separated (relation, entity, entity) index triples
cum_sum = np.cumsum([T[tuple(s)] for s in seq])
else:
# AMDC query files hold (entity, entity, relation) triples
cum_sum = np.cumsum([T[s[2], s[0], s[1]] for s in seq])
x_min = min(len(cum_sum), x_lim)
summary[model][nt, :x_min] = cum_sum[:x_min]
auc_sum = [float(x) for x in open(auc_file).readlines()]
x_min = min(x_min, len(auc_sum))
auc_summary[model][nt, :x_min] = auc_sum[:x_min]
if min_len[model] > x_min:
min_len[model] = x_min
if vertical:
fig = plt.figure(figsize=(4,6))
else:
fig = plt.figure(figsize=(12,3))
# plt.suptitle(dataset.upper())
for model in models:
cum_sum = np.mean(summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(cum_sum) + 1)
tmp[:len(cum_sum)] = cum_sum
tmp[len(cum_sum)] = cum_sum[-1]
cum_sum = tmp
_color = model_colors[model]
model_name = model_names[model].upper()
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
width = (marker_gap-width_gap) / float(len(models))
#line plot
plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
#bar plot
# plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('Cumulative gain', size=title_size)
plt.xlim([0,x_lim])
# plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if vertical:
plt.subplot(2, 1, 2)
else:
plt.subplot(1, 2, 2)
auc = np.mean(auc_summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(auc) + 1)
tmp[:len(auc)] = auc
tmp[len(auc)] = auc[-1]
auc = tmp
# plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('ROC-AUC score', size=title_size)
# plt.xlim([0,x_lim])
plt.ylim([0.4, 1])
plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if dataset == 'kinship':
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
if vertical:
plt.savefig('../paper/cikm2016/images/presentation_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
else:
plt.savefig('../paper/cikm2016/images/presentation_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
In [9]:
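# Presentation figure: brescal against the multiplicative compositional model only,
# saved as presentation_add_thompson_<dataset>_*.pdf.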
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')
vertical=True
datasets = ['nation', 'kinship', 'umls']
#models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive']
models = ['brescal', 'bcomp_mul']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
for dataset in datasets:
if dataset == 'nation':
marker_gap = 500
width_gap = 100
line_marker_gap = 100
else:
marker_gap = 2500
width_gap = 500
line_marker_gap = 500
T = load_dataset(dataset)
n_test = 5
x_lim = dataset_limit[dataset]
summary = dict()
auc_summary = dict()
min_len = dict()
for model in models:
summary[model] = np.zeros([n_test, x_lim])
auc_summary[model] = np.zeros([n_test, x_lim])
min_len[model] = x_lim
for nt in range(n_test):
for model in models:
if model == 'brescal':
auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
elif model == 'bcomp_mul':
var_comp = model_var_comp[(dataset,model)]
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
# for the additional mcmc steps
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)
elif model == 'bcomp_add':
var_comp = model_var_comp[(dataset,model)]
if dataset == 'umls':
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)
else:
auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)
elif model == 'amdc_pop':
auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'amdc_pred':
auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
elif model == 'brescal_passive':
auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)
elif model == 'logit':
auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)
else:
raise ValueError('There is no such model')
seq = [[int(i) for i in line.split(',')] for line in open(query_file)]
if not model.startswith('amdc'):
# query files are assumed to hold comma-separated (relation, entity, entity) index triples
cum_sum = np.cumsum([T[tuple(s)] for s in seq])
else:
# AMDC query files hold (entity, entity, relation) triples
cum_sum = np.cumsum([T[s[2], s[0], s[1]] for s in seq])
x_min = min(len(cum_sum), x_lim)
summary[model][nt, :x_min] = cum_sum[:x_min]
auc_sum = [float(x) for x in open(auc_file).readlines()]
x_min = min(x_min, len(auc_sum))
auc_summary[model][nt, :x_min] = auc_sum[:x_min]
if min_len[model] > x_min:
min_len[model] = x_min
if vertical:
fig = plt.figure(figsize=(4,6))
else:
fig = plt.figure(figsize=(12,3))
# plt.suptitle(dataset.upper())
for model in models:
cum_sum = np.mean(summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(cum_sum) + 1)
tmp[:len(cum_sum)] = cum_sum
tmp[len(cum_sum)] = cum_sum[-1]
cum_sum = tmp
_color = model_colors[model]
model_name = model_names[model].upper()
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
width = (marker_gap-width_gap) / float(len(models))
#line plot
plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
#bar plot
# plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('Cumulative gain', size=title_size)
plt.xlim([0,x_lim])
# plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if vertical:
plt.subplot(2, 1, 2)
else:
plt.subplot(1, 2, 2)
auc = np.mean(auc_summary[model], 0)[:min_len[model]]
tmp = np.zeros(len(auc) + 1)
tmp[:len(auc)] = auc
tmp[len(auc)] = auc[-1]
auc = tmp
# plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)
# plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
plt.title('ROC-AUC score', size=title_size)
# plt.xlim([0,x_lim])
plt.ylim([0.4, 1])
plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
plt.locator_params(axis = 'y', nbins = 5)
plt.locator_params(axis = 'x', nbins = 6)
if dataset == 'kinship':
if vertical:
plt.subplot(2, 1, 1)
else:
plt.subplot(1, 2, 1)
plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
if vertical:
plt.savefig('../paper/cikm2016/images/presentation_add_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
else:
plt.savefig('../paper/cikm2016/images/presentation_add_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)