In [1]:
import os
import sys
import pickle
import numpy as np
from scipy.io.matlab import loadmat
from scipy.sparse import csr_matrix
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
import itertools

from almc.bayesian_rescal import PFBayesianRescal, compute_regret
%matplotlib inline

In [2]:
def load_dataset(dataset):
    if dataset == 'umls':
        mat = loadmat('../data/%s/uml.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'nation':
        mat = loadmat('../data/%s/dnations.mat' % (dataset))
        T = np.array(mat['R'], np.float32)
    elif dataset == 'kinship':
        mat = loadmat('../data/%s/alyawarradata.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'wordnet':
        T = pickle.load(open('../data/%s/reduced_wordnet.pkl' % (dataset), 'rb'))
    elif dataset == 'freebase':
        T, _, _ = pickle.load(open('../data/freebase/subset_5000.pkl', 'rb'))

    if dataset == 'umls' or dataset == 'nation' or dataset == 'kinship':
        T = np.swapaxes(T, 1, 2)
        T = np.swapaxes(T, 0, 1)  # [relation, entity, entity]
        T[np.isnan(T)] = 0
    return T

In [3]:
color = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),    
             (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),    
             (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),    
             (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),    
             (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]    
  
# Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.    
for i in range(len(color)):    
    r, g, b = color[i]    
    color[i] = (r / 255., g / 255., b / 255.)

In [4]:
model_var_comp = dict()
# model_var_comp[('nation', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_mul')] = 1.00   # for the additional mcmc steps
model_var_comp[('kinship', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_add')] = 10000.00
model_var_comp[('kinship', 'bcomp_add')] = 10000.00
# model_var_comp[('umls', 'bcomp_mul')] = 1.00
model_var_comp[('umls', 'bcomp_mul')] = 10.00    # for the additional mcmc steps
model_var_comp[('umls', 'bcomp_add')] = 1.00
dataset_limit = dict()
dataset_limit['nation'] = 2000
dataset_limit['kinship'] = 10000
dataset_limit['umls'] = 10000

model_colors = {'brescal':color[0], 'amdc_pop':color[8], 'amdc_pred':color[7], 
                'bcomp_mul':color[2], 'bcomp_add':color[3], 'logit':color[5], 
                'rescal':'grey',
                'brescal_passive':'grey', 'bcomp_mul_10': color[8], 'bcomp_add_10':color[9], 
                'bcomp_mul_comp_10':color[11], 'bcomp_add_comp_10':color[13], 
                'bcomp_mul_var_1_comp_10': color[15], 'bcomp_add_var_1_comp_10': color[17]}
model_names = {'rescal':'rescal', 'brescal':'pnormal-ts', 'amdc_pop':'amdc-pop', 'amdc_pred':'amdc-pred', 
               'bcomp_mul':'pcomp-mul-ts', 'bcomp_add':'pcomp-add-ts', 'logit':'plogit-ts', 
               'brescal_passive':'pnormal'}

In [5]:
for i in range(len(color)):
    plt.bar(i, 1, color=color[i])


Combined image


In [8]:
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', 's', 'p', '*', 'h', 'H', 'D', 'd', '8')

vertical=True
datasets = ['nation', 'kinship', 'umls']
#models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive', 'bcomp_mul', 'bcomp_add']
models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']

for dataset in datasets:
    if dataset is 'nation':
        marker_gap = 500
        width_gap = 100
        line_marker_gap = 100
    else:
        marker_gap = 2500
        width_gap = 500
        line_marker_gap = 500
        
    T = load_dataset(dataset)

    n_test = 5
    x_lim = dataset_limit[dataset]
    summary = dict()
    auc_summary = dict()
    min_len = dict()
    for model in models:
        summary[model] = np.zeros([n_test, x_lim])
        auc_summary[model] = np.zeros([n_test, x_lim])
        min_len[model] = x_lim

    for nt in range(n_test):  

        for model in models:
            if model == 'brescal':
                auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
            elif model == 'bcomp_mul': 
                var_comp = model_var_comp[(dataset,model)]
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)   
                # for the additional mcmc steps
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)   
            elif model == 'bcomp_add':
                var_comp = model_var_comp[(dataset,model)]
                if dataset == 'umls':
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)                                                            
                else:
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)                                        
            elif model == 'amdc_pop':
                auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)            
            elif model == 'amdc_pred':
                auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
            elif model == 'brescal_passive':
                auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)            
            elif model == 'logit':
                auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
                query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)                            
            else:
                raise Error('There is no such model')

            seq = [line.split(',') for line in open(query_file, 'r').readlines()]
            if not model.startswith('amdc'):
                cum_sum = np.cumsum([T[s] for s in seq])
            else:
                cum_sum = np.cumsum([T[s[2],s[0],s[1]] for s in seq])

            x_min = min(len(cum_sum), x_lim)

            summary[model][nt, :x_min] = cum_sum[:x_min]
            auc_sum = [float(x) for x in open(auc_file).readlines()]
            x_min = min(x_min, len(auc_sum))

            auc_summary[model][nt, :x_min] = auc_sum[:x_min]

            if min_len[model] > x_min:
                min_len[model] = x_min
                
    if vertical:
        fig = plt.figure(figsize=(4,6))        
    else:
        fig = plt.figure(figsize=(12,3))
#     plt.suptitle(dataset.upper())
    for model in models:
        cum_sum = np.mean(summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(cum_sum) + 1)
        tmp[:len(cum_sum)] = cum_sum
        tmp[len(cum_sum)] = cum_sum[-1]
        cum_sum = tmp

        _color = model_colors[model]
        model_name = model_names[model].upper()

        if vertical:
            plt.subplot(2, 1, 1)        
        else:
            plt.subplot(1, 2, 1)

        width = (marker_gap-width_gap) / float(len(models))
        #line plot
        plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
        plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
        #bar plot
#        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)    
#         plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
        plt.title('Cumulative gain', size=title_size)
        plt.xlim([0,x_lim])
#        plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6) 
        
        if vertical:
            plt.subplot(2, 1, 2)
        else:
            plt.subplot(1, 2, 2)
        auc = np.mean(auc_summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(auc) + 1)
        tmp[:len(auc)] = auc
        tmp[len(auc)] = auc[-1]        
        auc = tmp
        
#        plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)    
#        plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)        
        plt.title('ROC-AUC score', size=title_size)
#        plt.xlim([0,x_lim])
        plt.ylim([0.4, 1])
        plt.xlabel('\# of Queries', size=10)
        plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])        
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6)
        
    if dataset == 'kinship':
        if vertical:
            plt.subplot(2, 1, 1)
        else:
            plt.subplot(1, 2, 1)
        plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
    if vertical:
        plt.savefig('../paper/cikm2016/images/thompson_%s_mcmc_vertical_line_poster.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
    else:
        plt.savefig('../paper/cikm2016/images/thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)


/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:76: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:78: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:76: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:78: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:76: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:78: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

In [30]:
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')

vertical=True
datasets = ['nation', 'kinship', 'umls']
models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive']
#models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']

for dataset in datasets:
    if dataset is 'nation':
        marker_gap = 500
        width_gap = 100
        line_marker_gap = 100
    else:
        marker_gap = 2500
        width_gap = 500
        line_marker_gap = 500
        
    T = load_dataset(dataset)

    n_test = 5
    x_lim = dataset_limit[dataset]
    summary = dict()
    auc_summary = dict()
    min_len = dict()
    for model in models:
        summary[model] = np.zeros([n_test, x_lim])
        auc_summary[model] = np.zeros([n_test, x_lim])
        min_len[model] = x_lim

    for nt in range(n_test):  

        for model in models:
            if model == 'brescal':
                auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
            elif model == 'bcomp_mul': 
                var_comp = model_var_comp[(dataset,model)]
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)   
                # for the additional mcmc steps
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)   
            elif model == 'bcomp_add':
                var_comp = model_var_comp[(dataset,model)]
                if dataset == 'umls':
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)                                                            
                else:
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)                                        
            elif model == 'amdc_pop':
                auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)            
            elif model == 'amdc_pred':
                auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
            elif model == 'brescal_passive':
                auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)            
            elif model == 'logit':
                auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
                query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)                            
            else:
                raise Error('There is no such model')

            seq = [line.split(',') for line in open(query_file, 'r').readlines()]
            if not model.startswith('amdc'):
                cum_sum = np.cumsum([T[s] for s in seq])
            else:
                cum_sum = np.cumsum([T[s[2],s[0],s[1]] for s in seq])

            x_min = min(len(cum_sum), x_lim)

            summary[model][nt, :x_min] = cum_sum[:x_min]
            auc_sum = [float(x) for x in open(auc_file).readlines()]
            x_min = min(x_min, len(auc_sum))

            auc_summary[model][nt, :x_min] = auc_sum[:x_min]

            if min_len[model] > x_min:
                min_len[model] = x_min
                
    if vertical:
        fig = plt.figure(figsize=(4,6))        
    else:
        fig = plt.figure(figsize=(12,3))
#     plt.suptitle(dataset.upper())
    for model in models:
        cum_sum = np.mean(summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(cum_sum) + 1)
        tmp[:len(cum_sum)] = cum_sum
        tmp[len(cum_sum)] = cum_sum[-1]
        cum_sum = tmp

        _color = model_colors[model]
        model_name = model_names[model].upper()

        if vertical:
            plt.subplot(2, 1, 1)        
        else:
            plt.subplot(1, 2, 1)

        width = (marker_gap-width_gap) / float(len(models))
        #line plot
        plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
        plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
        #bar plot
#        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)    
#         plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
        plt.title('Cumulative gain', size=title_size)
        plt.xlim([0,x_lim])
#        plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6) 
        
        if vertical:
            plt.subplot(2, 1, 2)
        else:
            plt.subplot(1, 2, 2)
        auc = np.mean(auc_summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(auc) + 1)
        tmp[:len(auc)] = auc
        tmp[len(auc)] = auc[-1]        
        auc = tmp
        
#        plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)    
#        plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)        
        plt.title('ROC-AUC score', size=title_size)
#        plt.xlim([0,x_lim])
        plt.ylim([0.4, 1])
        plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])        
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6)
        
    if dataset == 'kinship':
        if vertical:
            plt.subplot(2, 1, 1)
        else:
            plt.subplot(1, 2, 1)
        plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
    if vertical:
        plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
    else:
        plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)


/Users/dongwookim/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:77: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

In [17]:
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')

vertical=True
datasets = ['nation', 'kinship', 'umls']
#models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive']
models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']

for dataset in datasets:
    if dataset is 'nation':
        marker_gap = 500
        width_gap = 100
        line_marker_gap = 100
    else:
        marker_gap = 2500
        width_gap = 500
        line_marker_gap = 500
        
    T = load_dataset(dataset)

    n_test = 5
    x_lim = dataset_limit[dataset]
    summary = dict()
    auc_summary = dict()
    min_len = dict()
    for model in models:
        summary[model] = np.zeros([n_test, x_lim])
        auc_summary[model] = np.zeros([n_test, x_lim])
        min_len[model] = x_lim

    for nt in range(n_test):  

        for model in models:
            if model == 'brescal':
                auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
            elif model == 'bcomp_mul': 
                var_comp = model_var_comp[(dataset,model)]
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)   
                # for the additional mcmc steps
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)   
            elif model == 'bcomp_add':
                var_comp = model_var_comp[(dataset,model)]
                if dataset == 'umls':
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)                                                            
                else:
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)                                        
            elif model == 'amdc_pop':
                auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)            
            elif model == 'amdc_pred':
                auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
            elif model == 'brescal_passive':
                auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)            
            elif model == 'logit':
                auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
                query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)                            
            else:
                raise Error('There is no such model')

            seq = [line.split(',') for line in open(query_file, 'r').readlines()]
            if not model.startswith('amdc'):
                cum_sum = np.cumsum([T[s] for s in seq])
            else:
                cum_sum = np.cumsum([T[s[2],s[0],s[1]] for s in seq])

            x_min = min(len(cum_sum), x_lim)

            summary[model][nt, :x_min] = cum_sum[:x_min]
            auc_sum = [float(x) for x in open(auc_file).readlines()]
            x_min = min(x_min, len(auc_sum))

            auc_summary[model][nt, :x_min] = auc_sum[:x_min]

            if min_len[model] > x_min:
                min_len[model] = x_min
    if vertical:
        fig = plt.figure(figsize=(4,6))        
    else:
        fig = plt.figure(figsize=(12,3))
#     plt.suptitle(dataset.upper())
    for model in models:
        cum_sum = np.mean(summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(cum_sum) + 1)
        tmp[:len(cum_sum)] = cum_sum
        tmp[len(cum_sum)] = cum_sum[-1]
        cum_sum = tmp

        _color = model_colors[model]
        model_name = model_names[model].upper()

        if vertical:
            plt.subplot(2, 1, 1)        
        else:
            plt.subplot(1, 2, 1)

        width = (marker_gap-width_gap) / float(len(models))
        #line plot
        plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
        plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
        #bar plot
#        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)    
#         plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
        plt.title('Cumulative gain', size=title_size)
        plt.xlim([0,x_lim])
#        plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6) 
        
        if vertical:
            plt.subplot(2, 1, 2)
        else:
            plt.subplot(1, 2, 2)
        auc = np.mean(auc_summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(auc) + 1)
        tmp[:len(auc)] = auc
        tmp[len(auc)] = auc[-1]        
        auc = tmp
        
#        plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)    
#        plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)        
        plt.title('ROC-AUC score', size=title_size)
#        plt.xlim([0,x_lim])
        plt.ylim([0.4, 1])
        plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])        
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6)
        
    if dataset == 'kinship':
        if vertical:
            plt.subplot(2, 1, 1)
        else:
            plt.subplot(1, 2, 1)
        plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
    if vertical:
        plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
    else:
        plt.savefig('../paper/cikm2016/images/present_vs_passive_greedy_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)


/Users/dongwookim/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

Presentation


In [7]:
model_var_comp = dict()
# model_var_comp[('nation', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_mul')] = 1.00   # for the additional mcmc steps
model_var_comp[('kinship', 'bcomp_mul')] = 100.00
model_var_comp[('nation', 'bcomp_add')] = 10000.00
model_var_comp[('kinship', 'bcomp_add')] = 10000.00
# model_var_comp[('umls', 'bcomp_mul')] = 1.00
model_var_comp[('umls', 'bcomp_mul')] = 10.00    # for the additional mcmc steps
model_var_comp[('umls', 'bcomp_add')] = 1.00
dataset_limit = dict()
dataset_limit['nation'] = 2000
dataset_limit['kinship'] = 10000
dataset_limit['umls'] = 10000

model_colors = {'brescal':color[0], 'amdc_pop':color[8], 'amdc_pred':color[7], 
                'bcomp_mul':color[2], 'bcomp_add':color[3], 'logit':color[5], 
                'rescal':'grey',
                'brescal_passive':'grey', 'bcomp_mul_10': color[8], 'bcomp_add_10':color[9], 
                'bcomp_mul_comp_10':color[11], 'bcomp_add_comp_10':color[13], 
                'bcomp_mul_var_1_comp_10': color[15], 'bcomp_add_var_1_comp_10': color[17]}
model_names = {'rescal':'rescal', 'brescal':'prescal-ts', 'amdc_pop':'amdc-pop', 'amdc_pred':'amdc-pred', 
               'bcomp_mul':'compositional-ts', 'bcomp_add':'pcomp-add-ts', 'logit':'plogit-ts', 
               'brescal_passive':'prescal-without-ts'}

In [6]:
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')

vertical=True
datasets = ['nation', 'kinship', 'umls']
models = ['brescal', 'amdc_pop', 'amdc_pred']
#models = ['brescal', 'bcomp_mul', 'bcomp_add']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']

for dataset in datasets:
    if dataset is 'nation':
        marker_gap = 500
        width_gap = 100
        line_marker_gap = 100
    else:
        marker_gap = 2500
        width_gap = 500
        line_marker_gap = 500
        
    T = load_dataset(dataset)

    n_test = 5
    x_lim = dataset_limit[dataset]
    summary = dict()
    auc_summary = dict()
    min_len = dict()
    for model in models:
        summary[model] = np.zeros([n_test, x_lim])
        auc_summary[model] = np.zeros([n_test, x_lim])
        min_len[model] = x_lim

    for nt in range(n_test):  

        for model in models:
            if model == 'brescal':
                auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
            elif model == 'bcomp_mul': 
                var_comp = model_var_comp[(dataset,model)]
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)   
                # for the additional mcmc steps
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)   
            elif model == 'bcomp_add':
                var_comp = model_var_comp[(dataset,model)]
                if dataset == 'umls':
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)                                                            
                else:
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)                                        
            elif model == 'amdc_pop':
                auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)            
            elif model == 'amdc_pred':
                auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
            elif model == 'brescal_passive':
                auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)            
            elif model == 'logit':
                auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
                query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)                            
            else:
                raise Error('There is no such model')

            seq = [line.split(',') for line in open(query_file, 'r').readlines()]
            if not model.startswith('amdc'):
                cum_sum = np.cumsum([T[s] for s in seq])
            else:
                cum_sum = np.cumsum([T[s[2],s[0],s[1]] for s in seq])

            x_min = min(len(cum_sum), x_lim)

            summary[model][nt, :x_min] = cum_sum[:x_min]
            auc_sum = [float(x) for x in open(auc_file).readlines()]
            x_min = min(x_min, len(auc_sum))

            auc_summary[model][nt, :x_min] = auc_sum[:x_min]

            if min_len[model] > x_min:
                min_len[model] = x_min
                
    if vertical:
        fig = plt.figure(figsize=(4,6))        
    else:
        fig = plt.figure(figsize=(12,3))
#     plt.suptitle(dataset.upper())
    for model in models:
        cum_sum = np.mean(summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(cum_sum) + 1)
        tmp[:len(cum_sum)] = cum_sum
        tmp[len(cum_sum)] = cum_sum[-1]
        cum_sum = tmp

        _color = model_colors[model]
        model_name = model_names[model].upper()

        if vertical:
            plt.subplot(2, 1, 1)        
        else:
            plt.subplot(1, 2, 1)

        width = (marker_gap-width_gap) / float(len(models))
        #line plot
        plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
        plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
        #bar plot
#        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)    
#         plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
        plt.title('Cumulative gain', size=title_size)
        plt.xlim([0,x_lim])
#        plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6) 
        
        if vertical:
            plt.subplot(2, 1, 2)
        else:
            plt.subplot(1, 2, 2)
        auc = np.mean(auc_summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(auc) + 1)
        tmp[:len(auc)] = auc
        tmp[len(auc)] = auc[-1]        
        auc = tmp
        
#        plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)    
#        plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)        
        plt.title('ROC-AUC score', size=title_size)
#        plt.xlim([0,x_lim])
        plt.ylim([0.4, 1])
        plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])        
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6)
        
    if dataset == 'kinship':
        if vertical:
            plt.subplot(2, 1, 1)
        else:
            plt.subplot(1, 2, 1)
        plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
    if vertical:
        plt.savefig('../paper/cikm2016/images/presentation_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
    else:
        plt.savefig('../paper/cikm2016/images/presentation_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)


/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:77: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:77: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:77: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

In [9]:
legend_size = 9
plt_gap = 10
auc_plt_gap = 50
title_size = 12
#linestyles = ['--', '-.', '-.', ':', '-', '-']
markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')

vertical=True
datasets = ['nation', 'kinship', 'umls']
#models = ['brescal', 'logit', 'amdc_pop', 'amdc_pred', 'brescal_passive']
models = ['brescal', 'bcomp_mul']
#models = ['brescal', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']
#models = ['brescal', 'logit', 'bcomp_mul', 'bcomp_add', 'brescal_passive','amdc_pop', 'amdc_pred']

for dataset in datasets:
    if dataset is 'nation':
        marker_gap = 500
        width_gap = 100
        line_marker_gap = 100
    else:
        marker_gap = 2500
        width_gap = 500
        line_marker_gap = 500
        
    T = load_dataset(dataset)

    n_test = 5
    x_lim = dataset_limit[dataset]
    summary = dict()
    auc_summary = dict()
    min_len = dict()
    for model in models:
        summary[model] = np.zeros([n_test, x_lim])
        auc_summary[model] = np.zeros([n_test, x_lim])
        min_len[model] = x_lim

    for nt in range(n_test):  

        for model in models:
            if model == 'brescal':
                auc_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal/train_test_varx_0.10_dim_10_par_5_test_%d.txt' %(dataset, nt)
            elif model == 'bcomp_mul': 
                var_comp = model_var_comp[(dataset,model)]
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)   
                # for the additional mcmc steps
                auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10_eval.txt' %(dataset, model, var_comp, nt)
                query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_1_test_%d_mc_10.txt' %(dataset, model, var_comp, nt)   
            elif model == 'bcomp_add':
                var_comp = model_var_comp[(dataset,model)]
                if dataset == 'umls':
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d.txt' %(dataset, model, var_comp, nt)                                                            
                else:
                    auc_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5_eval.txt' %(dataset, model, var_comp, nt)
                    query_file = '../result/%s/%s/train_test_var_0.10_%.2f_dim_10_par_5_test_%d_mc_move_5.txt' %(dataset, model, var_comp, nt)                                        
            elif model == 'amdc_pop':
                auc_file = '../result/%s/amdc_pn/auc_population_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_population_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)            
            elif model == 'amdc_pred':
                auc_file = '../result/%s/amdc_pn/auc_predictive_train_0.000_test_0.300_10_%d.txt' % (dataset, nt)
                query_file = '../result/%s/amdc_pn/query_predictive_train_0.000_test_0.300_10_%d.txt' %(dataset, nt)
            elif model == 'brescal_passive':
                auc_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d_eval.txt' % (dataset, nt)
                query_file = '../result/%s/brescal_passive/train_test_varx_0.10_dim_10_test_%d.txt' %(dataset, nt)            
            elif model == 'logit':
                auc_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d_eval.txt' % (dataset, model, nt)
                query_file = '../result/%s/%s/train_test_dim_10_par_5_test_%d.txt' %(dataset, model, nt)                            
            else:
                raise Error('There is no such model')

            seq = [line.split(',') for line in open(query_file, 'r').readlines()]
            if not model.startswith('amdc'):
                cum_sum = np.cumsum([T[s] for s in seq])
            else:
                cum_sum = np.cumsum([T[s[2],s[0],s[1]] for s in seq])

            x_min = min(len(cum_sum), x_lim)

            summary[model][nt, :x_min] = cum_sum[:x_min]
            auc_sum = [float(x) for x in open(auc_file).readlines()]
            x_min = min(x_min, len(auc_sum))

            auc_summary[model][nt, :x_min] = auc_sum[:x_min]

            if min_len[model] > x_min:
                min_len[model] = x_min
    if vertical:
        fig = plt.figure(figsize=(4,6))        
    else:
        fig = plt.figure(figsize=(12,3))
#     plt.suptitle(dataset.upper())
    for model in models:
        cum_sum = np.mean(summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(cum_sum) + 1)
        tmp[:len(cum_sum)] = cum_sum
        tmp[len(cum_sum)] = cum_sum[-1]
        cum_sum = tmp

        _color = model_colors[model]
        model_name = model_names[model].upper()

        if vertical:
            plt.subplot(2, 1, 1)        
        else:
            plt.subplot(1, 2, 1)

        width = (marker_gap-width_gap) / float(len(models))
        #line plot
        plt.plot(np.arange(0,len(cum_sum), plt_gap), cum_sum[::plt_gap], color=_color)
        plt.plot(np.arange(0,len(cum_sum), line_marker_gap), cum_sum[::line_marker_gap], markers[models.index(model)], label=model_name, color=_color)
        #bar plot
#        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), cum_sum[::marker_gap], width, label=model_name, color=_color)    
#         plt.plot(np.arange(models.index(model)*100, len(cum_sum), marker_gap), cum_sum[models.index(model)*100:len(cum_sum):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)
        plt.title('Cumulative gain', size=title_size)
        plt.xlim([0,x_lim])
#        plt.xticks(np.arange(0,len(cum_sum), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6) 
        
        if vertical:
            plt.subplot(2, 1, 2)
        else:
            plt.subplot(1, 2, 2)
        auc = np.mean(auc_summary[model], 0)[:min_len[model]]
        tmp = np.zeros(len(auc) + 1)
        tmp[:len(auc)] = auc
        tmp[len(auc)] = auc[-1]        
        auc = tmp
        
#        plt.plot(np.arange(0,len(auc), auc_plt_gap), auc[::auc_plt_gap], color=_color)
        plt.bar(np.arange(0,len(cum_sum), marker_gap) + width*models.index(model), auc[::marker_gap], width, label=model_name, color=_color)    
#        plt.plot(np.arange(models.index(model)*100,len(auc), marker_gap), auc[models.index(model)*100:len(auc):marker_gap], markers[models.index(model)], label=model.upper(), color=_color)        
        plt.title('ROC-AUC score', size=title_size)
#        plt.xlim([0,x_lim])
        plt.ylim([0.4, 1])
        plt.xticks(np.arange(0,len(auc), marker_gap) + width*float(len(models))/2, ['%d'% (p) for p in np.arange(0,len(cum_sum), marker_gap)])        
        plt.locator_params(axis = 'y', nbins = 5)
        plt.locator_params(axis = 'x', nbins = 6)
        
    if dataset == 'kinship':
        if vertical:
            plt.subplot(2, 1, 1)
        else:
            plt.subplot(1, 2, 1)
        plt.legend(loc='upper left', numpoints=1, frameon=False, prop={'size':legend_size})
    if vertical:
        plt.savefig('../paper/cikm2016/images/presentation_add_thompson_%s_mcmc_vertical_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)
    else:
        plt.savefig('../paper/cikm2016/images/presentation_add_thompson_%s_mcmc_line.pdf' % (dataset), format='PDF', bbox_inches='tight', pad_inches=0.1)


/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/dongwookim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:75: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

In [ ]: