In [1]:
    
%matplotlib inline
import os
import sys
import numpy as np
import pickle as pkl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
    
In [2]:
    
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later dropped the old names, so use whichever name this install provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
sns.set()
#sns.set_style('white')
#sns.set_style("whitegrid", {'axes.grid' : False})
    
In [3]:
    
# One (key, display label) pair per dataset; the key is used in result paths
# and as the top-level key of the loaded performance dicts.
datasets = [
    ('30music', '30Music'),
    ('aotm2011', 'AotM-2011'),
]
# Default cut-offs used by gen_curve as the x positions of each curve.
TOPs = [
    5, 10, 20, 30, 50,
    100, 200, 300, 500, 700,
    1000,
]
    
In [4]:
    
# One (key, display label) pair per algorithm; the key selects 'perf-<key>.pkl'.
algos = [
    ('pop', 'PopRank'),
    ('cagh', 'CAGH'),
    ('sagh', 'SAGH'),
    ('mtc', 'MTC'),
]
    
In [5]:
    
# algos = [('br1', 'Logistic Regression'),
#          ('mtc', 'Multitask Classification'),
#          ('mf', 'Matrix Factorisation'),
#          ('pop', 'Popularity Ranking'), ('cagh', 'CAGH'), ('sagh', 'SAGH')]
# metrics = [('Hit-Rate', 'HitRate@100 \\%'), ('AUC', 'AUC \\%'), ('Hit-Rate', 'Hit Rate %'), 
#            ('Artist-Diversity', 'Artist-Diversity'), ('Genre-Diversity', 'Genre-Diversity'),
#            ('Novelty', 'Novelty')]
    
Higher values are better
In [6]:
    
def gen_acc_df(setting_name, metric_tuples, algo_tuples, datasets=datasets, hitrate_top=100):
    """Collect test-set metrics, multiplied by 100, into one table.

    For every (algorithm, dataset) pair the file
    ``data/<dataset key>/coldstart/<setting_name>/perf-<algo key>.pkl`` is
    loaded if it exists; pairs without such a file keep NaN cells.

    Parameters
    ----------
    setting_name : str
        Name of the sub-directory below ``data/<dataset key>/coldstart/``.
    metric_tuples : sequence of (key, label)
        ``key`` is looked up in the loaded results; ``label`` becomes the
        second level of the column index.
    algo_tuples : sequence of (key, label)
        ``key`` selects the result file; ``label`` becomes the row label.
    datasets : sequence of (key, label)
        ``key`` is used in the path and as top-level key of the loaded dict;
        ``label`` becomes the first level of the column index.
    hitrate_top : int, optional
        Entry of the ``'Hit-Rate'`` result that is reported (default 100).
        Presumably the number of recommendations -- confirm against the code
        that writes the result files.

    Returns
    -------
    pandas.DataFrame
        Rows are algorithm labels, columns a (dataset label, metric label)
        MultiIndex.

    Raises
    ------
    KeyError
        If a loaded result file lacks the requested dataset or metric entry.
    """
    row_labels = [a[1] for a in algo_tuples]
    col_index = pd.MultiIndex.from_product([[d[1] for d in datasets], [m[1] for m in metric_tuples]])
    df = pd.DataFrame(index=row_labels, columns=col_index)
    for algo in algo_tuples:
        row_ix = algo[1]
        for dataset in datasets:
            data_dir = 'data/%s/coldstart/%s' % (dataset[0], setting_name)
            fperf = os.path.join(data_dir, 'perf-%s.pkl' % algo[0])
            if not os.path.exists(fperf):
                continue
            # NOTE: unpickling can execute arbitrary code -- only load trusted result files.
            with open(fperf, 'rb') as fd:
                perf_dict = pkl.load(fd)
            for metric in metric_tuples:
                value = perf_dict[dataset[0]]['Test'][metric[0]]
                if metric[0] == 'Hit-Rate':
                    # Hit-Rate holds one value per cut-off; report a single one.
                    value = value[hitrate_top]
                df.loc[row_ix, (dataset[1], metric[1])] = value * 100
    return df
    
In [7]:
    
# df = gen_acc_df('setting1', [('Hit-Rate', 'HitRate@100 \\%'), ('AUC', 'AUC \\%')], 
#                 algos + [('br1', 'LR'), ('mf', 'MF+MLP')])
# df
    
In [8]:
    
# AUC (scaled by 100 inside gen_acc_df) for 'setting1'; the 'mf' results are labelled MF+MLP.
df = gen_acc_df(
    'setting1',
    [('AUC', 'AUC \\%')],
    algos + [('mf', 'MF+MLP')],
)
df
    
    Out[8]:
In [ ]:
    
# Export the current `df` as LaTeX: one left-aligned label column plus one
# centred column per data column. The count comes from `df` itself so the
# column spec always matches the table that is actually written.
tab_str = df.to_latex(float_format=lambda x: '$%.1f$' % x, na_rep='N/A',
                      column_format='l' + '*{%d}{c}' % df.shape[1], escape=False,
                      multirow=True, multicolumn=True, multicolumn_format='c')
print(tab_str)
    
In [9]:
    
# AUC (scaled by 100 inside gen_acc_df) for 'setting3'; the 'mf' results are labelled WMF.
df = gen_acc_df(
    'setting3',
    [('AUC', 'AUC \\%')],
    algos + [('mf', 'WMF')],
)
df
    
    Out[9]:
In [ ]:
    
# Export the current `df` as LaTeX: one left-aligned label column plus one
# centred column per data column. The count comes from `df` itself so the
# column spec always matches the table that is actually written.
tab_str = df.to_latex(float_format=lambda x: '$%.1f$' % x, na_rep='N/A',
                      column_format='l' + '*{%d}{c}' % df.shape[1], escape=False,
                      multirow=True, multicolumn=True, multicolumn_format='c')
print(tab_str)
    
In [15]:
    
# AUC (scaled by 100 inside gen_acc_df) for 'setting4'; the 'mf' results are labelled WMF+kNN.
df = gen_acc_df(
    'setting4',
    [('AUC', 'AUC \\%')],
    algos + [('mf', 'WMF+kNN')],
)
df
    
    Out[15]:
In [ ]:
    
# Export the current `df` as LaTeX: one left-aligned label column plus one
# centred column per data column. The count comes from `df` itself so the
# column spec always matches the table that is actually written.
tab_str = df.to_latex(float_format=lambda x: '$%.1f$' % x, na_rep='N/A',
                      column_format='l' + '*{%d}{c}' % df.shape[1], escape=False,
                      multirow=True, multicolumn=True, multicolumn_format='c')
print(tab_str)
    
Moderate values are preferable
In [11]:
    
def gen_metric_df(setting_name, metric, algo_tuples, datasets=datasets):
    """Collect one test-set metric, unscaled, per algorithm and dataset.

    For every (algorithm, dataset) pair the file
    ``data/<dataset key>/coldstart/<setting_name>/perf-<algo key>.pkl`` is
    loaded if it exists; pairs without such a file keep NaN cells.

    Parameters
    ----------
    setting_name : str
        Name of the sub-directory below ``data/<dataset key>/coldstart/``.
    metric : str
        Key looked up in the ``'Test'`` entry of the loaded results.
    algo_tuples : sequence of (key, label)
        ``key`` selects the result file; ``label`` becomes the row label.
    datasets : sequence of (key, label)
        ``key`` is used in the path and as top-level key of the loaded dict;
        ``label`` becomes the column label.

    Returns
    -------
    pandas.DataFrame
        Rows are algorithm labels, columns are dataset labels.

    Raises
    ------
    KeyError
        If a loaded result file lacks the requested dataset or metric entry.
    """
    row_labels = [a[1] for a in algo_tuples]
    col_labels = [d[1] for d in datasets]
    df = pd.DataFrame(index=row_labels, columns=col_labels)
    for algo in algo_tuples:
        row_ix = algo[1]
        for dataset in datasets:
            data_dir = 'data/%s/coldstart/%s' % (dataset[0], setting_name)
            fperf = os.path.join(data_dir, 'perf-%s.pkl' % algo[0])
            if not os.path.exists(fperf):
                continue
            # NOTE: unpickling can execute arbitrary code -- only load trusted result files.
            with open(fperf, 'rb') as fd:
                perf_dict = pkl.load(fd)
            df.loc[row_ix, dataset[1]] = perf_dict[dataset[0]]['Test'][metric]
    return df
    
In [12]:
    
# 'Spread' values for 'setting1'; the 'mf' results are labelled MF+MLP.
df = gen_metric_df(
    'setting1',
    'Spread',
    algos + [('mf', 'MF+MLP')],
)
df
    
    Out[12]:
In [ ]:
    
# Export the current `df` as LaTeX: a left-aligned label column followed by
# one centred column per dataset; missing cells are written as N/A.
tab_str = df.to_latex(
    float_format=lambda val: '%.1f' % val,
    na_rep='N/A',
    column_format='l*{%d}{c}' % len(datasets),
    escape=False,
    multirow=True,
    multicolumn=True,
    multicolumn_format='c',
)
print(tab_str)
    
In [13]:
    
# 'Spread' values for 'setting3'; the 'mf' results are labelled WMF.
df = gen_metric_df(
    'setting3',
    'Spread',
    algos + [('mf', 'WMF')],
)
df
    
    Out[13]:
In [ ]:
    
# Export the current `df` as LaTeX: a left-aligned label column followed by
# one centred column per dataset; missing cells are written as N/A.
tab_str = df.to_latex(
    float_format=lambda val: '%.1f' % val,
    na_rep='N/A',
    column_format='l*{%d}{c}' % len(datasets),
    escape=False,
    multirow=True,
    multicolumn=True,
    multicolumn_format='c',
)
print(tab_str)
    
In [16]:
    
# 'Spread' values for 'setting4'; the 'mf' results are labelled WMF+kNN.
df = gen_metric_df(
    'setting4',
    'Spread',
    algos + [('mf', 'WMF+kNN')],
)
df
    
    Out[16]:
In [ ]:
    
# Export the current `df` as LaTeX: a left-aligned label column followed by
# one centred column per dataset; missing cells are written as N/A.
tab_str = df.to_latex(
    float_format=lambda val: '%.1f' % val,
    na_rep='N/A',
    column_format='l*{%d}{c}' % len(datasets),
    escape=False,
    multirow=True,
    multicolumn=True,
    multicolumn_format='c',
)
print(tab_str)
    
Higher values are better
In [ ]:
    
# 'PTop' values for 'setting1'; the 'mf' results are labelled MF+MLP.
df = gen_metric_df(
    'setting1',
    'PTop',
    algos + [('mf', 'MF+MLP')],
)
df
    
In [ ]:
    
# Export the current `df` as LaTeX: a left-aligned label column followed by
# one centred column per dataset; missing cells are written as N/A.
tab_str = df.to_latex(
    float_format=lambda val: '%.1f' % val,
    na_rep='N/A',
    column_format='l*{%d}{c}' % len(datasets),
    escape=False,
    multirow=True,
    multicolumn=True,
    multicolumn_format='c',
)
print(tab_str)
    
In [17]:
    
def gen_curve(setting_name, metric_tuple, algo_tuples, datasets=datasets, TOPs=TOPs,
              legend_loc='upper left', svgfile=None, ylim=None):
    """Plot one metric against the values in ``TOPs``, one subplot per dataset.

    For every (dataset, algorithm) pair the file
    ``data/<dataset key>/coldstart/<setting_name>/perf-<algo key>.pkl`` is
    loaded if it exists and drawn as one curve; pairs without a file are
    skipped.

    Parameters
    ----------
    setting_name : str
        Name of the sub-directory below ``data/<dataset key>/coldstart/``.
    metric_tuple : (key, label)
        ``key`` is looked up in the ``'Test'`` entry of the loaded results and
        must be indexable by every value of ``TOPs``; ``label`` is the y-axis
        label of the first subplot.
    algo_tuples : sequence of (key, label)
        ``key`` selects the result file; ``label`` is the legend entry.
    datasets : sequence of (key, label)
        ``key`` is used in the path and as top-level key of the loaded dict;
        ``label`` is the subplot title.
    TOPs : sequence
        x positions of the curve and indices into the metric entry.
    legend_loc : str
        Passed to ``Axes.legend`` as ``loc``.
    svgfile : str or None
        If given, the figure is saved there; the name must end in ``.svg``.
    ylim : optional
        If given, passed to ``Axes.set_ylim`` for every subplot.

    Returns
    -------
    None
    """
    # At least two columns keeps the layout for one or two datasets unchanged,
    # while more datasets no longer overflow the subplot grid.
    nrows, ncols = 1, max(2, len(datasets))
    colors = ["#3498db", "#34495e", "#ff1006", "#977c3c", "#2ecc71", "#9b59b6"]
    linestyles = ['-', '--', ':', '-.', '-', '--']
    sizes = [1, 2, 2, 2, 3, 2]
    fig = plt.figure(figsize=[10, 5])
    for i, dataset in enumerate(datasets):
        data_dir = 'data/%s/coldstart/%s' % (dataset[0], setting_name)
        ax = fig.add_subplot(nrows, ncols, i + 1)
        has_curve = False
        for j, algo in enumerate(algo_tuples):
            fperf = os.path.join(data_dir, 'perf-%s.pkl' % algo[0])
            if not os.path.exists(fperf):
                continue
            # NOTE: unpickling can execute arbitrary code -- only load trusted result files.
            with open(fperf, 'rb') as fd:
                perf_dict = pkl.load(fd)
            y = [perf_dict[dataset[0]]['Test'][metric_tuple[0]][k] for k in TOPs]
            # Styles are tied to the algorithm's position; wrap around instead of
            # failing when there are more algorithms than styles.
            ax.plot(TOPs, y,
                    ls=linestyles[j % len(linestyles)],
                    c=colors[j % len(colors)],
                    lw=sizes[j % len(sizes)],
                    label=algo[1])
            has_curve = True
        # Per-subplot settings do not depend on the algorithm, so apply them
        # once, even when no result file was found for this dataset.
        if ylim is not None:
            ax.set_ylim(ylim)
        ax.set_title(dataset[1], fontsize=18.5)
        if has_curve:
            ax.legend(loc=legend_loc)
        ax.set_xlabel('Number of recommendations', fontsize=17.5)
        if i == 0:
            ax.set_ylabel(metric_tuple[1], fontsize=20)
    if svgfile is not None:
        assert svgfile.endswith('.svg')
        fig.savefig(svgfile)
    
Higher values are better
In [18]:
    
# Switch seaborn to its 'white' style before the curve figures are drawn.
sns.set_style(style='white')
    
In [19]:
    
# Hit-rate curves for 'setting1'. algos[:-1] drops the trailing MTC entry so it
# can be re-appended after the MF+MLP entry and is plotted last.
gen_curve(
    'setting1',
    ('Hit-Rate', 'Hit Rate'),
    algos[:-1] + [('mf', 'MF+MLP'), ('mtc', 'MTC')],
    svgfile='hr1.svg',
)
    
    
In [20]:
    
# Hit-rate curves for 'setting3'. algos[:-1] drops the trailing MTC entry so it
# can be re-appended after the WMF entry and is plotted last.
gen_curve(
    'setting3',
    ('Hit-Rate', 'Hit Rate'),
    algos[:-1] + [('mf', 'WMF'), ('mtc', 'MTC')],
    svgfile='hr3.svg',
)
    
    
In [21]:
    
# Hit-rate curves for 'setting4'. algos[:-1] drops the trailing MTC entry so it
# can be re-appended after the WMF+kNN entry and is plotted last.
gen_curve(
    'setting4',
    ('Hit-Rate', 'Hit Rate'),
    algos[:-1] + [('mf', 'WMF+kNN'), ('mtc', 'MTC')],
    svgfile='hr4.svg',
)
    
    
Moderate values are preferable
In [22]:
    
# Novelty curves for 'setting1', legend in the lower-right corner. MTC is moved
# behind the MF+MLP entry so it is plotted last.
gen_curve(
    'setting1',
    ('Novelty', 'Novelty'),
    algos[:-1] + [('mf', 'MF+MLP'), ('mtc', 'MTC')],
    svgfile='nov1.svg',
    legend_loc='lower right',
)
    
    
In [23]:
    
# Novelty curves for 'setting3', legend in the lower-right corner. MTC is moved
# behind the WMF entry so it is plotted last.
gen_curve(
    'setting3',
    ('Novelty', 'Novelty'),
    algos[:-1] + [('mf', 'WMF'), ('mtc', 'MTC')],
    svgfile='nov3.svg',
    legend_loc='lower right',
)
    
    
In [24]:
    
# Novelty curves for 'setting4', legend in the lower-right corner. MTC is moved
# behind the WMF+kNN entry so it is plotted last.
gen_curve(
    'setting4',
    ('Novelty', 'Novelty'),
    algos[:-1] + [('mf', 'WMF+kNN'), ('mtc', 'MTC')],
    svgfile='nov4.svg',
    legend_loc='lower right',
)
    
    
In [ ]:
    
# gen_curve('setting1', ('Artist-Diversity', 'Artist Diversity'), algos[:-1] + [('mf', 'MF+MLP'), ('mtc', 'MTC')], 
#           svgfile='adiv1.svg', legend_loc='lower right', TOPs=TOPs[5:])
    
In [ ]:
    
# gen_curve('setting3', ('Artist-Diversity', 'Artist Diversity'), algos[:-1] + [('mf', 'WMF'), ('mtc', 'MTC')], 
#           svgfile='adiv3.svg', legend_loc='lower right', TOPs=TOPs[5:])
    
In [ ]:
    
# gen_curve('setting4', ('Artist-Diversity', 'Artist Diversity'), algos[:-1] + [('mf', 'WMF+kNN'), ('mtc', 'MTC')], 
#           svgfile='adiv4.svg', legend_loc='lower right', TOPs=TOPs[5:])
    
In [ ]:
    
# gen_curve('setting1', ('Genre-Diversity', 'Genre-Diversity'), algos, legend_loc='best', TOPs=TOPs[4:])
    
In [ ]:
    
# gen_curve('setting3', ('Genre-Diversity', 'Genre-Diversity'), algos, legend_loc='best', TOPs=TOPs[4:])
    
In [ ]:
    
# gen_curve('setting4', ('Genre-Diversity', 'Genre-Diversity'), algos, legend_loc='best', TOPs=TOPs[4:])