Evaluate results per user


In [ ]:
%matplotlib inline
import os
import sys
import gzip
import numpy as np
import pickle as pkl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [ ]:
plt.style.use('seaborn')  # renamed to 'seaborn-v0_8' in matplotlib >= 3.6

In [ ]:
datasets = [('30music', '30Music'), ('aotm2011', 'AotM-2011')]
TOPs = [5, 10, 20, 30, 50, 100, 200, 300, 500, 1000]  # Hit-Rate cutoffs (only @100 is used in this section)

In [ ]:
metrics = [('Hit-Rate', 'HitRate@100'), ('AUC', 'AUC')]
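
The per-algorithm performance pickles loaded below are assumed to follow the nested layout sketched here; the key names are inferred from how the plotting cells index them, not from a documented schema:

In [ ]:
# Hypothetical toy example of the assumed result-pickle structure:
# one AUC per evaluated test playlist, and Hit-Rate keyed by cutoff.
toy_perf = {'30music': {'Test_All': {'AUC': [0.91, 0.84, 0.77],
                                     'Hit-Rate': {100: [0.40, 0.25, 0.10]}}}}
assert len(toy_perf['30music']['Test_All']['AUC']) == 3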

Cold Songs


In [ ]:
task = 1
base_dir = 'setting%d' % task

In [ ]:
algos = [('nsr', 'Multitask Classification'),
         ('br1', 'Logistic Regression'),
         ('pop', 'Popularity Ranking'), 
         ('cagh', 'CAGH'), ('sagh', 'SAGH')]

Evaluate per user


In [ ]:
metric = metrics[0]  # Hit-Rate@100; use metrics[1] for AUC

In [ ]:
nrows, ncols = len(algos), len(datasets)
colors = ["#2ecc71", "#9b59b6", "#3498db", "#34495e", "#ff1006", "#e74c3c"]
fig = plt.figure(figsize=[10, 20])
for j in range(len(datasets)):
    dataset = datasets[j]
    data_dir = 'data/%s/%s' % (dataset[0], base_dir)
    cliques = pkl.load(gzip.open(os.path.join(data_dir, 'cliques_trndev.pkl.gz'), 'rb'))
    Y_test = pkl.load(gzip.open(os.path.join(data_dir, 'Y_test.pkl.gz'), 'rb'))
    fperfs = [os.path.join(data_dir, 'perf-%s.pkl' % algo) for algo, _ in algos]
    perf_dicts = [pkl.load(open(fperf, 'rb')) if os.path.exists(fperf) else None for fperf in fperfs]
    
    npos = Y_test.sum(axis=0).A.reshape(-1)
    nz_ix = sorted(np.nonzero(npos)[0].tolist())
    # print(nz_ix)
    
    U = len(cliques)
    u2pl = dict()
    pl2u = np.zeros(Y_test.shape[1], dtype=np.int)
    for u in range(U):
        clq = cliques[u]
        u2pl[u] = clq
        pl2u[clq] = u
    
    u2perf_dicts = []
    for i in range(len(perf_dicts)):
        perf = perf_dicts[i]
        assert len(perf[dataset[0]]['Test_All']['AUC']) == len(nz_ix)
        u2perf = dict()
        for k in range(len(nz_ix)):
            u = pl2u[nz_ix[k]]
            auc = perf[dataset[0]]['Test_All']['AUC'][k] \
                  if metric[0] == 'AUC' else perf[dataset[0]]['Test_All']['Hit-Rate'][100][k]
            try:
                u2perf[u].append(auc)
            except KeyError:
                u2perf[u] = [auc]
        u2perf_dicts.append(u2perf)
        npl_user = [len(u2pl[u]) for u in sorted(u2perf)]
        mean_auc = [np.mean(u2perf[u]) for u in sorted(u2perf)]
        ax = plt.subplot(nrows, ncols, i * len(datasets) + j + 1)
        ax.scatter(npl_user, mean_auc, color=colors[i], alpha=0.5, s=20)
        lim = [-0.03, 1.03]
        ax.set_ylim(lim)
        if i == len(algos) - 1:
            ax.set_xlabel('#Playlists per User for Training')
        if j == 0:
            ax.set_ylabel('Mean %s per User' % metric[1])
        ax.set_title('%s (%s)' % (algos[i][1], dataset[1]))
plt.savefig('%s_per_user%d.svg' % (metric[0].replace('-', '').lower(), 0 if task == 1 else task-2))
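
The pl2u array above inverts the user-to-playlists mapping; a minimal sketch on toy data (assuming cliques is a list of playlist-index arrays, one per user, that together cover all playlists):

In [ ]:
# hypothetical data: user 0 owns playlists 0 and 2,
# user 1 owns playlists 1, 3 and 4
toy_cliques = [np.array([0, 2]), np.array([1, 3, 4])]
toy_pl2u = np.zeros(5, dtype=int)
for u, clq in enumerate(toy_cliques):
    toy_pl2u[clq] = u
print(toy_pl2u)  # [0 1 0 1 1]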

Cold Playlists


In [ ]:
task = 3
base_dir = 'setting%d' % task

In [ ]:
algos = [('mtc', 'Multitask Classification'),
         ('pop', 'Popularity Ranking'),
         ('cagh', 'CAGH'), ('sagh', 'SAGH')]

Evaluate per user


In [ ]:
metric = metrics[0]  # Hit-Rate@100; use metrics[1] for AUC

In [ ]:
nrows, ncols = len(algos), len(datasets)
colors = ["#2ecc71", "#9b59b6", "#3498db", "#34495e", "#ff1006", "#e74c3c"]
fig = plt.figure(figsize=[10, 20])
for j in range(len(datasets)):
    dataset = datasets[j]
    data_dir = 'data/%s/%s' % (dataset[0], base_dir)
    Y_train = pkl.load(gzip.open(os.path.join(data_dir, 'Y_train.pkl.gz'), 'rb'))
    Y_test = pkl.load(gzip.open(os.path.join(data_dir, 'Y_test.pkl.gz'), 'rb'))
    cliques_train = pkl.load(gzip.open(os.path.join(data_dir, 'cliques_train.pkl.gz'), 'rb'))
    cliques_all = pkl.load(gzip.open(os.path.join(data_dir, 'cliques_all.pkl.gz'), 'rb'))
    fperfs = [os.path.join(data_dir, 'perf-%s.pkl' % algo) for algo, _ in algos]
    perf_dicts = [pkl.load(open(fperf, 'rb')) if os.path.exists(fperf) else None for fperf in fperfs]
    
    pl2u_train = np.zeros(Y_train.shape[1], dtype=np.int)
    pl2u_all = np.zeros(Y_train.shape[1] + Y_test.shape[1], dtype=np.int)
    U = len(cliques_all)
    assert U == len(cliques_train)
    for u in range(U):
        pl2u_train[cliques_train[u]] = u
        pl2u_all[cliques_all[u]] = u
    assert np.all(pl2u_train == pl2u_all[:Y_train.shape[1]])
    
    u2perf_dicts = []
    offset = Y_train.shape[1]
    for i in range(len(perf_dicts)):
        perf = perf_dicts[i]
        assert len(perf[dataset[0]]['Test_All']['AUC']) == Y_test.shape[1]
        u2perf = dict()
        for k in range(Y_test.shape[1]):
            u = pl2u_all[k + offset]
            num = perf[dataset[0]]['Test_All']['AUC'][k] \
                  if metric[0] == 'AUC' else perf[dataset[0]]['Test_All']['Hit-Rate'][100][k]
            try:
                u2perf[u].append(num)
            except KeyError:
                u2perf[u] = [auc]
        u2perf_dicts.append(u2perf)
        npl_user = [len(cliques_train[u]) for u in sorted(u2perf)]
        mean_num = [np.mean(u2perf[u]) for u in sorted(u2perf)]
        ax = plt.subplot(nrows, ncols, i * len(datasets) + j + 1)
        ax.scatter(npl_user, mean_num, color=colors[i], alpha=0.5, s=20)
        lim = [-0.03, 1.03]
        ax.set_ylim(lim)
        if i == len(algos) - 1:
            ax.set_xlabel('#Playlists per User for Training')
        if j == 0:
            ax.set_ylabel('Mean %s per User' % metric[1])
        ax.set_title('%s (%s)' % (algos[i][1], dataset[1]))
plt.savefig('%s_per_user%d.svg' % (metric[0].replace('-', '').lower(), 0 if task == 1 else task-2))
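
In this setting, test playlists are indexed after the training ones, hence the k + offset lookup into pl2u_all; a toy check (hypothetical data, assuming cliques_all appends the held-out playlists after the training ones):

In [ ]:
n_train = 3  # playlists 0..2 are training, 3..4 are test
toy_cliques_all = [np.array([0, 3]), np.array([1, 2, 4])]
toy_pl2u_all = np.zeros(5, dtype=int)
for u, clq in enumerate(toy_cliques_all):
    toy_pl2u_all[clq] = u
# owners of the two test playlists
print([int(toy_pl2u_all[k + n_train]) for k in range(2)])  # [0, 1]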