In [1]:
import numpy as np
import paper
# Number of folds; one TS<i>.npy / HO<i>.npy / MAE<i>.npy triple is read per fold
# from the current working directory.
n_folds = 4
test_sets = [np.load('TS' + str(fold) + '.npy') for fold in range(n_folds)]
held_outs = [np.load('HO' + str(fold) + '.npy') for fold in range(n_folds)]
maes = [np.load('MAE' + str(fold) + '.npy') for fold in range(n_folds)]
In [2]:
# Inspect the first fold's test-set array (later used to index rows of the count vector).
ts = test_sets[0]
ts
Out[2]:
In [3]:
# Inspect the first fold's held-out index (later used to blank entries of the rating matrix).
ho = held_outs[0]
ho
Out[3]:
In [4]:
# Inspect the first fold's MAE array.
mae = maes[0]
mae
Out[4]:
In [5]:
# Work on a copy so that paper.Rnan itself is not modified.
R_test = np.copy(paper.Rnan)
# Blank out the first fold's held-out entries.
R_test[ho] = np.nan
R_test
Out[5]:
In [6]:
# Count, per row, the entries that are > 0 (NaN never compares greater than 0),
# then keep only the rows selected by ts.
n_available = (R_test > 0).sum(axis=1)[ts]
n_available
Out[6]:
In [7]:
# Size and overall mean of the first fold's MAE array.
print('Num users:', len(mae))
print('Average MAE:', mae.mean())
In [8]:
# D1 bucket: fewer than 4 available entries.
d1_inds = n_available < 4
d1_maes = mae[d1_inds]
print('Num users:', len(d1_maes))
print('Average MAE:', d1_maes.mean())
In [9]:
# D2 bucket: more than 3 and fewer than 10 available entries.
d2_inds = (n_available > 3) & (n_available < 10)
d2_maes = mae[d2_inds]
print('Num users:', len(d2_maes))
print('Average MAE:', d2_maes.mean())
In [10]:
# D3 bucket: more than 9 available entries.
d3_inds = n_available > 9
d3_maes = mae[d3_inds]
print('Num users:', len(d3_maes))
print('Average MAE:', d3_maes.mean())
In [11]:
def get_maes(R, mae, test_set, held_outs, cold_threshold=4, warm_threshold=10):
    """Split one fold's MAE values into three buckets by available-entry count.

    A copy of ``R`` has the entries selected by ``held_outs`` set to NaN; the
    number of remaining entries that are ``> 0`` is then counted along axis 1
    and restricted to the rows listed in ``test_set``. Each element of ``mae``
    is assigned to a bucket according to the count at the same position.

    Parameters
    ----------
    R : array_like
        2-D matrix whose rows are indexed by ``test_set``. It is copied, never
        modified. Entries that are NaN or not greater than 0 are not counted.
    mae : ndarray
        One value per element of ``test_set``, in the same order.
    test_set : index array
        Row selector applied to the per-row counts.
    held_outs : index
        Any NumPy index into ``R`` selecting the entries to blank out.
    cold_threshold : int, optional
        Rows with fewer than this many available entries go to the first
        bucket. Defaults to 4.
    warm_threshold : int, optional
        Rows with at least this many available entries go to the third
        bucket. Defaults to 10.

    Returns
    -------
    tuple of ndarray
        ``(d1_maes, d2_maes, d3_maes, mae)`` where d1 has
        ``count < cold_threshold``, d2 has
        ``cold_threshold <= count < warm_threshold``, d3 has
        ``count >= warm_threshold``, and the last element is ``mae`` unchanged.
    """
    # Copy first so the caller's matrix keeps its held-out entries.
    R_test = np.copy(R)
    R_test[held_outs] = np.nan

    # Integer count per row; comparisons against NaN are False, so blanked
    # entries drop out of the sum.
    n_available = np.sum(R_test > 0, axis=1)[test_set]

    d1_inds = n_available < cold_threshold
    d2_inds = np.logical_and(n_available >= cold_threshold,
                             n_available < warm_threshold)
    d3_inds = n_available >= warm_threshold

    return mae[d1_inds], mae[d2_inds], mae[d3_inds], mae
def get_cumul_maes(R, maes, test_sets, held_outs):
    """Pool the bucketed MAE arrays of every fold into one array per bucket.

    ``get_maes`` is called once per ``(test_set, held_out, mae)`` triple and
    the four arrays it returns are collected position by position. Each
    collection is then concatenated with ``np.hstack``.

    Returns a 4-tuple of arrays in the same order ``get_maes`` uses.
    """
    # One accumulator per position of the get_maes result.
    buckets = ([], [], [], [])
    for fold_test, fold_held, fold_mae in zip(test_sets, held_outs, maes):
        fold_parts = get_maes(R, fold_mae, fold_test, fold_held)
        for bucket, part in zip(buckets, fold_parts):
            bucket.append(part)
    return tuple(np.hstack(bucket) for bucket in buckets)
# Pool all folds; note this rebinds d1_maes/d2_maes/d3_maes from the first-fold cells above.
d1_maes, d2_maes, d3_maes, all_maes = get_cumul_maes(paper.Rnan, maes, test_sets, held_outs)
In [12]:
# Report size and mean MAE for each pooled bucket, then for all users together.
print('D1 (extremely cold start) users:')
print('Num users:', len(d1_maes))
print('Average MAE:', d1_maes.mean())

print('D2 (cold start) users:')
print('Num users:', len(d2_maes))
print('Average MAE:', d2_maes.mean())

print('D3 (warm start) users:')
print('Num users:', len(d3_maes))
print('Average MAE:', d3_maes.mean())

print('All users:')
print('Num users:', len(all_maes))
print('Average MAE:', all_maes.mean())
In [13]:
def k_avg(R, maes, test_sets, held_outs):
    """Average per-fold bucket sizes and per-fold mean MAEs across folds.

    For each ``(test_set, held_out, mae)`` triple, ``get_maes`` is called and,
    for each of the four arrays it returns, the array length and its mean are
    recorded. Both tables are then averaged over the fold axis.

    Unlike pooling all folds together, every fold carries equal weight here
    regardless of how many entries fall into a bucket in that fold. A bucket
    that is empty in some fold has a NaN mean, which makes that bucket's
    average NaN.

    Returns
    -------
    n_users : ndarray, shape (4,)
        Mean number of entries per bucket across folds.
    ms : ndarray, shape (4,)
        Mean of the per-fold mean MAEs per bucket.
    """
    # Size the tables from the folds actually supplied rather than from a
    # notebook-level global, so the function works for any number of folds.
    folds = list(zip(test_sets, held_outs, maes))
    n_buckets = 4  # get_maes returns (d1, d2, d3, all)

    # Builtin int: the np.int alias was removed in NumPy 1.24.
    n_users = np.zeros((len(folds), n_buckets), dtype=int)
    ms = np.zeros((len(folds), n_buckets))

    for i, (test_set, held_out, mae) in enumerate(folds):
        for j, m in enumerate(get_maes(R, mae, test_set, held_out)):
            n_users[i, j] = m.shape[0]
            ms[i, j] = np.mean(m)

    return np.mean(n_users, axis=0), np.mean(ms, axis=0)
# Fold-averaged bucket sizes and mean MAEs (each fold weighted equally).
k_avg(paper.Rnan, maes, test_sets, held_outs)
Out[13]: