In [1]:
import numpy as np
import paper

# Number of folds whose saved arrays are loaded below; files are named
# <PREFIX><fold>.npy and are read from the current working directory.
n_folds = 4

# TS* -> test-set index arrays, HO* -> boolean held-out masks,
# MAE* -> saved MAE values (one list entry per fold).
test_sets = [np.load('TS{}.npy'.format(fold)) for fold in range(n_folds)]
held_outs = [np.load('HO{}.npy'.format(fold)) for fold in range(n_folds)]
maes = [np.load('MAE{}.npy'.format(fold)) for fold in range(n_folds)]

In [2]:
# Peek at fold 0's test set: an integer index array that is used further down
# to select rows from the per-row counts (presumably one row per user).
ts = test_sets[0]
ts


Out[2]:
array([   3,    4,    7, ..., 6027, 6029, 6037])

In [3]:
# Peek at fold 0's held-out mask: a boolean array that is used further down to
# blank out (set to NaN) the matching entries of a copy of paper.Rnan.
ho = held_outs[0]
ho


Out[3]:
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ..., 
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]], dtype=bool)

In [4]:
# Peek at fold 0's saved MAE values. They are later indexed with boolean masks
# derived from the test set, so there is one value per test-set entry.
mae = maes[0]
mae


Out[4]:
array([ 0.85714286,  0.91919192,  0.70503597, ...,  0.74468085,
        0.69565217,  1.55      ])

In [5]:
# Work on a copy so paper.Rnan itself is not modified, then mark fold 0's
# held-out entries as missing (NaN) so they no longer count as available.
R_test = np.copy(paper.Rnan)
R_test[ho] = np.nan
R_test


Out[5]:
array([[  5.,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [  3.,  nan,  nan, ...,  nan,  nan,  nan]])

In [6]:
# For every row, count the entries that are still available (> 0) once the
# held-out entries are NaN, then keep only the rows listed in fold 0's test set.
# NaN > 0 evaluates to False, which is exactly the "not available" answer we
# want; errstate only silences the "invalid value encountered" RuntimeWarning
# that the comparison otherwise triggers.
with np.errstate(invalid='ignore'):
    n_available = np.sum(R_test > 0, axis=1)[ts]
n_available


/home/xyllan/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: invalid value encountered in greater
  if __name__ == '__main__':
Out[6]:
array([ 3, 20, 14, ...,  5, 12,  2])

In [7]:
# Overall figures for fold 0: number of test-set entries and their mean MAE.
print('Num users:', len(mae))
print('Average MAE:', mae.mean())


Num users: 1510
Average MAE: 0.744205551368

In [8]:
# D1 bucket, fold 0: test-set entries with fewer than 4 available ratings.
d1_inds = n_available < 4
d1_maes = mae[d1_inds]
print('Num users:', len(d1_maes))
print('Average MAE:', d1_maes.mean())


Num users: 195
Average MAE: 0.852002892044

In [9]:
# D2 bucket, fold 0: test-set entries with 4 to 9 available ratings
# (n_available holds integer counts, so ">= 4" is the same as "> 3").
d2_inds = (n_available >= 4) & (n_available < 10)
d2_maes = mae[d2_inds]
print('Num users:', len(d2_maes))
print('Average MAE:', d2_maes.mean())


Num users: 525
Average MAE: 0.764849890762

In [10]:
# D3 bucket, fold 0: test-set entries with 10 or more available ratings
# (n_available holds integer counts, so ">= 10" is the same as "> 9").
d3_inds = n_available >= 10
d3_maes = mae[d3_inds]
print('Num users:', len(d3_maes))
print('Average MAE:', d3_maes.mean())


Num users: 790
Average MAE: 0.703878007553

In [11]:
def get_maes(R, mae, test_set, held_outs, d2_min=4, d3_min=10):
    """Split one fold's MAE values into buckets by number of available ratings.

    The held-out entries of ``R`` are treated as missing, the remaining
    positive entries are counted per row, and each test-set row is assigned to
    a bucket according to that count.

    Parameters
    ----------
    R : array_like, 2-D
        Rating matrix; NaN (or any non-positive value) means "no rating".
        It is copied, never modified.
    mae : numpy.ndarray
        One MAE value per entry of ``test_set``, in the same order.
    test_set : array of int
        Row indices of ``R`` that make up the test set.
    held_outs : boolean array, same shape as ``R``
        Entries that were hidden for this fold and must not count as available.
    d2_min : int, optional
        Smallest count that belongs to D2; rows below it go to D1. Default 4.
    d3_min : int, optional
        Smallest count that belongs to D3. Default 10.

    Returns
    -------
    tuple of numpy.ndarray
        ``(d1_maes, d2_maes, d3_maes, mae)`` where D1 is ``count < d2_min``,
        D2 is ``d2_min <= count < d3_min``, D3 is ``count >= d3_min`` and the
        last item is the unmodified ``mae`` input.
    """
    # Copy as float so NaN can be stored even when R has an integer dtype.
    R_test = np.array(R, dtype=float)
    R_test[held_outs] = np.nan

    # NaN > 0 is False, which is the intended "not available"; errstate only
    # silences the RuntimeWarning that the comparison otherwise emits.
    with np.errstate(invalid='ignore'):
        is_available = R_test > 0
    n_available = np.sum(is_available, axis=1)[test_set]

    d1_inds = n_available < d2_min
    d2_inds = np.logical_and(n_available >= d2_min, n_available < d3_min)
    d3_inds = n_available >= d3_min
    return mae[d1_inds], mae[d2_inds], mae[d3_inds], mae

def get_cumul_maes(R, maes, test_sets, held_outs):
    """Pool the bucketed MAE values of every fold into four flat arrays.

    ``get_maes`` is called once per fold (folds are paired up positionally
    from ``test_sets``, ``held_outs`` and ``maes``) and its four outputs are
    concatenated across folds.

    Returns
    -------
    tuple of numpy.ndarray
        ``(d1_maes, d2_maes, d3_maes, all_maes)``, each the horizontal stack
        of the corresponding per-fold arrays.
    """
    # One accumulator per get_maes output, in the order get_maes returns them.
    pooled = ([], [], [], [])
    for fold_test, fold_held, fold_mae in zip(test_sets, held_outs, maes):
        fold_parts = get_maes(R, fold_mae, fold_test, fold_held)
        for accumulator, part in zip(pooled, fold_parts):
            accumulator.append(part)
    return tuple(np.hstack(accumulator) for accumulator in pooled)

# Pool the buckets over all folds. Note: this rebinds d1_maes/d2_maes/d3_maes,
# which earlier cells had set to the fold-0-only values.
d1_maes, d2_maes, d3_maes, all_maes = get_cumul_maes(paper.Rnan, maes, test_sets, held_outs)


/home/xyllan/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:4: RuntimeWarning: invalid value encountered in greater

In [12]:
# Report the size and mean MAE of each bucket, pooled over all folds.
for heading, bucket in (
    ('D1 (extremely cold start) users:', d1_maes),
    ('D2 (cold start) users:', d2_maes),
    ('D3 (warm start) users:', d3_maes),
    ('All users:', all_maes),
):
    print(heading)
    print('Num users:', bucket.shape[0])
    print('Average MAE:', np.mean(bucket))


D1 (extremely cold start) users:
Num users: 809
Average MAE: 0.857315445252
D2 (cold start) users:
Num users: 2117
Average MAE: 0.773490025629
D3 (warm start) users:
Num users: 3114
Average MAE: 0.698172525663
All users:
Num users: 6040
Average MAE: 0.745886725891

In [13]:
def k_avg(R, maes, test_sets, held_outs):
    """Average bucket sizes and bucket mean-MAEs across folds.

    For every fold, ``get_maes`` is evaluated and, for each of its four
    outputs (D1, D2, D3, all), the number of values and their mean are
    recorded. Both tables are then averaged over the folds, i.e. each fold
    carries equal weight regardless of its bucket sizes.

    Parameters
    ----------
    R : array_like, 2-D
        Rating matrix passed through to ``get_maes``.
    maes, test_sets, held_outs : sequences
        Per-fold MAE arrays, test-set index arrays and held-out masks; folds
        are paired up positionally.

    Returns
    -------
    n_users : numpy.ndarray, shape (4,)
        Mean number of values per bucket, ordered D1, D2, D3, all.
    ms : numpy.ndarray, shape (4,)
        Mean over folds of each bucket's mean MAE, in the same order. A bucket
        that is empty in some fold contributes NaN for that fold.
    """
    # Size the tables from the inputs rather than a notebook-level global, so
    # a different number of folds neither dilutes the averages with zero rows
    # nor overruns the arrays.
    folds = list(zip(test_sets, held_outs, maes))
    n_buckets = 4  # get_maes returns (d1, d2, d3, all)

    # Builtin int replaces the removed np.int alias (same dtype).
    n_users = np.zeros((len(folds), n_buckets), dtype=int)
    ms = np.zeros((len(folds), n_buckets))
    for i, (test_set, held_out, mae) in enumerate(folds):
        for j, m in enumerate(get_maes(R, mae, test_set, held_out)):
            n_users[i, j] = m.shape[0]
            ms[i, j] = np.mean(m)
    return np.mean(n_users, axis=0), np.mean(ms, axis=0)

# Per-fold averages (each fold weighted equally), as opposed to the pooled
# figures printed above; columns are ordered D1, D2, D3, all.
k_avg(paper.Rnan, maes, test_sets, held_outs)


/home/xyllan/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:4: RuntimeWarning: invalid value encountered in greater
Out[13]:
(array([  202.25,   529.25,   778.5 ,  1510.  ]),
 array([ 0.85720252,  0.77322922,  0.69812563,  0.74588673]))