In [ ]:
%load_ext autoreload
%autoreload 2
import time
import sys
import numpy as np
from scipy.sparse import coo_matrix, isspmatrix_coo
from scipy.optimize import minimize

In [ ]:
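# Turn floating-point warnings (overflow, division, invalid log, ...) into raised
# exceptions, so numerical problems in the objective/gradient fail fast.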
settings0 = np.seterr(all='raise')

In [ ]:
class NSR_Dual_Aug(object):
    
    def __init__(self, X_train, Y_train, user_playlist_indices):
        """
        Initialisation and computing shared data.
            Input:
                - X: feature matrix, M x D
                - Y: label matrix,   M x N
        """
        assert isspmatrix_coo(Y_train)
        assert Y_train.dtype == np.bool_
        assert X_train.shape[0] == Y_train.shape[0]
        self.X = X_train
        self.Y = Y_train
        self.M, self.D = self.X.shape
        self.N = self.Y.shape[1]
        self.cliques = user_playlist_indices
        assert np.all(np.arange(self.N) == np.asarray(sorted([k for clq in self.cliques for k in clq])))
        self.U = len(self.cliques)
        self.preprocess()
        self.trained = False

    def init_var(self):
        """
            Initial feasible guess:
              y_n^i = 0: \theta_i^n = +0.001,                   
              y_m^i = 1: \theta_i^m = -0.001 * (M / M_i^+ - 1), 
            which satisfy constraint:
              \sum_m theta_i^m + \sum_n theta_i^n = 0, i=1...N
        """
        W0 = 0.001 * np.ones((self.N, self.M), dtype=float)
        row, col = self.pcol, self.prow  # NOTE: Y (M x N), W (N x M)
        W0[row, col] = -0.001 * ((self.M / self.Msum)[self.pcol] - 1)
        #print(W0[row, col].tolist())
        return W0.ravel()
    
    def preprocess(self):
        """
            Compute shared data
        """
        M, N = self.M, self.N
        self.EPS = 1e-8
        self.BIG = 1e08
        
        self.Msum = self.Y.tocsc().sum(axis=0).A.reshape(-1)  # number of songs per playlist
        self.P = np.log(self.N) + np.log(self.Msum)
        self.pl2u = np.zeros(self.N, dtype=int)
        for u in range(self.U):
            clq = self.cliques[u]
            self.pl2u[clq] = u
        self.u2pl = {i: clq for i, clq in enumerate(self.cliques)}
        
        self.prow = self.Y.row
        self.pcol = self.Y.col
        
        self.ncalls = 0
        
    
    def fit(self, C):
        assert C > 0
        self.C = C   # regularisation constant
        M, N = self.M, self.N
        
        # Box constraints, in the same order as w = W.ravel() (W is N x M):
        #   theta_i^m <= -EPS  if y_m^i = 1;  theta_i^n >= +EPS  if y_n^i = 0
        Ypos = np.zeros((N, M), dtype=bool)
        Ypos[self.pcol, self.prow] = True  # NOTE: Y (M x N), W (N x M)
        self.bnds = [(None, -self.EPS) if pos else (self.EPS, None) for pos in Ypos.ravel()]
#         ret = minimize(self.objective, x0=self.init_var(), args=(), method='L-BFGS-B', jac=self.gradient, 
#                        bounds=bnds, callback=None)
#         if ret.success is True:
#             self.trained = True
#             self.dual2primal(ret.x)
#         else:
#             self.trained = False
#             sys.stderr.write('Optimisation failed: %s' % ret.message)
            

    def predict(self, X_test):
        assert self.trained is True, 'Model has yet to be trained!'
        M, N, U, D = self.M, self.N, self.U, self.D
        assert X_test.shape[1] == D
        Y_pred = np.zeros((X_test.shape[0], N))
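        # score of song x for playlist k owned by user u: x . (v_u + w_k + mu)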
        for k in range(N):
            u = self.pl2u[k]
            vu = self.V[u, :]
            wk = self.WP[k, :]
            mu = self.mu
            Y_pred[:, k] = np.dot(X_test, vu + wk + mu)
        return Y_pred
  

    def dual2primal(self, w):
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        # assert np.isclose(w.sum(), 0)   # check for one constraint
        print('max: %g, min: %g, sum: %g' % (w.max(), w.min(), w.sum()))
        W = w.reshape(N, M)
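        # Primal weights recovered from the dual variables W:
        #   mu  = -(1/C) * sum_k W_k X
        #   v_u = -(U/C) * sum_{k in clique u} W_k X
        #   w_k = -(N/C) * W_k X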
        self.V = np.zeros((U, D), dtype=float)
        self.WP = np.zeros((N, D), dtype=float)
        self.mu = -np.dot(W.sum(axis=0), X) / C
        for u in range(U):
            clq = self.u2pl[u]
            self.V[u, :] = -W[clq, :].sum(axis=0).dot(X) * U / C
        for k in range(N):
            self.WP[k, :] = -W[k, :].dot(X) * N / C
        
        
    def objective(self, w):
        """
            The callback for calculating the objective
        """
        self.ncalls += 1
        sys.stdout.write('\r%d' % self.ncalls)
        
        t0 = time.time()
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        #print(W[self.pcol, self.prow])
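        # Terms of the augmented dual objective:
        #   J1: quadratic term coupling playlists within each user clique (maps to v_u)
        #   J2: per-playlist quadratic term (maps to w_k)
        #   J3: global quadratic term (maps to mu)
        #   J4: barrier-style term over the positive entries (via log(-theta))
        #   J5: quadratic penalty enforcing \sum_m \theta_i^m = 0 per playlist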
        J1 = 0.
        for u in range(U):
            Tu = W[self.u2pl[u], :].sum(axis=0)
            J1 += Tu.dot(X).dot(X.T).dot(Tu)
            
        J2 = 0.
        for k in range(N):
            Tk = W[k, :]
            J2 += Tk.dot(X).dot(X.T).dot(Tk)
        
        Tmu = W.sum(axis=0)
        J3 = Tmu.dot(X).dot(X.T).dot(Tmu)
        
        row, col = self.pcol, self.prow  # NOTE: Y (M x N), W (N x M)
        Tp = W[row, col]
        #print(Tp.tolist())
        J4 = np.dot(Tp, 1 - self.P[self.pcol] - np.log(-Tp))
        
        Tc = W.sum(axis=1)
        J5 = np.dot(Tc, Tc)
            
        J = (U * J1 + N * J2 + J3) * 0.5 / C + J4 + self.BIG * J5
        
        #print('Eval f(): %g, %.1f seconds' % (J, time.time() - t0))
            
        return J
        

    def gradient(self, w):
        """
            The callback for calculating the gradient
        """
        t0 = time.time()
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        dW = np.zeros((N, M), dtype=float)
        Wsum = W.sum(axis=0)
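        # Gradient of the quadratic part: for playlist k in user u's clique,
        #   d[(U*J1 + N*J2 + J3) * 0.5 / C] / dW_k = X X^T (U * T_u + N * W_k + Wsum) / C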
        for k in range(N):
            u = self.pl2u[k]
            clq = self.u2pl[u]
            T = U * W[clq, :].sum(axis=0) + N * W[k, :] + Wsum
            dW[k, :] = np.dot(X, X.T.dot(T)) / C
        
        row, col = self.pcol, self.prow  # NOTE: Y (M x N), W (N x M)
        Tp = W[row, col]
        dW[row, col] -= self.P[self.pcol] + np.log(-Tp)
        
        for k in range(N):
            dW[k, :] += 2 * self.BIG * W[k, :].sum()
            
        g = dW.ravel()
        #print('Eval g(): |g|=%g, %.1f seconds' % (np.sqrt(np.dot(g, g)), time.time() - t0))
        return g
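
A quick sanity check on synthetic data (a minimal sketch; Y_toy, toy and w0_toy are illustrative names, not part of the pipeline): the initial guess from init_var() should satisfy the per-playlist equality constraint \sum_m \theta_i^m + \sum_n \theta_i^n = 0.

In [ ]:
Y_toy = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1],
                  [1, 1, 0], [0, 1, 1], [1, 0, 1]], dtype=bool)
toy = NSR_Dual_Aug(np.random.randn(6, 4), coo_matrix(Y_toy), [np.arange(3)])
w0_toy = toy.init_var().reshape(toy.N, toy.M)
print(np.allclose(w0_toy.sum(axis=1), 0))  # expect True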

In [ ]:
import os, sys
import gzip
import pickle as pkl
from scipy.optimize import check_grad
datasets = ['aotm2011', '30music']
sys.path.append('src')
from tools import create_dataset, dataset_names, nLabels_dict

In [ ]:
dix = 0

In [ ]:
# dataset_name = datasets[dix]
# data_dir = 'data/%s/setting1' % dataset_name
# fxtrain = os.path.join(data_dir, 'X_train.pkl.gz')
# fytrain = os.path.join(data_dir, 'Y_train.pkl.gz')
# fcliques = os.path.join(data_dir, 'cliques_train.pkl.gz')
# X_train = pkl.load(gzip.open(fxtrain, 'rb'))
# Y_train = pkl.load(gzip.open(fytrain, 'rb'))
# cliques = pkl.load(gzip.open(fcliques, 'rb'))

In [ ]:
dataset_name = dataset_names[dix]
nLabels = nLabels_dict[dataset_name]
print(dataset_name, nLabels)

In [ ]:
X_train, Y_train = create_dataset(dataset_name, train_data=True)
X_test,  Y_test  = create_dataset(dataset_name, train_data=False)

In [ ]:
X_train_mean = np.mean(X_train, axis=0).reshape((1, -1))
X_train_std = np.std(X_train, axis=0).reshape((1, -1)) + 1e-6
X_train -= X_train_mean
X_train /= X_train_std
X_test  -= X_train_mean
X_test  /= X_train_std
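
As a quick check (illustrative, not essential): the standardised training features should now have roughly zero mean and unit standard deviation per dimension.

In [ ]:
print(np.abs(X_train.mean(axis=0)).max(), X_train.std(axis=0).mean())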

In [ ]:
def print_dataset_info(X_train, Y_train, X_test, Y_test):
    N_train, D = X_train.shape
    K = Y_train.shape[1]
    N_test = X_test.shape[0]
    print('%-45s %s' % ('Number of training examples:', '{:,}'.format(N_train)))
    print('%-45s %s' % ('Number of test examples:', '{:,}'.format(N_test)))
    print('%-45s %s' % ('Number of features:', '{:,}'.format(D)))
    print('%-45s %s' % ('Number of labels:', '{:,}'.format(K)))

In [ ]:
print('%-45s %s' % ('Dataset:', dataset_name))
print_dataset_info(X_train, Y_train, X_test, Y_test)

In [ ]:
def check_grad_loop(obj, grad, w0):
    """Central-difference gradient check: returns ||numeric - analytic||_2."""
    eps = 1.49e-08
    g_num = np.zeros_like(w0)
    for i in range(len(w0)):
        if (i+1) % 1000 == 0:
            sys.stdout.write('\r%d / %d' % (i+1, len(w0)))
        wi1, wi2 = w0.copy(), w0.copy()
        wi1[i] -= eps
        wi2[i] += eps
        g_num[i] = (obj(wi2) - obj(wi1)) / (2 * eps)
    diff = grad(w0) - g_num
    return np.sqrt(np.dot(diff, diff))
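
A minimal usage sketch of check_grad_loop on a function with a known gradient: f(w) = w·w has gradient 2w, so the returned discrepancy should be near zero.

In [ ]:
print(check_grad_loop(lambda w: np.dot(w, w), lambda w: 2 * w, np.ones(5)))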

Solve the dual problem with L-BFGS-B, then recover the primal weights.


In [ ]:
#cliques = [[0], [1], [2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
cliques = [np.arange(Y_train.shape[1])]
model = NSR_Dual_Aug(X_train, coo_matrix(Y_train), cliques)

In [ ]:
#w0 = model.init_var()
#model.C = 1
#%lprun -f accumulate_risk \
#%lprun -f objective \
#check_grad(model.objective, model.gradient, w0)

In [ ]:
#check_grad_loop(model.objective, model.gradient, w0)

In [ ]:
model.fit(C=1)
ret = minimize(model.objective, x0=model.init_var(), method='L-BFGS-B',
               jac=model.gradient, bounds=model.bnds)
model.trained = ret.success  # predict() asserts this flag
if model.trained:
    model.dual2primal(ret.x)  # recover primal weights (V, WP, mu)

In [ ]:
model.trained

In [ ]:
Y_pred = model.predict(X_test)

In [ ]:
from tools import calc_RPrecision_HitRate
def calc_RP(Y_true, Y_pred):
    assert Y_true.shape == Y_pred.shape
    rps = []
    for j in range(Y_true.shape[1]):
        y_true = Y_true[:, j]
        y_pred = Y_pred[:, j]
        rp, _ = calc_RPrecision_HitRate(y_true, y_pred)
        rps.append(rp)
    return rps

In [ ]:
rps = calc_RP(Y_test, Y_pred)

In [ ]:
np.mean(rps)