In [1]:
%load_ext autoreload
%autoreload 2
import time
import sys
import numpy as np
from scipy.sparse import coo_matrix, lil_matrix, isspmatrix_coo
import cyipopt

In [2]:
settings0 = np.seterr(all='raise')

In [3]:
class NSR_Rank_Dual(cyipopt.problem):
    
    def __init__(self, X_train, Y_train, user_playlist_indices):
        """
        Initialisation and computing shared data.
            Input:
                - X: feature matrix, M x D
                - Y: label matrix,   M x N
        """
        assert isspmatrix_coo(Y_train)
        assert X_train.shape[0] == Y_train.shape[0]
        self.X = X_train
        self.Y = Y_train
        self.M, self.D = self.X.shape
        self.N = self.Y.shape[1]
        self.cliques = user_playlist_indices
        assert np.all(np.arange(self.N) == np.asarray(sorted([k for clq in self.cliques for k in clq])))
        self.U = len(self.cliques)
        self.preprocess()
        self.trained = False

    def init_var(self):
        """
            Initial feasible guess:
              y_n^i = 0: \theta_i^n = +0.001,                   
              y_m^i = 1: \theta_i^m = -0.001 * (M / M_i^+ - 1), 
            which satisfy constraint:
              \sum_m theta_i^m + \sum_n theta_i^n = 0, i=1...N
        """
        W0 = 0.001 * np.ones((self.N, self.M), dtype=float)
        row, col = self.pcol, self.prow  # Y is M x N but W is N x M, so Y's (m, n) nonzeros index W at (n, m)
        W0[row, col] = -0.001 * ((self.M / self.Msum)[self.pcol] - 1)
        return W0.ravel()
    
    def preprocess(self):
        """
            Compute shared data
        """
        M, N = self.M, self.N
        self.EPS = 1e-8
        
        self.Msum = self.Y.tocsc().sum(axis=0).A.reshape(-1)  # number of songs per playlist
        self.P = np.log(self.N) + np.log(self.Msum)
        self.pl2u = np.zeros(self.N, dtype=int)
        for u in range(self.U):
            clq = self.cliques[u]
            self.pl2u[clq] = u
        self.u2pl = {i: clq for i, clq in enumerate(self.cliques)}
        
        self.prow = self.Y.row
        self.pcol = self.Y.col
        
        # sparse matrix to hold the jacobian structure
        self.js = lil_matrix((N, N * M), dtype=bool)  # N constraints in total
        Mvec = np.arange(M)
        for k in range(N):
            self.js[k, k * M + Mvec] = True
        self.js = self.js.tocoo()
        
        # Jacobian of constraints
        self.OneNM = np.ones(N * M, dtype=float)
    
    
    def fit(self, C):
        assert C > 0
        self.C = C   # regularisation constant
        M, N = self.M, self.N
        row, col = self.pcol, self.prow  # NOTE: Y (M x N), W (N x M)
        
        # constraints
        # -1e6 <= theta_i^m <= -EPS, y_m^i = 1
        #  EPS <= theta_i^n <= 1e6,  y_n^i = 0
        LB = np.full((N, M), self.EPS, dtype=float)
        LB[row, col] = -1e6
        UB = np.full((N, M), 1e6, dtype=np.float)
        UB[row, col] = -self.EPS
        
        super(NSR_Rank_Dual, self).__init__(
            n = N * M,        # number of variables
            m = N,            # number of constraints
            lb = LB.ravel(),  # lower bounds on variables
            ub = UB.ravel(),  # upper bounds on variables
            cl = np.zeros(N), # lower bounds on constraints: equality constraints
            cu = np.zeros(N)  # upper bounds on constraints: equality constraints
        )
  
        # Set solver options, https://www.coin-or.org/Ipopt/documentation/node51.html
        self.addOption(b'mu_strategy', b'adaptive')
        self.addOption(b'max_iter', int(1e4))
        self.addOption(b'tol', 1e-7)
        self.addOption(b'acceptable_tol', 1e-5)
        #self.addOption(b'derivative_test', b'first-order')
        #self.addOption(b'acceptable_constr_viol_tol', 1e-6)
        self.addOption(b'linear_solver', b'ma57')  # MA57 is an HSL solver; requires a local HSL installation

        #try:
        w, info = super(NSR_Rank_Dual, self).solve(self.init_var())
        print(info['status'], info['status_msg'])
        self.trained = True
        self.dual2primal(w)
        #except:
        #    self.trained = False
        #    print('Optimisation failed')

    def predict(self, X_test):
        assert self.trained is True, 'Model has yet to be trained!'
        M, N, U, D = self.M, self.N, self.U, self.D
        assert X_test.shape[1] == D
        Y_pred = np.zeros((X_test.shape[0], N))
        for k in range(N):
            u = self.pl2u[k]
            vu = self.V[u, :]
            wk = self.WP[k, :]
            mu = self.mu
            Y_pred[:, k] = np.dot(X_test, vu + wk + mu)
        return Y_pred
  

    def dual2primal(self, w):
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        # assert np.isclose(w.sum(), 0)   # check for one constraint
        print('max: %g, min: %g, sum: %g' % (w.max(), w.min(), w.sum()))
        W = w.reshape(N, M)
        self.V = np.zeros((U, D), dtype=float)
        self.WP = np.zeros((N, D), dtype=float)
        self.mu = -np.dot(W.sum(axis=0), X) / C
        for u in range(U):
            clq = self.u2pl[u]
            self.V[u, :] = -W[clq, :].sum(axis=0).dot(X) * U / C
        for k in range(N):
            self.WP[k, :] = -W[k, :].dot(X) * N / C
        
        
    def objective(self, w):
        """
            The callback for calculating the objective
        """
        # self.last_w[:] = w
        t0 = time.time()
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        J1 = 0.
        for u in range(U):
            Tu = W[self.u2pl[u], :].sum(axis=0)
            J1 += Tu.dot(X).dot(X.T).dot(Tu)
            
        J2 = 0.
        for k in range(N):
            Tk = W[k, :]
            J2 += Tk.dot(X).dot(X.T).dot(Tk)
        
        Tmu = W.sum(axis=0)
        J3 = Tmu.dot(X).dot(X.T).dot(Tmu)
        
        row, col = self.pcol, self.prow  # NOTE: Y (M x N), W (N x M)
        Tp = W[row, col]
        J4 = np.dot(Tp, 1 - self.P[self.pcol] - np.log(-Tp))
            
        J = (U * J1 + N * J2 + J3) * 0.5 / C + J4
        
        #print('Eval f(): %g, %.1f seconds' % (J, time.time() - t0))
            
        return J
        

    def gradient(self, w):
        """
            The callback for calculating the gradient
        """
        t0 = time.time()
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        dW = np.zeros((N, M), dtype=float)
        Wsum = W.sum(axis=0)
        for k in range(N):
            u = self.pl2u[k]
            clq = self.u2pl[u]
            T = U * W[clq, :].sum(axis=0) + N * W[k, :] + Wsum
            dW[k, :] = np.dot(X, X.T.dot(T)) / C
        
        row, col = self.pcol, self.prow  # NOTE: Y (M x N), W (N x M)
        Tp = W[row, col]
        dW[row, col] -= self.P[self.pcol] + np.log(-Tp)
        g = dW.ravel()
        #print('Eval g(): |g|=%g, %.1f seconds' % (np.sqrt(np.dot(g, g)), time.time() - t0))
        return g
    

    def constraints(self, w):
        """
            The callback for calculating the constraints
        """
        M, N = self.M, self.N
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        return W.sum(axis=1)
    
    
    def jacobianstructure(self):
        """
            The sparse structure (i.e., rows, cols) of the Jacobian matrix
        """
        return self.js.row, self.js.col
        
    
    def jacobian(self, w):
        """
            The callback for calculating the Jacobian of constraints
        """
        return self.OneNM
    
        
#     def intermediate(
#             self,
#             alg_mod,
#             iter_count,
#             obj_value,
#             inf_pr,
#             inf_du,
#             mu,
#             d_norm,
#             regularization_size,
#             alpha_du,
#             alpha_pr,
#             ls_trials
#         ):
#         """
#             The intermediate callback
#         """
#         print('Iter %5d: %20.6f' % (iter_count, obj_value))
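
A quick structural cross-check (a sketch, not part of the original notebook): each constraint k sums row k of W, so the constant Jacobian should equal kron(I_N, 1_M^T). Here `model` stands for any instance of the class, e.g. the one constructed in In [13] below.

# Sketch: assemble the sparse Jacobian and compare it with the dense Kronecker form.
rows, cols = model.jacobianstructure()
vals = model.jacobian(model.init_var())
J_sparse = coo_matrix((vals, (rows, cols)), shape=(model.N, model.N * model.M)).toarray()
J_dense = np.kron(np.eye(model.N), np.ones((1, model.M)))
assert np.array_equal(J_sparse, J_dense)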

In [4]:
import os, sys
import gzip
import pickle as pkl
from scipy.optimize import check_grad
#datasets = ['aotm2011', '30music']
sys.path.append('src')
from tools import create_dataset, dataset_names, nLabels_dict

In [5]:
dix = 2

In [6]:
# dataset_name = datasets[dix]
# data_dir = 'data/%s/setting1' % dataset_name
# fxtrain = os.path.join(data_dir, 'X_train.pkl.gz')
# fytrain = os.path.join(data_dir, 'Y_train.pkl.gz')
# fcliques = os.path.join(data_dir, 'cliques_train.pkl.gz')
# X_train = pkl.load(gzip.open(fxtrain, 'rb'))
# Y_train = pkl.load(gzip.open(fytrain, 'rb'))
# cliques = pkl.load(gzip.open(fcliques, 'rb'))

In [7]:
dataset_name = dataset_names[dix]
nLabels = nLabels_dict[dataset_name]
print(dataset_name, nLabels)


bibtex 159

In [8]:
X_train, Y_train = create_dataset(dataset_name, train_data=True)
X_test,  Y_test  = create_dataset(dataset_name, train_data=False)

In [9]:
X_train_mean = np.mean(X_train, axis=0).reshape((1, -1))
X_train_std = np.std(X_train, axis=0).reshape((1, -1)) + 10 ** (-6)
X_train -= X_train_mean
X_train /= X_train_std
X_test  -= X_train_mean
X_test  /= X_train_std
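
After standardisation each training column has zero mean, and columns with non-trivial variance have (near-)unit standard deviation; constant columns stay at zero because of the 1e-6 guard. A quick sanity check one might run (a sketch; the test set deliberately reuses the training statistics):

# Sketch: verify the training statistics after standardisation.
assert np.allclose(X_train.mean(axis=0), 0, atol=1e-6)
col_std = X_train.std(axis=0)
print('fraction of columns with std ~ 1: %.3f' % np.mean(np.isclose(col_std, 1, atol=1e-3)))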

In [10]:
def print_dataset_info(X_train, Y_train, X_test, Y_test):
    N_train, D = X_train.shape
    K = Y_train.shape[1]
    N_test = X_test.shape[0]
    print('%-45s %s' % ('Number of training examples:', '{:,}'.format(N_train)))
    print('%-45s %s' % ('Number of test examples:', '{:,}'.format(N_test)))
    print('%-45s %s' % ('Number of features:', '{:,}'.format(D)))
    print('%-45s %s' % ('Number of labels:', '{:,}'.format(K)))

In [11]:
print('%-45s %s' % ('Dataset:', dataset_name))
print_dataset_info(X_train, Y_train, X_test, Y_test)


Dataset:                                      bibtex
Number of training examples:                  4,880
Number of test examples:                      2,515
Number of features:                           1,836
Number of labels:                             159

In [12]:
def check_grad_loop(obj, grad, w0):
    """Coordinate-wise central-difference check of the analytic gradient; returns the error norm."""
    eps = 1.49e-08
    w = np.zeros_like(w0)
    for i in range(len(w0)):
        if (i+1) % 1000 == 0:
            sys.stdout.write('\r%d / %d' % (i+1, len(w0)))
        wi1 = w0.copy()
        wi2 = w0.copy()
        wi1[i] = wi1[i] - eps
        wi2[i] = wi2[i] + eps
        J1 = obj(wi1)
        J2 = obj(wi2)
        w[i] = (J2 - J1) / (2 * eps)
        #print(w[i])
    w1 = grad(w0)
    diff = w1 - w
    return np.sqrt(np.dot(diff, diff))
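
Looping over all N * M coordinates is expensive at this problem size; a random spot check is usually enough to catch a wrong gradient. A minimal sketch (the helper name and sample size are illustrative, not from the original notebook):

def check_grad_sample(obj, grad, w0, n_samples=100, eps=1.49e-08, seed=0):
    """Central-difference gradient check on a random subset of coordinates (sketch)."""
    rng = np.random.RandomState(seed)
    idx = rng.choice(len(w0), size=n_samples, replace=False)
    g = grad(w0)
    diffs = []
    for i in idx:
        w1, w2 = w0.copy(), w0.copy()
        w1[i] -= eps
        w2[i] += eps
        diffs.append((obj(w2) - obj(w1)) / (2 * eps) - g[i])
    return np.sqrt(np.dot(diffs, diffs))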

Solve.


In [13]:
#cliques = [[0], [1], [2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
cliques = [np.arange(Y_train.shape[1])]  # no user/playlist structure here: all labels form a single clique
model = NSR_Rank_Dual(X_train, coo_matrix(Y_train), cliques)
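
By construction the initial guess is feasible: each row of W0 sums to zero, and positives (y = 1) get negative entries while negatives (y = 0) get positive ones. A quick check (a sketch, assuming no playlist contains every song):

W0 = model.init_var().reshape(model.N, model.M)
assert np.allclose(W0.sum(axis=1), 0)           # equality constraints hold
assert np.all(W0[model.pcol, model.prow] < 0)   # entries where Y[m, n] = 1 are negative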

In [14]:
w0 = model.init_var()
model.C = 1
#%lprun -f accumulate_risk \
#%lprun -f objective \
#check_grad(model.objective, model.gradient, w0)

In [15]:
#check_grad_loop(model.objective, model.gradient, w0)

In [16]:
model.fit(C=1)


1 b'Algorithm stopped at a point that was converged, not to "desired" tolerances, but to "acceptable" tolerances (see the acceptable-... options).'
max: 0.000582671, min: -0.000219973, sum: 9.65545e-09

In [17]:
model.trained


Out[17]:
True

In [18]:
Y_pred = model.predict(X_test)

In [19]:
from tools import calc_RPrecision_HitRate
def calc_RP(Y_true, Y_pred):
    assert Y_true.shape == Y_pred.shape
    rps = []
    for j in range(Y_true.shape[1]):
        y_true = Y_true[:, j]
        y_pred = Y_pred[:, j]
        rp, _ = calc_RPrecision_HitRate(y_true, y_pred)
        rps.append(rp)
    return rps
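
For reference, a minimal sketch of the metric itself, assuming calc_RPrecision_HitRate implements standard R-precision (with R the number of positives, the fraction of positives among the top-R scores):

def r_precision(y_true, y_pred):
    """Standard R-precision (sketch): fraction of positives among the top-R scored items."""
    R = int(np.sum(y_true > 0))
    if R == 0:
        return np.nan
    top = np.argsort(-np.asarray(y_pred))[:R]
    return float(np.mean(np.asarray(y_true)[top] > 0))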

In [20]:
rps = calc_RP(Y_test, Y_pred)

In [21]:
np.mean(rps)


Out[21]:
0.384380044412107