In [1]:
%load_ext autoreload
%autoreload 2
import time
import sys
import numpy as np
from scipy.sparse import issparse, csc_matrix, coo_matrix, lil_matrix, isspmatrix_coo, isspmatrix_csc
import cyipopt
In [2]:
settings0 = np.seterr(all='raise')
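Floating-point errors are promoted to exceptions here, so that numerical issues (e.g. taking `np.log` of a dual variable with the wrong sign in the objective below) raise immediately instead of silently producing `nan` or `inf`.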
In [3]:
class NSR_Rank_Dual(cyipopt.problem):
    def __init__(self, X_train, Y_train, user_playlist_indices):
        """
        Initialisation and computation of shared data.
        Input:
            - X_train: feature matrix, M x D
            - Y_train: label matrix (sparse COO), M x N
            - user_playlist_indices: one clique of playlist indices per user
        """
        assert isspmatrix_coo(Y_train)
        assert X_train.shape[0] == Y_train.shape[0]
        self.X = X_train
        self.Y = Y_train
        self.M, self.D = self.X.shape
        self.N = self.Y.shape[1]
        self.cliques = user_playlist_indices
        # the cliques must partition {0, ..., N-1}
        assert np.all(np.arange(self.N) == np.asarray(sorted([k for clq in self.cliques for k in clq])))
        self.U = len(self.cliques)
        self.preprocess()
        self.trained = False

    def init_var(self):
        """
        Initial feasible guess:
            y_n^i = 0: \theta_i^n = +0.001,
            y_m^i = 1: \theta_i^m = -0.001 * (M / M_i^+ - 1),
        which satisfies the equality constraints
            \sum_m \theta_i^m + \sum_n \theta_i^n = 0,  i = 1...N
        """
        W0 = 0.001 * np.ones((self.N, self.M), dtype=float)
        row, col = self.pcol, self.prow  # NOTE: Y is M x N, W is N x M
        W0[row, col] = -0.001 * ((self.M / self.Msum)[self.pcol] - 1)
        return W0.ravel()

    def preprocess(self):
        """
        Compute shared data.
        """
        M, N = self.M, self.N
        self.EPS = 1e-8
        self.Msum = self.Y.tocsc().sum(axis=0).A.reshape(-1)  # number of songs per playlist
        self.P = np.log(self.N) + np.log(self.Msum)
        self.pl2u = np.zeros(self.N, dtype=int)  # map: playlist index -> user index
        for u in range(self.U):
            clq = self.cliques[u]
            self.pl2u[clq] = u
        self.u2pl = {i: clq for i, clq in enumerate(self.cliques)}  # map: user index -> playlist indices
        self.prow = self.Y.row  # row (song) indices of positive labels
        self.pcol = self.Y.col  # column (playlist) indices of positive labels
        # sparse matrix holding the Jacobian structure: N constraints in total,
        # and constraint k involves only the variables k*M ... (k+1)*M - 1
        self.js = lil_matrix((N, N * M), dtype=bool)
        Mvec = np.arange(M)
        for k in range(N):
            self.js[k, k * M + Mvec] = True
        self.js = self.js.tocoo()
        # non-zero values of the (constant) Jacobian of the constraints
        self.OneNM = np.ones(N * M, dtype=float)

    def fit(self, C):
        assert C > 0
        self.C = C  # regularisation constant
        M, N = self.M, self.N
        row, col = self.pcol, self.prow  # NOTE: Y is M x N, W is N x M
        # bounds on the variables:
        #   -1e6 <= theta_i^m <= -EPS  if y_m^i = 1
        #    EPS <= theta_i^n <=  1e6  if y_n^i = 0
        LB = np.full((N, M), self.EPS, dtype=float)
        LB[row, col] = -1e6
        UB = np.full((N, M), 1e6, dtype=float)
        UB[row, col] = -self.EPS
        super(NSR_Rank_Dual, self).__init__(
            n=N * M,         # number of variables
            m=N,             # number of constraints
            lb=LB.ravel(),   # lower bounds on variables
            ub=UB.ravel(),   # upper bounds on variables
            cl=np.zeros(N),  # lower bounds on constraints: equality constraints
            cu=np.zeros(N),  # upper bounds on constraints: equality constraints
        )
        # Set solver options, https://www.coin-or.org/Ipopt/documentation/node51.html
        self.addOption(b'mu_strategy', b'adaptive')
        self.addOption(b'max_iter', int(1e4))
        self.addOption(b'tol', 1e-7)
        self.addOption(b'acceptable_tol', 1e-5)
        # self.addOption(b'derivative_test', b'first-order')
        # self.addOption(b'acceptable_constr_viol_tol', 1e-6)
        self.addOption(b'linear_solver', b'ma57')
        # try:
        w, info = super(NSR_Rank_Dual, self).solve(self.init_var())
        print(info['status'], info['status_msg'])
        self.trained = True
        self.dual2primal(w)
        # except:
        #     self.trained = False
        #     print('Optimisation failed')

    def predict(self, X_test):
        assert self.trained is True, 'Model has yet to be trained!'
        M, N, U, D = self.M, self.N, self.U, self.D
        assert X_test.shape[1] == D
        Y_pred = np.zeros((X_test.shape[0], N))
        for k in range(N):
            u = self.pl2u[k]
            vu = self.V[u, :]
            wk = self.WP[k, :]
            mu = self.mu
            Y_pred[:, k] = np.dot(X_test, vu + wk + mu)
        return Y_pred

    def dual2primal(self, w):
        """
        Recover the primal parameters (V, WP, mu) from the dual solution w.
        """
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        # assert np.isclose(w.sum(), 0)  # check for one constraint
        print('max: %g, min: %g, sum: %g' % (w.max(), w.min(), w.sum()))
        W = w.reshape(N, M)
        self.V = np.zeros((U, D), dtype=float)
        self.WP = np.zeros((N, D), dtype=float)
        self.mu = -np.dot(W.sum(axis=0), X) / C
        for u in range(U):
            clq = self.u2pl[u]
            self.V[u, :] = -W[clq, :].sum(axis=0).dot(X) * U / C
        for k in range(N):
            self.WP[k, :] = -W[k, :].dot(X) * N / C

    def objective(self, w):
        """
        The callback for calculating the objective.
        """
        # self.last_w[:] = w
        t0 = time.time()
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        J1 = 0.
        for u in range(U):
            Tu = W[self.u2pl[u], :].sum(axis=0)
            J1 += Tu.dot(X).dot(X.T).dot(Tu)
        J2 = 0.
        for k in range(N):
            Tk = W[k, :]
            J2 += Tk.dot(X).dot(X.T).dot(Tk)
        Tmu = W.sum(axis=0)
        J3 = Tmu.dot(X).dot(X.T).dot(Tmu)
        row, col = self.pcol, self.prow  # NOTE: Y is M x N, W is N x M
        Tp = W[row, col]  # dual variables at positive labels (all negative)
        J4 = np.dot(Tp, 1 - self.P[self.pcol] - np.log(-Tp))
        J = (U * J1 + N * J2 + J3) * 0.5 / C + J4
        # print('Eval f(): %g, %.1f seconds' % (J, time.time() - t0))
        return J

    def gradient(self, w):
        """
        The callback for calculating the gradient.
        """
        t0 = time.time()
        M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
        X = self.X
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        dW = np.zeros((N, M), dtype=float)
        Wsum = W.sum(axis=0)
        for k in range(N):
            u = self.pl2u[k]
            clq = self.u2pl[u]
            T = U * W[clq, :].sum(axis=0) + N * W[k, :] + Wsum
            dW[k, :] = np.dot(X, X.T.dot(T)) / C
        row, col = self.pcol, self.prow  # NOTE: Y is M x N, W is N x M
        Tp = W[row, col]
        dW[row, col] -= self.P[self.pcol] + np.log(-Tp)
        g = dW.ravel()
        # print('Eval g(): |g|=%g, %.1f seconds' % (np.sqrt(np.dot(g, g)), time.time() - t0))
        return g

    def constraints(self, w):
        """
        The callback for calculating the constraints:
        each row of W must sum to zero.
        """
        M, N = self.M, self.N
        assert w.shape == (N * M,)
        W = w.reshape(N, M)
        return W.sum(axis=1)

    def jacobianstructure(self):
        """
        The sparse structure (i.e., rows, cols) of the Jacobian matrix.
        """
        return self.js.row, self.js.col

    def jacobian(self, w):
        """
        The callback for calculating the Jacobian of the constraints,
        which is constant: all ones on the sparse structure above.
        """
        return self.OneNM

    # def intermediate(self, alg_mod, iter_count, obj_value, inf_pr, inf_du,
    #                  mu, d_norm, regularization_size, alpha_du, alpha_pr,
    #                  ls_trials):
    #     """
    #     The intermediate callback
    #     """
    #     print('Iter %5d: %20.6f' % (iter_count, obj_value))
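As a sanity check, the initial guess returned by `init_var` should satisfy all N equality constraints exactly. A minimal sketch on a tiny synthetic dataset (the sizes and random labels below are made up purely for illustration):
rng = np.random.RandomState(0)
M_toy, D_toy, N_toy = 20, 5, 4                    # hypothetical sizes
Y_dense = rng.rand(M_toy, N_toy) < 0.3
Y_dense[0, :] = True                              # every playlist needs >= 1 song
toy = NSR_Rank_Dual(rng.randn(M_toy, D_toy), coo_matrix(Y_dense), [np.arange(N_toy)])
print(np.abs(toy.constraints(toy.init_var())).max())  # expect round-off level, e.g. ~1e-17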
In [4]:
import os, sys
import gzip
import pickle as pkl
from scipy.optimize import check_grad
#datasets = ['aotm2011', '30music']
sys.path.append('src')
from tools import create_dataset, dataset_names, nLabels_dict
In [5]:
dix = 2
In [6]:
# dataset_name = datasets[dix]
# data_dir = 'data/%s/setting1' % dataset_name
# fxtrain = os.path.join(data_dir, 'X_train.pkl.gz')
# fytrain = os.path.join(data_dir, 'Y_train.pkl.gz')
# fcliques = os.path.join(data_dir, 'cliques_train.pkl.gz')
# X_train = pkl.load(gzip.open(fxtrain, 'rb'))
# Y_train = pkl.load(gzip.open(fytrain, 'rb'))
# cliques = pkl.load(gzip.open(fcliques, 'rb'))
In [7]:
dataset_name = dataset_names[dix]
nLabels = nLabels_dict[dataset_name]
print(dataset_name, nLabels)
In [8]:
X_train, Y_train = create_dataset(dataset_name, train_data=True)
X_test, Y_test = create_dataset(dataset_name, train_data=False)
In [9]:
X_train_mean = np.mean(X_train, axis=0).reshape((1, -1))
X_train_std = np.std(X_train, axis=0).reshape((1, -1)) + 10 ** (-6)
X_train -= X_train_mean
X_train /= X_train_std
X_test -= X_train_mean
X_test /= X_train_std
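The 10^(-6) term guards against division by zero for constant features; note that the test set is standardised with the training-set statistics to avoid information leakage.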
In [10]:
def print_dataset_info(X_train, Y_train, X_test, Y_test):
    N_train, D = X_train.shape
    K = Y_train.shape[1]
    N_test = X_test.shape[0]
    print('%-45s %s' % ('Number of training examples:', '{:,}'.format(N_train)))
    print('%-45s %s' % ('Number of test examples:', '{:,}'.format(N_test)))
    print('%-45s %s' % ('Number of features:', '{:,}'.format(D)))
    print('%-45s %s' % ('Number of labels:', '{:,}'.format(K)))
In [11]:
print('%-45s %s' % ('Dataset:', dataset_name))
print_dataset_info(X_train, Y_train, X_test, Y_test)
In [12]:
def check_grad_loop(obj, grad, w0):
    """
    Central-difference gradient check: compare the analytic gradient
    `grad` against a numerical gradient of `obj` at w0, and return the
    Euclidean norm of their difference.
    """
    eps = 1.49e-08
    w = np.zeros_like(w0)  # numerical gradient
    for i in range(len(w0)):
        if (i + 1) % 1000 == 0:
            sys.stdout.write('\r%d / %d' % (i + 1, len(w0)))
        wi1 = w0.copy()
        wi2 = w0.copy()
        wi1[i] = wi1[i] - eps
        wi2[i] = wi2[i] + eps
        J1 = obj(wi1)
        J2 = obj(wi2)
        w[i] = (J2 - J1) / (2 * eps)
    w1 = grad(w0)  # analytic gradient
    diff = w1 - w
    return np.sqrt(np.dot(diff, diff))
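As a quick smoke test of the checker itself (a made-up quadratic, not part of the model): for f(w) = ||w||^2 / 2 the gradient is w, so the returned discrepancy should be at round-off level.
w_test = np.random.randn(10)
err = check_grad_loop(lambda w: 0.5 * np.dot(w, w), lambda w: w.copy(), w_test)
print(err)  # expect something tiny, e.g. < 1e-6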
Solve the dual optimisation problem.
In [13]:
#cliques = [[0], [1], [2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
cliques = [np.arange(Y_train.shape[1])]
model = NSR_Rank_Dual(X_train, coo_matrix(Y_train), cliques)
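Here a single clique containing every playlist is used, i.e. U = 1: all playlists are attributed to one user, so the per-user component V degenerates to a single shared weight vector.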
In [14]:
w0 = model.init_var()
model.C = 1
#%lprun -f accumulate_risk \
#%lprun -f objective \
#check_grad(model.objective, model.gradient, w0)
In [15]:
#check_grad_loop(model.objective, model.gradient, w0)
In [16]:
model.fit(C=1)
In [17]:
model.trained
Out[17]:
In [18]:
Y_pred = model.predict(X_test)
In [19]:
from tools import calc_RPrecision_HitRate
def calc_RP(Y_true, Y_pred):
    """
    Compute R-precision for each playlist (column).
    """
    assert Y_true.shape == Y_pred.shape
    rps = []
    for j in range(Y_true.shape[1]):
        y_true = Y_true[:, j]
        y_pred = Y_pred[:, j]
        rp, _ = calc_RPrecision_HitRate(y_true, y_pred)
        rps.append(rp)
    return rps
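`calc_RPrecision_HitRate` lives in `src/tools.py`; for context, R-precision for one playlist can be sketched as below (a minimal stand-alone version under assumed semantics, not the actual implementation in `tools`):
def r_precision(y_true, y_pred):
    # with R positive songs, take the fraction of positives among
    # the R highest-scoring songs
    R = int(np.sum(y_true > 0))
    top = np.argsort(-y_pred)[:R]
    return float(np.sum(y_true[top] > 0)) / R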
In [20]:
rps = calc_RP(Y_test, Y_pred)
In [21]:
np.mean(rps)
Out[21]: