In [ ]:
%load_ext autoreload
%autoreload 2
import time
import sys
import numpy as np
from scipy.sparse import coo_matrix, isspmatrix_coo
from scipy.optimize import minimize
In [ ]:
settings0 = np.seterr(all='raise')  # turn floating-point warnings (overflow, invalid log, ...) into exceptions
In [ ]:
class NSR_Dual_Aug(object):
def __init__(self, X_train, Y_train, user_playlist_indices):
"""
Initialisation and computing shared data.
Input:
- X: feature matrix, M x D
- Y: label matrix, M x N
"""
assert isspmatrix_coo(Y_train)
        assert Y_train.dtype == bool  # np.bool was removed from NumPy; use the builtin
assert X_train.shape[0] == Y_train.shape[0]
self.X = X_train
self.Y = Y_train
self.M, self.D = self.X.shape
self.N = self.Y.shape[1]
self.cliques = user_playlist_indices
assert np.all(np.arange(self.N) == np.asarray(sorted([k for clq in self.cliques for k in clq])))
self.U = len(self.cliques)
self.preprocess()
self.trained = False
def init_var(self):
"""
Initial feasible guess:
y_n^i = 0: \theta_i^n = +0.001,
y_m^i = 1: \theta_i^m = -0.001 * (M / M_i^+ - 1),
which satisfy constraint:
\sum_m theta_i^m + \sum_n theta_i^n = 0, i=1...N
"""
        W0 = 0.001 * np.ones((self.N, self.M), dtype=np.float64)  # np.float was removed from NumPy
row, col = self.pcol, self.prow # NOTE: Y (M x N), W (N x M)
W0[row, col] = -0.001 * ((self.M / self.Msum)[self.pcol] - 1)
#print(W0[row, col].tolist())
return W0.ravel()
def preprocess(self):
"""
Compute shared data
"""
M, N = self.M, self.N
self.EPS = 1e-8
self.BIG = 1e08
self.Msum = self.Y.tocsc().sum(axis=0).A.reshape(-1) # number of songs per playlist
self.P = np.log(self.N) + np.log(self.Msum)
        self.pl2u = np.zeros(self.N, dtype=int)  # playlist index -> user index
for u in range(self.U):
clq = self.cliques[u]
self.pl2u[clq] = u
self.u2pl = {i: clq for i, clq in enumerate(self.cliques)}
self.prow = self.Y.row
self.pcol = self.Y.col
self.ncalls = 0
def fit(self, C):
assert C > 0
self.C = C # regularisation constant
M, N = self.M, self.N
# constraints
# theta_i^m <= -EPS, y_m^i = 1
# theta_i^n >= +EPS, y_n^i = 0
        # Bounds must follow the layout of w = W.ravel() with W of shape (N, M):
        # entry i * M + m corresponds to playlist i and song m, i.e. Y[m, i].
        # NB: sparse element access returns a NumPy scalar, so the original test
        # `Y[m, i] is True` was always False; compare truth values on a dense view.
        Yt = self.Y.toarray().T  # N x M boolean
        self.bnds = [(None, -self.EPS) if y else (self.EPS, None) for y in Yt.ravel()]
        # ret = minimize(self.objective, x0=self.init_var(), args=(), method='L-BFGS-B',
        #                jac=self.gradient, bounds=self.bnds, callback=None)
        # if ret.success is True:
        #     self.trained = True
        #     self.dual2primal(ret.x)
        # else:
        #     self.trained = False
        #     sys.stderr.write('Optimisation failed: %s\n' % ret.message)
def predict(self, X_test):
assert self.trained is True, 'Model has yet to be trained!'
M, N, U, D = self.M, self.N, self.U, self.D
assert X_test.shape[1] == D
Y_pred = np.zeros((X_test.shape[0], N))
        for k in range(N):
            u = self.pl2u[k]    # the user who owns playlist k
            vu = self.V[u, :]   # user-level weights
            wk = self.WP[k, :]  # playlist-level weights
            mu = self.mu        # global weights
            Y_pred[:, k] = np.dot(X_test, vu + wk + mu)
return Y_pred
def dual2primal(self, w):
M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
X = self.X
assert w.shape == (N * M,)
# assert np.isclose(w.sum(), 0) # check for one constraint
print('max: %g, min: %g, sum: %g' % (w.max(), w.min(), w.sum()))
W = w.reshape(N, M)
        self.V = np.zeros((U, D), dtype=np.float64)
        self.WP = np.zeros((N, D), dtype=np.float64)
self.mu = -np.dot(W.sum(axis=0), X) / C
for u in range(U):
clq = self.u2pl[u]
self.V[u, :] = -W[clq, :].sum(axis=0).dot(X) * U / C
for k in range(N):
self.WP[k, :] = -W[k, :].dot(X) * N / C
def objective(self, w):
"""
The callback for calculating the objective
"""
self.ncalls += 1
sys.stdout.write('\r%d' % self.ncalls)
t0 = time.time()
M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
X = self.X
assert w.shape == (N * M,)
W = w.reshape(N, M)
#print(W[self.pcol, self.prow])
        J1 = 0.  # user-level quadratic term
        for u in range(U):
            Tu = W[self.u2pl[u], :].sum(axis=0)
            J1 += Tu.dot(X).dot(X.T).dot(Tu)
        J2 = 0.  # playlist-level quadratic term
        for k in range(N):
            Tk = W[k, :]
            J2 += Tk.dot(X).dot(X.T).dot(Tk)
        Tmu = W.sum(axis=0)  # global quadratic term
        J3 = Tmu.dot(X).dot(X.T).dot(Tmu)
        row, col = self.pcol, self.prow  # NOTE: Y (M x N), W (N x M)
        Tp = W[row, col]  # dual variables of the positive labels (all negative by the bounds)
        #print(Tp.tolist())
        J4 = np.dot(Tp, 1 - self.P[self.pcol] - np.log(-Tp))
        Tc = W.sum(axis=1)
        J5 = np.dot(Tc, Tc)  # quadratic penalty for the per-playlist zero-sum constraint
        J = (U * J1 + N * J2 + J3) * 0.5 / C + J4 + self.BIG * J5
#print('Eval f(): %g, %.1f seconds' % (J, time.time() - t0))
return J
def gradient(self, w):
"""
The callback for calculating the gradient
"""
t0 = time.time()
M, N, D, U, C = self.M, self.N, self.D, self.U, self.C
X = self.X
assert w.shape == (N * M,)
W = w.reshape(N, M)
        dW = np.zeros((N, M), dtype=np.float64)
Wsum = W.sum(axis=0)
for k in range(N):
u = self.pl2u[k]
clq = self.u2pl[u]
T = U * W[clq, :].sum(axis=0) + N * W[k, :] + Wsum
dW[k, :] = np.dot(X, X.T.dot(T)) / C
row, col = self.pcol, self.prow # NOTE: Y (M x N), W (N x M)
Tp = W[row, col]
dW[row, col] -= self.P[self.pcol] + np.log(-Tp)
for k in range(N):
dW[k, :] += 2 * self.BIG * W[k, :].sum()
g = dW.ravel()
#print('Eval g(): |g|=%g, %.1f seconds' % (np.sqrt(np.dot(g, g)), time.time() - t0))
return g
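A quick sanity check that the feasible initial guess really satisfies the zero-sum constraint stated in the init_var docstring, on a tiny synthetic problem (shapes and data below are made up purely for illustration):
In [ ]:
Y_toy = np.zeros((6, 3), dtype=bool)
Y_toy[[0, 1, 2], [0, 1, 2]] = True   # one distinct song per playlist
Y_toy[3, :] = True                   # plus a shared song, so every playlist is non-empty
toy = NSR_Dual_Aug(np.random.randn(6, 4), coo_matrix(Y_toy), [np.arange(3)])
W_toy = toy.init_var().reshape(toy.N, toy.M)
print(np.allclose(W_toy.sum(axis=1), 0))  # expect True: each row of W0 sums to zero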
In [ ]:
import os, sys
import gzip
import pickle as pkl
from scipy.optimize import check_grad
datasets = ['aotm2011', '30music']
sys.path.append('src')
from tools import create_dataset, dataset_names, nLabels_dict
In [ ]:
dix = 0
In [ ]:
# dataset_name = datasets[dix]
# data_dir = 'data/%s/setting1' % dataset_name
# fxtrain = os.path.join(data_dir, 'X_train.pkl.gz')
# fytrain = os.path.join(data_dir, 'Y_train.pkl.gz')
# fcliques = os.path.join(data_dir, 'cliques_train.pkl.gz')
# X_train = pkl.load(gzip.open(fxtrain, 'rb'))
# Y_train = pkl.load(gzip.open(fytrain, 'rb'))
# cliques = pkl.load(gzip.open(fcliques, 'rb'))
In [ ]:
dataset_name = dataset_names[dix]
nLabels = nLabels_dict[dataset_name]
print(dataset_name, nLabels)
In [ ]:
X_train, Y_train = create_dataset(dataset_name, train_data=True)
X_test, Y_test = create_dataset(dataset_name, train_data=False)
In [ ]:
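# Standardise features with training-set statistics only; the 1e-6 offset guards against zero-variance columns.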
X_train_mean = np.mean(X_train, axis=0).reshape((1, -1))
X_train_std = np.std(X_train, axis=0).reshape((1, -1)) + 10 ** (-6)
X_train -= X_train_mean
X_train /= X_train_std
X_test -= X_train_mean
X_test /= X_train_std
In [ ]:
def print_dataset_info(X_train, Y_train, X_test, Y_test):
N_train, D = X_train.shape
K = Y_train.shape[1]
N_test = X_test.shape[0]
print('%-45s %s' % ('Number of training examples:', '{:,}'.format(N_train)))
print('%-45s %s' % ('Number of test examples:', '{:,}'.format(N_test)))
print('%-45s %s' % ('Number of features:', '{:,}'.format(D)))
print('%-45s %s' % ('Number of labels:', '{:,}'.format(K)))
In [ ]:
print('%-45s %s' % ('Dataset:', dataset_name))
print_dataset_info(X_train, Y_train, X_test, Y_test)
In [ ]:
def check_grad_loop(obj, grad, w0):
    """
    Compare the analytic gradient with a central finite-difference estimate,
    one coordinate at a time; return the Euclidean norm of the difference.
    """
    eps = 1.49e-08
w = np.zeros_like(w0)
for i in range(len(w0)):
if (i+1) % 1000 == 0:
sys.stdout.write('\r%d / %d' % (i+1, len(w0)))
wi1 = w0.copy()
wi2 = w0.copy()
wi1[i] = wi1[i] - eps
wi2[i] = wi2[i] + eps
J1 = obj(wi1)
J2 = obj(wi2)
w[i] = (J2 - J1) / (2 * eps)
#print(w[i])
w1 = grad(w0)
diff = w1 - w
return np.sqrt(np.dot(diff, diff))
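A minimal self-test of check_grad_loop on a function with a known gradient: for f(w) = 0.5 * ||w||^2 the gradient is w itself, so the reported difference should be near zero (the toy size and data are arbitrary):
In [ ]:
f = lambda w: 0.5 * np.dot(w, w)
g = lambda w: w
print(check_grad_loop(f, g, np.random.randn(5)))  # expect a value at round-off level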
Solve the dual problem with L-BFGS-B.
In [ ]:
#cliques = [[0], [1], [2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
cliques = [np.arange(Y_train.shape[1])]
model = NSR_Dual_Aug(X_train, coo_matrix(Y_train), cliques)
In [ ]:
#w0 = model.init_var()
#model.C = 1
#%lprun -f accumulate_risk \
#%lprun -f objective \
#check_grad(model.objective, model.gradient, w0)
In [ ]:
#check_grad_loop(model.objective, model.gradient, w0)
In [ ]:
model.fit(C=1)
ret = minimize(model.objective, x0=model.init_var(), method='L-BFGS-B', jac=model.gradient,
bounds=model.bnds, callback=None)
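In [ ]:
# Recover the primal parameters from the dual solution, mirroring the commented-out
# post-processing in fit(); without this step, predict() below would fail its
# trained check.
if ret.success:
    model.trained = True
    model.dual2primal(ret.x)
else:
    model.trained = False
    sys.stderr.write('Optimisation failed: %s\n' % ret.message)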
In [ ]:
model.trained
In [ ]:
Y_pred = model.predict(X_test)
In [ ]:
from tools import calc_RPrecision_HitRate
def calc_RP(Y_true, Y_pred):
    """Compute R-precision for each column (playlist) of Y_pred against Y_true."""
    assert Y_true.shape == Y_pred.shape
rps = []
for j in range(Y_true.shape[1]):
y_true = Y_true[:, j]
y_pred = Y_pred[:, j]
rp, _ = calc_RPrecision_HitRate(y_true, y_pred)
rps.append(rp)
return rps
In [ ]:
rps = calc_RP(Y_test, Y_pred)
In [ ]:
np.mean(rps)