In [ ]:
%matplotlib inline
%load_ext line_profiler
%load_ext memory_profiler
%load_ext autoreload
%autoreload 2
import os, sys, time
import pickle as pkl
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix
from scipy.optimize import minimize
from scipy.optimize import check_grad
from scipy.optimize.slsqp import _minimize_slsqp
import nlopt
import ipopt
from sklearn.base import BaseEstimator
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
sys.path.append('src')
from evaluate import avgPrecision, avgPrecisionK, printEvaluation
from datasets import create_dataset_yeast_train, create_dataset_yeast_test, yeast_nLabels
from datasets import create_dataset_emotions_train, create_dataset_emotions_test, emotions_nLabels
from datasets import create_dataset_scene_train, create_dataset_scene_test, scene_nLabels
from datasets import create_dataset_mediamill_subset_train, create_dataset_mediamill_subset_test, mm_nLabels
In [ ]:
datasets = ['yeast', 'emotions', 'scene', 'mediamill']
num_labels = [yeast_nLabels, emotions_nLabels, scene_nLabels, mm_nLabels]
create_dataset_train_funcs = [create_dataset_yeast_train,
create_dataset_emotions_train,
create_dataset_scene_train,
create_dataset_mediamill_subset_train]
create_dataset_test_funcs = [create_dataset_yeast_test,
create_dataset_emotions_test,
create_dataset_scene_test,
create_dataset_mediamill_subset_test]
In [ ]:
data_ix = 1
In [ ]:
dataset_name = datasets[data_ix]
nLabels = num_labels[data_ix]
create_dataset_train = create_dataset_train_funcs[data_ix]
create_dataset_test = create_dataset_test_funcs[data_ix]
print('Dataset:', dataset_name)
The sigmoid function.
In [ ]:
def sigmoid(x):
return 1.0 / (1.0 + np.exp(-x))
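A quick numerical sanity check of the definition (just a sketch): $\sigma(0) = 0.5$ and $\sigma(-x) = 1 - \sigma(x)$.
In [ ]:
# Sanity check: sigmoid(0) = 0.5 and sigmoid(-x) = 1 - sigmoid(x)
xs = np.linspace(-5, 5, 11)
print(sigmoid(0.0), np.allclose(sigmoid(-xs), 1.0 - sigmoid(xs)))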
Multi-label learning with top push loss.
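As a reading aid, the (dual) objective implemented by obj_top_push and obj_top_push_loop below is, for dual variables $\theta \in \mathbb{R}^{N \times K}$,

$$
J(\theta) = \sum_{k=1}^{K} \left[ \frac{C}{2} \Big\Vert \sum_{n=1}^{N} \gamma_{nk}\, \mathbf{x}_n \Big\Vert^2
+ \sum_{n:\, y_{nk}=1} \frac{-\theta_{nk}\log(-\theta_{nk}) + (1+\theta_{nk})\log(1+\theta_{nk})}{N K_n^+} \right],
\qquad
\gamma_{nk} =
\begin{cases}
\theta_{nk} / (N K_n^+), & y_{nk} = 1 \\
\theta_{nk} / (N K_n^-), & y_{nk} = 0
\end{cases}
$$

where $K_n^+$ and $K_n^-$ are the numbers of positive and negative labels of example $n$, and the variables are box-constrained to $-1 < \theta_{nk} < 0$ when $y_{nk} = 1$ and $\theta_{nk} \ge 0$ when $y_{nk} = 0$ (see init_var and the bounds set in the solver wrappers further down).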
In [ ]:
def obj_top_push(w, X, Y, C):
"""
Objective with L2 regularisation and top push loss
Input:
- w: current dual variable vector (theta), flattened N x K
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant, consistent with scikit-learn's C = 1 / (N * \lambda);
if the objective J is normalised by dividing by C, then \lambda = 1/C
"""
N, D = X.shape
K = Y.shape[1]
assert(w.shape[0] == N * K)
assert(C > 0)
T = w.reshape(N, K) # theta
J = 0.0 # cost
G = np.zeros_like(T) # gradient matrix
KPosAll = np.sum(Y, axis=1) # number of positive labels for each example, N by 1
KNegAll = K - KPosAll # number of negative labels for each example, N by 1
for k in range(K):
y = Y[:, k]
posVec = np.zeros(N, dtype=float)
negVec = np.zeros(N, dtype=float)
posVec[y == 1] = 1
negVec[y == 0] = 1
posVec = np.divide(posVec, KPosAll * N) # 1 / NK+, N by 1
negVec = np.divide(negVec, KNegAll * N) # 1 / NK-, N by 1
a = np.multiply(T[:, k], posVec) # alpha / NK+ if y_nk = 1 else 0
b = np.multiply(T[:, k], negVec) # beta / NK- if y_nk = 0 else 0
c = a + b # gamma
d = np.sum(X * c[:, None], axis=0)
t1 = T[y == 1, k]
p1 = posVec[y == 1]
s1 = -np.log(-t1)
s2 = np.log(1 + t1)
J += 0.5 * C * np.dot(d, d) + np.dot(a[y == 1], s1) + np.dot(p1, np.multiply(1+t1, s2))
p2 = negVec[y == 0]
t2 = np.dot(X, d) * C
G[y == 1, k] = np.multiply(p1, t2[y == 1] + s1 + s2)
G[y == 0, k] = np.multiply(p2, t2[y == 0])
return (J, G.ravel())
In [ ]:
def obj_top_push_loop(w, X, Y, C):
"""
Objective with L2 regularisation and top push loss
Input:
- w: current dual variable vector (theta), flattened N x K
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant, consistent with scikit-learn's C = 1 / (N * \lambda);
if the objective J is normalised by dividing by C, then \lambda = 1/C
"""
N, D = X.shape
K = Y.shape[1]
assert(w.shape[0] == N * K)
assert(C > 0)
T = w.reshape(N, K) # theta
J = 0.0 # cost
G = np.zeros_like(T) # gradient matrix
KPosAll = np.sum(Y, axis=1) # number of positive labels for each example, N by 1
KNegAll = K - KPosAll # number of negative labels for each example, N by 1
for k in range(K):
y = Y[:, k]
# cost
X1 = np.zeros_like(X)
for n in range(N):
t = T[n, k] / (N * KPosAll[n]) if y[n] == 1 else T[n, k] / (N * KNegAll[n])
X1[n, :] = X[n, :] * t
xs = np.sum(X1, axis=0)
J += np.dot(xs, xs) * 0.5 * C
#print(J)
for n in range(N):
if y[n] == 1:
#print(T[n, k], n, k) # debug output
assert(T[n, k] < 0)
assert(T[n, k] + 1 > 0)
J += (-T[n, k] * np.log(-T[n, k]) + (1 + T[n, k]) * np.log(1 + T[n, k])) / (N * KPosAll[n])
# gradient
for n in range(N):
if y[n] == 1:
ga = C * np.dot(np.sum(X1, axis=0), X[n, :]) - np.log(-T[n, k]) + np.log(1 + T[n, k])
G[n, k] = G[n, k] + ga / (N * KPosAll[n])
else:
gb = C * np.dot(np.sum(X1, axis=0), X[n, :])
G[n, k] = G[n, k] + gb / (N * KNegAll[n])
return (J, G.ravel())
In [ ]:
def dual2primal(w, X, Y, C):
"""
Compute primal variable values given dual variable values
Input:
- w: current dual variable vector (theta), flattened N x K
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant, consistent with scikit-learn's C = 1 / (N * \lambda);
if the objective J is normalised by dividing by C, then \lambda = 1/C
"""
N, D = X.shape
K = Y.shape[1]
assert(w.shape[0] == N * K)
assert(C > 0)
W = np.zeros((K, D), dtype=float)
T = w.reshape(N, K) # theta
KPosAll = np.sum(Y, axis=1) # number of positive labels for each example, N by 1
KNegAll = K - KPosAll # number of negative labels for each example, N by 1
for k in range(K):
y = Y[:, k]
posVec = np.zeros(N, dtype=float)
negVec = np.zeros(N, dtype=float)
posVec[y == 1] = 1
negVec[y == 0] = 1
posVec = np.divide(posVec, KPosAll * N) # 1 / NK+, N by 1, same scaling as obj_top_push
negVec = np.divide(negVec, KNegAll * N) # 1 / NK-, N by 1, same scaling as obj_top_push
t = np.multiply(T[:, k], posVec) + np.multiply(T[:, k], negVec) # gamma
W[k, :] = -C * np.sum(X * t[:, None], axis=0)
return W
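In other words (reading dual2primal above and dual2primal_loop below), the primal weights are recovered from the dual variables as

$$
\mathbf{w}_k = -C \sum_{n=1}^{N} \gamma_{nk}\, \mathbf{x}_n, \qquad k = 1, \dots, K,
$$

so W is a $K \times D$ matrix and test examples are scored by $X W^\top$ (see decision_function in MLC_toppush further down).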
In [ ]:
def dual2primal_loop(w, X, Y, C):
"""
Compute primal variable values given dual variable values
Input:
- w: current dual variable vector (theta), flattened N x K
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant, consistent with scikit-learn's C = 1 / (N * \lambda);
if the objective J is normalised by dividing by C, then \lambda = 1/C
"""
N, D = X.shape
K = Y.shape[1]
assert(w.shape[0] == N * K)
assert(C > 0)
W = np.zeros((K, D), dtype=float)
T = w.reshape(N, K) # theta
KPosAll = np.sum(Y, axis=1) # number of positive labels for each example, N by 1
KNegAll = K - KPosAll # number of negative labels for each example, N by 1
for k in range(K):
y = Y[:, k]
X1 = X.copy()
for n in range(N):
t = T[n, k] / (N * KPosAll[n]) if y[n] == 1 else T[n, k] / (N * KNegAll[n])
X1[n, :] = X1[n, :] * t
W[k, :] = -C * np.sum(X1, axis=0)
return W
In [ ]:
#aa = np.array([0,1])
#-1 * (2 * aa - 1)
In [ ]:
def init_var(X, Y, seed=None):
"""
Initialise the dual variables
Input:
- X: feature matrix, N x D
- Y: label matrix, N x K
Output:
- w0: flattened N x K vector of random initial values,
w0_{n,k} \in (-1, 0) if Y_{n,k} = 1,
w0_{n,k} \in (0, 1) if Y_{n,k} = 0.
"""
N, D = X.shape
K = Y.shape[1]
I = -1 * (2 * Y - 1)
if seed is not None: np.random.seed(seed)
w0 = np.multiply(np.random.rand(N * K).reshape(N, K), I).ravel()
return w0
In [ ]:
def init_var_feasible(X, Y):
"""
Initialise the dual variables
Input:
- X: feature matrix, N x D
- Y: label matrix, N x K
Output:
- w0: flattened N x K vector of values that satisfy all constraints,
w0_{n,k} \in (-1, 0) if Y_{n,k} = 1,
w0_{n,k} \in (0, 1) if Y_{n,k} = 0.
"""
N, D = X.shape
K = Y.shape[1]
#I = -1 * (2 * Y - 1)
#if seed is not None: np.random.seed(seed)
#w0 = np.multiply(np.random.rand(N * K).reshape(N, K), I).ravel()
#return w0
# NOTE:
# simply let
# alpha_{n,k} = -v, 0 < v < 1
# beta_{n,k} = v
# will satisfy the equality constraints
I = -1 * (2 * Y - 1)
return np.ravel(0.1 * I)
In [ ]:
def obj_constraint(w, n, X, Y):
"""
Compute the constraints for top push loss
Input:
- w: weights vector, N*K x 1
- n: the n-th constraint
- X: feature matrix, N x D
- Y: label matrix, N x K
Output:
- value of the n-th equality constraint (zero iff the constraint is satisfied)
"""
N, D = X.shape
K = Y.shape[1]
T = w.reshape(N, K) # theta
assert 0 <= n < N
KPos = np.sum(Y[n, :]) # number of positive labels for the n-th example
KNeg = K - KPos # number of negative labels for the n-th example
yn = Y[n, :]
return (np.sum(T[n, yn == 1]) / KPos + np.sum(T[n, yn == 0]) / KNeg) / N
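Written out (this is exactly the quantity returned by obj_constraint), the $n$-th equality constraint is

$$
g_n(\theta) = \frac{1}{N}\left( \frac{1}{K_n^+} \sum_{k:\, y_{nk}=1} \theta_{nk} + \frac{1}{K_n^-} \sum_{k:\, y_{nk}=0} \theta_{nk} \right) = \sum_{k=1}^{K} \gamma_{nk} = 0,
$$

which is the same quantity computed row-wise by toppush.constraints below.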
In [ ]:
def grad_constraint(w, n, X, Y):
"""
Compute the gradient of constraints for top push loss
Input:
- w: weights vector, N*K x 1
- n: the n-th constraint
- X: feature matrix, N x D
- Y: label matrix, N x K
Output:
- gradient of the n-th constraint w.r.t. theta_{n,:}, an array of length K (the derivatives w.r.t. all other entries of w are zero)
"""
N, D = X.shape
K = Y.shape[1]
T = w.reshape(N, K) # theta
assert 0 <= n < N
KPos = np.sum(Y[n, :]) # number of positive labels for the n-th example
KNeg = K - KPos # number of negative labels for the n-th example
yn = Y[n, :]
pos = np.zeros(K, dtype=float)
neg = np.zeros(K, dtype=float)
pos[yn == 1] = 1
neg[yn == 0] = 1
pos = np.divide(pos, KPos * N) # 1 / (N * K+), matching obj_constraint
neg = np.divide(neg, KNeg * N) # 1 / (N * K-), matching obj_constraint
return pos + neg
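A quick sanity check of the feasible initialisation (a minimal sketch on a tiny hand-made label matrix rather than one of the benchmark datasets): every equality constraint should evaluate to (numerically) zero at the point returned by init_var_feasible.
In [ ]:
# Minimal sketch: the constant +/- 0.1 initialisation should satisfy every equality constraint.
X_toy = np.random.rand(4, 3)                                    # 4 examples, 3 features (arbitrary sizes)
Y_toy = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0], [0, 0, 1]])  # each row has positive and negative labels
w_toy = init_var_feasible(X_toy, Y_toy)
print([obj_constraint(w_toy, n, X_toy, Y_toy) for n in range(X_toy.shape[0])])  # expect values close to 0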
In [ ]:
class toppush(object):
def __init__(self, X, Y, C):
"""
Initialisation and computing shared data.
Input:
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant
"""
self.N, self.D = X.shape
self.K = Y.shape[1]
assert(C > 0)
self.C = C
self.X = X
self.Y = Y
#print(X.shape, Y.shape)
KPosAll = np.sum(Y, axis=1) # number of positive labels for each example, N by 1
#print(KPosAll.shape)
KNegAll = self.K - KPosAll # number of negative labels for each example, N by 1
self.coefMat = np.ones((self.N, self.K), dtype=float) # v_{n,k} = 1/NK+ if y_{n,k} = 1 else 1/NK-
for k in range(self.K):
y = Y[:, k]
posVec = np.divide(y, KPosAll * self.N) # 1 / NK+, N by 1
negVec = np.divide(1-y, KNegAll * self.N) # 1 / NK-, N by 1
self.coefMat[:, k] = posVec + negVec
def objective(self, x):
#
# The callback for calculating the objective
#
w = x
assert(w.shape[0] == self.N * self.K)
T = w.reshape(self.N, self.K) # theta
J = 0.0
Gama = np.multiply(T, self.coefMat) # gamma
for k in range(self.K):
y = self.Y[:, k]
d = np.sum(self.X * Gama[:, k][:, None], axis=0)
a = T[y == 1, k] # alpha
J += 0.5 * self.C * np.dot(d, d)
J += np.dot(self.coefMat[y == 1, k], np.multiply(-a, np.log(-a)) + np.multiply(1+a, np.log(1+a)))
return J
def gradient(self, w):
#
# The callback for calculating the gradient
#
T = w.reshape(self.N, self.K) # theta
Gama = np.multiply(T, self.coefMat) # gamma
G = np.zeros_like(T) # gradient matrix
for k in range(self.K):
y = self.Y[:, k]
a = T[y == 1, k] # alpha
d = np.sum(self.X * Gama[:, k][:, None], axis=0)
t = np.dot(self.X, d) * self.C
G[y == 1, k] = np.multiply(self.coefMat[y == 1, k], t[y == 1] - np.log(-a) + np.log(1+a))
G[y == 0, k] = np.multiply(self.coefMat[y == 0, k], t[y == 0])
return G.ravel()
def constraints(self, w):
#
# The callback for calculating the constraints
#
T = w.reshape(self.N, self.K) # theta
Gama = np.multiply(T, self.coefMat) # gamma
return np.sum(Gama, axis=1)
def jacobian(self, w):
#
# The callback for calculating the Jacobian of constraints
#
jac = np.tile(np.zeros((self.N, self.K)), (self.N,1))
ix = [n * (self.N + 1) for n in range(self.N)] # [0, N+1, 2N+2, ..., N^2-1]
jac[ix, :] = self.coefMat
return jac.ravel()
def hessian(self, w, lagrange_multipliers, obj_factor):
#
# The callback for calculating the Hessian of both objective and constraints
#
M1 = np.repeat(self.X, repeats=self.K, axis=0) # repeat each row K times, NK by D
M2 = M1 * self.coefMat.flatten()[:, None] # scale each row of M1 by the corresponding coefficient
M3 = np.dot(M2, M2.T) * self.C * np.tile(np.eye(self.K), (self.N, self.N)) # NK by NK, nonzero only for entries sharing the same label
T = w.reshape(self.N, self.K) # theta
T1 = np.divide(1, np.multiply(T, 1 + T))
A1 = np.multiply(np.multiply(T1, self.Y), self.coefMat)
diagix = np.arange(M3.shape[0])
M3[diagix, diagix] = M3[diagix, diagix] - A1.ravel() # change the diagonal values
H = obj_factor * np.tril(M3) # NK by NK
# Linear constraints: Hession of all constraints are zeros
return H.ravel()
def intermediate(
self,
alg_mod,
iter_count,
obj_value,
inf_pr,
inf_du,
mu,
d_norm,
regularization_size,
alpha_du,
alpha_pr,
ls_trials
):
#
# Example for the use of the intermediate callback.
#
print("Objective value at iteration #%d is: %g" % (iter_count, obj_value))
Check gradient
In [ ]:
X_train, Y_train = create_dataset_train()
X_test, Y_test = create_dataset_test()
In [ ]:
X_train.shape
In [ ]:
%%script false
# testing func
def obj_func(w):
# f = xy + x^2
x = w[0]
y = w[1]
return (x*y + x*x, np.asarray([y + 2*x, x]))
eps = 1.49e-08
w0 = np.random.rand(2)
w = np.zeros_like(w0)
for i in range(len(w0)):
wi1 = w0.copy()
wi2 = w0.copy()
wi1[i] = wi1[i] - eps
wi2[i] = wi2[i] + eps
J1, _ = obj_func(wi1)
J2, _ = obj_func(wi2)
w[i] = (J2 - J1) / (2 * eps)
J, w1 = obj_func(w0)
wdiff = w1 - w
print(np.dot(wdiff, wdiff))
In [ ]:
%%script false
C = 1
eps = 1.49e-08
w0 = init_var(X_train[:100, :], Y_train[:100, :])
w = np.zeros_like(w0)
for i in range(len(w0)):
wi1 = w0.copy()
wi2 = w0.copy()
wi1[i] = wi1[i] - eps
wi2[i] = wi2[i] + eps
J1, _ = obj_top_push_loop(wi1, X_train[:100, :], Y_train[:100, :], C)
J2, _ = obj_top_push_loop(wi2, X_train[:100, :], Y_train[:100, :], C)
w[i] = (J2 - J1) / (2 * eps)
#print(w[i])
J, w1 = obj_top_push_loop(w0, X_train[:100, :], Y_train[:100, :], C)
diff = w1 - w
np.dot(diff, diff)
In [ ]:
%%script false
C = 1
w0 = init_var(X_train[:100, :], Y_train[:100, :])
check_grad(lambda w: obj_top_push_loop(w, X_train[:100, :], Y_train[:100, :], C)[0],
lambda w: obj_top_push_loop(w, X_train[:100, :], Y_train[:100, :], C)[1], w0)
In [ ]:
%%script false
N = X_train.shape[0]
K = Y_train.shape[1]
print('%15s %15s %15s %15s' % ('J_Diff', 'J_loop', 'J_vec', 'G_Diff'))
for e in range(-6, 10):
C = 10**(e)
w0 = init_var(X_train, Y_train)
J, G = obj_top_push_loop(w0, X_train, Y_train, C)
J1, G1 = obj_top_push(w0, X_train, Y_train, C)
Gdiff = G1 - G
#print('%-15g %-15g %-15g' % (J1 - J, J, J1))
print('%15g %15g %15g %15g' % (J1 - J, J, J1, np.dot(Gdiff, Gdiff)))
In [ ]:
%%script false
C = 1
w0 = init_var(X_train, Y_train)
check_grad(lambda w: obj_top_push(w, X_train, Y_train, C)[0],
lambda w: obj_top_push(w, X_train, Y_train, C)[1], w0)
#%lprun -f obj_top_push check_grad(lambda w: obj_top_push(w, X_train, Y_train, C)[0], \
# lambda w: obj_top_push(w, X_train, Y_train, C)[1], w0)
In [ ]:
#w0 = init_var(X_train[:100, :], Y_train[:100, :])
#check_grad(lambda w: obj_constraint(w, 0, X_train[:100, :], Y_train[:100, :]),
# lambda w: grad_constraint(w, 0, X_train[:100, :], Y_train[:100, :]), w0)
In [ ]:
X_train.shape
In [ ]:
Y_train.shape
In [ ]:
def minimise_cost_ipopt(X, Y, C):
"""
Minimise the cost using Interior Point method provided by ipopt
see https://projects.coin-or.org/Ipopt for details.
Input:
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant
Output:
- optx: the optimal solution (N*K x 1) if succeed, None otherwise
- success: True if succeed else False
"""
N, D = X.shape
K = Y.shape[1]
M = N * K # number of parameters
w0 = init_var_feasible(X, Y)
eps = 1e-12
lb_var = np.array([-1 + eps if wi < 0 else 0 for wi in w0], dtype=np.float64) # lower bounds
ub_var = np.array([ 0 - eps if wi < 0 else 2.0e19 for wi in w0], dtype=np.float64) # upper bounds
lb_cons = np.array([0 for n in range(N)])
ub_cons = np.array([0 for n in range(N)])
prob = ipopt.problem(
n = len(w0),
m = N,
problem_obj = toppush(X, Y, C),
lb = lb_var,
ub = ub_var,
cl = lb_cons,
cu = ub_cons
)
#prob.addOption('mu_strategy', 'adaptive')
#prob.addOption('tol', 1e-7)
#prob.addOption('max_iter', 1e4)
xopt, info = prob.solve(w0)
print(info['status'], info['status_msg'])
success = (info['status'] == 0) # 0 means IPOPT reports the problem solved
return (xopt, True) if success else (None, False)
In [ ]:
nlopt.algorithm_name(nlopt.LD_AUGLAG)
In [ ]:
def minimise_cost_nlopt(X, Y, C):
"""
Minimise the cost using Augmented Lagrangian algorithm and L-BFGS provided by NLopt,
see https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/#augmented-lagrangian-algorithm for details.
Input:
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant
Output:
- optx: the optimal solution (N*K x 1) if succeed, None otherwise
- success: True if succeed else False
"""
N, D = X.shape
K = Y.shape[1]
M = N * K # number of parameters
obj_func = obj_top_push_loop
#obj_func = obj_top_push
# objective
def _func(w, grad):
obj, G = obj_func(w, X, Y, C)
if grad.size > 0:
grad[:] = G # inplace write, eqv to np.copyto(grad, G)
return obj
# constraints
def _cons(w, grad, n):
if grad.size > 0:
T = np.zeros((N, K))
T[n, :] = grad_constraint(w, n, X, Y)
grad[:] = T.ravel()
return obj_constraint(w, n, X, Y)
opt = nlopt.opt(nlopt.LD_AUGLAG, M)
opt.set_min_objective(_func) # to minimise objective function
local_opt = nlopt.opt(nlopt.LD_LBFGS, M)
opt.set_local_optimizer(local_opt)
eps = 1e-12
w0 = init_var_feasible(X, Y)
lb = np.array([-1 + eps if wi < 0 else 0 for wi in w0], dtype=np.float64) # lower bounds
ub = np.array([ 0 - eps if wi < 0 else np.inf for wi in w0], dtype=np.float64) # upper bounds
opt.set_lower_bounds(lb)
opt.set_upper_bounds(ub)
for n in range(N):
opt.add_equality_constraint(lambda x, grad, n=n: _cons(x, grad, n)) # bind n by value, not by closure
opt.set_ftol_rel(2.220446049250313e-09) # same as scipy.optimize.minimize for L-BFGS-B
opt.set_maxtime(2 * 60 * 60) # time limit: 2 hours
try:
xopt = opt.optimize(w0) # optimal solution
opt_val = opt.last_optimum_value() # optimal value
res_code = opt.last_optimize_result() # result code
if res_code == nlopt.SUCCESS:
return (xopt, True)
else:
sys.stderr.write('Optimisation failed: (result code %d)\n' % res_code)
return (None, False)
except:
sys.stderr.write('Optimisation failed')
raise
In [ ]:
def minimise_cost_slsqp(X, Y, C):
"""
Minimise the cost using SLSQP (Sequential Least SQuares Programming) algorithm
Input:
- X: feature matrix, N x D
- Y: label matrix, N x K
- C: regularisation constant
Output:
- optx: the optimal solution (N*K x 1) if succeed, None otherwise
- success: True if succeed else False
NOTE: SLSQP is less practical for optimizing more than a few thousand parameters due to space and
time complexity, see https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/#slsqp for details.
"""
opt_method = 'SLSQP'
options = {'disp': True, 'maxiter': 100}
if options['disp']:
print('\nC: %g' % C)
N = X.shape[0]
# NOTE: initial point should be feasible, otherwise optimisation will fail
w0 = init_var_feasible(X, Y)
#bnds = [(-1, 0) if wi < 0 else (0, None) for wi in w0] # the min/left bound is inclusion
eps = 1e-12
bnds = [(-1+eps, 0-eps) if wi < 0 else (0, None) for wi in w0]
cons = [{'type': 'eq', 'fun': obj_constraint, 'args': (n, X, Y)} for n in range(N)]
#obj_func = obj_top_push_loop
obj_func = obj_top_push
opt = minimize(obj_func, w0, args=(X, Y, C), method=opt_method, jac=True, \
bounds=bnds, constraints=cons, options=options)
#print(opt.success)
#print(type(opt.success))
#print(opt.success == np.True_)
# NOTE: opt.success is of type 'numpy.bool_' which is different from python bool
#if opt.success is True:
if opt.success is np.True_:
return (opt.x, True)
else:
sys.stderr.write('Optimisation failed: \n')
#print(opt.items())
return (None, False)
In [ ]:
N, K = Y_train.shape # N examples, K labels: SLSQP optimises N*K dual variables
# code in scipy.optimize.slsqp._minimize_slsqp()
meq = N
mieq = 0
m = meq + mieq
la = np.array([1, m]).max()
n = N*K
n1 = n + 1
mineq = m - meq + n1 + n1
len_w = (3*n1+m)*(n1+1)+(n1-meq+1)*(mineq+2) + 2*mineq+(n1+mineq)*(n1-meq) + \
2*meq + n1 + ((n+1)*n)//2 + 2*m + 3*n + 3*n1 + 1
print(len_w)
print(np.log2(len_w))
#e = 32
#np.zeros(2**(e))
#np.zeros(len_w)
In [ ]:
def minimise_cost(X, Y, C):
#opt_func = minimise_cost_slsqp
#opt_func = minimise_cost_nlopt
opt_func = minimise_cost_ipopt
return opt_func(X, Y, C)
class MLC_toppush(BaseEstimator):
"""All methods are necessary for a scikit-learn estimator"""
def __init__(self, p=1, C=1):
"""Initialisation"""
assert C > 0
self.p = p # store every constructor argument so that get_params() / clone() work
self.C = C
self.trained = False
def fit(self, X_train, Y_train):
"""Model fitting by optimising the objective"""
optx, success = minimise_cost(X_train, Y_train, self.C)
if success is True:
self.W = dual2primal(optx, X_train, Y_train, self.C)
self.trained = True
else:
self.trained = False
def decision_function(self, X_test):
"""Make predictions (score is real number)"""
assert self.trained is True, "Can't make prediction before training"
D = X_test.shape[1]
return np.dot(X_test, self.W.T)
def predict(self, X_test):
"""Make predictions (score is boolean)"""
preds = self.decision_function(X_test)
return (preds > 0)
def score(self, X, Y):
"""Compute scoring metric"""
allPreds = self.decision_function(X)
return avgPrecisionK(Y, allPreds)
# inherit from BaseEstimator instead of re-implement
#
#def get_params(self, deep = True):
#def set_params(self, **params):
In [ ]:
#%mprun
model = MLC_toppush()
model.fit(X_train[:50], Y_train[:50])
#%memit model.fit(X_train[:30], Y_train[:30])
#%mprun -f minimize model.fit(X_train[:100], Y_train[:100])
#%mprun -f _minimize_slsqp model.fit(X_train[:10], Y_train[:10])
In [ ]:
parameters = [{'C': [10**(e) for e in range(-6,-1)]}]
clf = GridSearchCV(MLC_toppush(), parameters, cv=5, n_jobs=1)
clf.fit(X_train[:100], Y_train[:100])
print("\nBest parameters set found on development set:")
print(clf.best_params_)
In [ ]:
for mean, std, params in zip(clf.cv_results_['mean_test_score'], clf.cv_results_['std_test_score'], \
clf.cv_results_['params']):
print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
In [ ]:
clf = model
In [ ]:
preds_train = clf.decision_function(X_train)
preds_test = clf.decision_function(X_test)
In [ ]:
print('Training set:')
printEvaluation(Y_train, preds_train)
print()
print('Test set:')
printEvaluation(Y_test, preds_test)
In [ ]:
precisions_train = [avgPrecision(Y_train, preds_train, k) for k in range(1, nLabels+1)]
precisions_test = [avgPrecision(Y_test, preds_test, k) for k in range(1, nLabels+1)]
In [ ]:
precisionK_train = avgPrecisionK(Y_train, preds_train)
precisionK_test = avgPrecisionK(Y_test, preds_test)
In [ ]:
plt.figure(figsize=[10,5])
plt.plot(precisions_train, ls='--', c='r', label='Train')
plt.plot(precisions_test, ls='-', c='g', label='Test')
plt.plot([precisionK_train for k in range(nLabels)], ls='-', c='r', label='Train, Precision@K')
plt.plot([precisionK_test for k in range(nLabels)], ls='-', c='g', label='Test, Precision@K')
plt.xticks(np.arange(nLabels), np.arange(1,nLabels+1))
plt.xlabel('k')
plt.ylabel('Precision@k')
plt.legend(loc='best')
plt.title('MLC w. Top-push Loss on ' + dataset_name + ' dataset')
plt.savefig(dataset_name + '_tp.svg')