In [ ]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os, sys, time
import pickle as pkl
import numpy as np
import pandas as pd
import itertools
from scipy.optimize import minimize
from scipy.optimize import check_grad
from sklearn.base import BaseEstimator
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
sys.path.append('src')
from evaluate import avgPrecision, avgPrecisionK, printEvaluation
from datasets import create_dataset_yeast_train, create_dataset_yeast_test, yeast_nLabels
from datasets import create_dataset_scene_train, create_dataset_scene_test, scene_nLabels
from datasets import create_dataset_mediamill_subset_train, create_dataset_mediamill_subset_test, mm_nLabels
In [ ]:
datasets = ['yeast', 'scene', 'mediamill']
num_labels = [yeast_nLabels, scene_nLabels, mm_nLabels]
create_dataset_train_funcs = [create_dataset_yeast_train,
                              create_dataset_scene_train,
                              create_dataset_mediamill_subset_train]
create_dataset_test_funcs = [create_dataset_yeast_test,
                             create_dataset_scene_test,
                             create_dataset_mediamill_subset_test]
In [ ]:
data_ix = 1
In [ ]:
dataset_name = datasets[data_ix]
nLabels = num_labels[data_ix]
create_dataset_train = create_dataset_train_funcs[data_ix]
create_dataset_test = create_dataset_test_funcs[data_ix]
The sigmoid function.
In [ ]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))
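A note on numerical stability: np.exp(-x) overflows for large negative x. A minimal sketch of a stable alternative uses scipy.special.expit, which computes the same function; the name sigmoid_stable is a hypothetical helper, not used elsewhere in this notebook.
In [ ]:
from scipy.special import expit

def sigmoid_stable(x):
    # expit computes 1 / (1 + exp(-x)) without overflow warnings for large |x|
    return expit(x)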
Multi-label learning with a pairwise ranking loss: for every example, each (positive label, negative label) pair is encouraged to be ranked correctly via a logistic loss on the difference of label scores.
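Written out, the objective implemented in obj_ranking below is

$$
J(W) \;=\; \frac{1}{2C}\,\lVert W \rVert_F^2 \;+\; \frac{1}{N}\sum_{n=1}^{N} \frac{1}{|P_n|\,|Q_n|} \sum_{p \in P_n} \sum_{q \in Q_n} \log\!\left(1 + e^{-(\mathbf{w}_p - \mathbf{w}_q)^\top \mathbf{x}_n}\right)
$$

where $P_n$ and $Q_n$ are the sets of positive and negative labels of example $n$, $\mathbf{w}_l$ is the $l$-th row of the weight matrix $W$, and $C = 1/(N\lambda)$ as in scikit-learn.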
In [ ]:
def obj_ranking(w, X, Y, C):
    r"""
    Objective with L2 regularisation and ranking loss.
    Input:
        - w: current weight vector, flattened L x D
        - X: feature matrix, N x D
        - Y: label matrix, N x L
        - C: regularisation constant, consistent with scikit-learn C = 1 / (N * \lambda)
    """
    N, D = X.shape
    L = Y.shape[1]
    assert w.shape[0] == L * D
    W = w.reshape(L, D)    # reshape weight matrix
    J = 0.0                # cost
    G = np.zeros_like(W)   # gradient matrix
    for n in range(N):
        x = X[n, :]
        y = Y[n, :]
        nPos = np.sum(y)   # number of positive labels
        nNeg = L - nPos    # number of negative labels
        denom = nPos * nNeg
        if denom == 0:     # skip examples with no positive or no negative labels
            continue
        ixPos = np.nonzero(y)[0].tolist()             # indices of positive labels
        ixNeg = list(set(np.arange(L)) - set(ixPos))  # indices of negative labels
        ixmat = np.array(list(itertools.product(ixPos, ixNeg)))  # shape: (nPos * nNeg, 2)
        dW = W[ixmat[:, 0], :] - W[ixmat[:, 1], :]
        sVec = np.dot(dW, x)                    # score differences for all (pos, neg) pairs
        Jn = np.sum(np.logaddexp(0.0, -sVec))   # sum of log(1 + exp(-s)), numerically stable
        coeffVec = sigmoid(-sVec)               # = 1 / (1 + exp(sVec)), magnitude of dJn/ds
        # sum the pairwise coefficients per label
        coeffPos = pd.DataFrame(coeffVec)
        coeffPos['gid'] = ixmat[:, 0]
        coeffPos = coeffPos.groupby('gid', sort=False).sum()
        coeffNeg = pd.DataFrame(coeffVec)
        coeffNeg['gid'] = ixmat[:, 1]
        coeffNeg = coeffNeg.groupby('gid', sort=False).sum()
        coeffs = np.zeros(L)
        coeffs[ixPos] = -coeffPos.loc[ixPos].values.squeeze()
        coeffs[ixNeg] = coeffNeg.loc[ixNeg].values.squeeze()
        Gn = np.tile(x, (L, 1)) * coeffs[:, None]  # per-label gradients as an outer product
        J += Jn / denom
        G = G + Gn / denom
    # regularisation consistent with scikit-learn C = 1 / (N * \lambda);
    # the objective is normalised by dividing through by C
    J = np.dot(w, w) / (2.0 * C) + J / N
    G = W / C + G / N
    return (J, G.ravel())
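The pandas groupby above is one way to sum the pairwise coefficients per label; the same accumulation can be done in pure NumPy with np.add.at. A minimal self-contained sketch with toy sizes and random stand-in coefficients:
In [ ]:
# Toy illustration of the per-label coefficient accumulation in obj_ranking,
# replacing the pandas groupby with np.add.at (values here are stand-ins).
L_toy = 4
ixPos_toy, ixNeg_toy = [0, 2], [1, 3]                  # positive / negative label indices
ixmat_toy = np.array(list(itertools.product(ixPos_toy, ixNeg_toy)))
coeffVec_toy = np.random.rand(ixmat_toy.shape[0])      # stand-in for 1 / (1 + exp(sVec))
coeffs_toy = np.zeros(L_toy)
np.add.at(coeffs_toy, ixmat_toy[:, 0], -coeffVec_toy)  # -coeff summed onto each positive label
np.add.at(coeffs_toy, ixmat_toy[:, 1], coeffVec_toy)   # +coeff summed onto each negative label
print(coeffs_toy)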
Check the analytical gradient against a finite-difference approximation; scipy.optimize.check_grad returns the 2-norm of the difference between the two, which should be close to zero.
In [ ]:
X_train, Y_train = create_dataset_train()
X_test, Y_test = create_dataset_test()
In [ ]:
#%%script false
C = 1
w0 = np.random.rand(X_train.shape[1] * nLabels)
check_grad(lambda w: obj_ranking(w, X_train[:10], Y_train[:10], C)[0],
           lambda w: obj_ranking(w, X_train[:10], Y_train[:10], C)[1], w0)
In [ ]:
class MLC_ranking(BaseEstimator):
    """All methods are necessary for a scikit-learn estimator"""

    def __init__(self, C=1):
        """Initialisation"""
        assert C > 0
        self.C = C
        self.trained = False

    def fit(self, X_train, Y_train):
        """Model fitting by optimising the objective"""
        opt_method = 'BFGS'  # 'Newton-CG'
        options = {'disp': True}
        if options['disp']:
            print('\nC: %g' % self.C)
        D = X_train.shape[1]
        L = Y_train.shape[1]
        w0 = np.random.rand(L * D)  # initial guess
        opt = minimize(obj_ranking, w0, args=(X_train, Y_train, self.C),
                       method=opt_method, jac=True, options=options)
        if opt.success:
            self.w = opt.x
            self.trained = True
        else:
            sys.stderr.write('Optimisation failed\n')
            self.trained = False
        return self  # scikit-learn convention: fit() returns the estimator

    def decision_function(self, X_test):
        """Make predictions (score is a real number)"""
        assert self.trained, "Can't make prediction before training"
        D = X_test.shape[1]
        return np.dot(X_test, self.w.reshape(-1, D).T)

    def predict(self, X_test):
        """Make predictions (score is boolean)"""
        preds = self.decision_function(X_test)
        return (preds > 0)

    def score(self, X, Y):
        """Compute the scoring metric (Precision@K)"""
        allPreds = self.decision_function(X)
        return avgPrecisionK(Y, allPreds)

    # get_params() and set_params() are inherited from BaseEstimator,
    # no need to re-implement them here.
In [ ]:
#parameters = [{'C': [10**(e) for e in range(-6,7)]}]
parameters = [{'C': [10**(e) for e in range(-6,1)]}]
clf = GridSearchCV(MLC_ranking(), parameters, cv=5)
clf.fit(X_train, Y_train)
print("\nBest parameters set found on development set:")
print(clf.best_params_)
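With the default refit=True, GridSearchCV refits MLC_ranking using the best C on the full training set, so the calls to clf.decision_function below are delegated to this refitted best_estimator_.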
In [ ]:
for mean, std, params in zip(clf.cv_results_['mean_test_score'],
                             clf.cv_results_['std_test_score'],
                             clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
In [ ]:
preds_train = clf.decision_function(X_train)
preds_test = clf.decision_function(X_test)
In [ ]:
print('Training set:')
printEvaluation(Y_train, preds_train)
print()
print('Test set:')
printEvaluation(Y_test, preds_test)
In [ ]:
precisions_train = [avgPrecision(Y_train, preds_train, k) for k in range(1, nLabels+1)]
precisions_test = [avgPrecision(Y_test, preds_test, k) for k in range(1, nLabels+1)]
In [ ]:
precisionK_train = avgPrecisionK(Y_train, preds_train)
precisionK_test = avgPrecisionK(Y_test, preds_test)
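Note that avgPrecision is evaluated once per cutoff k, whereas avgPrecisionK yields a single scalar, which is why the Precision@K values are drawn as horizontal reference lines in the plot below.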
In [ ]:
plt.figure(figsize=[10,5])
plt.plot(precisions_train, ls='--', c='r', label='Train')
plt.plot(precisions_test, ls='-', c='g', label='Test')
plt.plot([precisionK_train for k in range(nLabels)], ls=':', c='r', label='Train, Precision@K')
plt.plot([precisionK_test for k in range(nLabels)], ls=':', c='g', label='Test, Precision@K')
plt.xticks(np.arange(nLabels), np.arange(1,nLabels+1))
plt.xlabel('k')
plt.ylabel('Precision@k')
plt.legend(loc='best')
plt.title('MLC w. Rank Loss on ' + dataset_name + ' dataset')
plt.savefig(dataset_name + '_rank.svg')