Multi-label classification -- ranking loss


In [ ]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os, sys, time
import pickle as pkl
import numpy as np
import pandas as pd
import itertools

from scipy.optimize import minimize
from scipy.optimize import check_grad

from sklearn.base import BaseEstimator
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt
import seaborn as sns

In [ ]:
sys.path.append('src')
from evaluate import avgPrecision, avgPrecisionK, printEvaluation
from datasets import create_dataset_yeast_train, create_dataset_yeast_test, yeast_nLabels
from datasets import create_dataset_scene_train, create_dataset_scene_test, scene_nLabels
from datasets import create_dataset_mediamill_subset_train, create_dataset_mediamill_subset_test, mm_nLabels

In [ ]:
datasets = ['yeast', 'scene', 'mediamill']
num_labels = [yeast_nLabels, scene_nLabels, mm_nLabels]
create_dataset_train_funcs = [create_dataset_yeast_train, 
                              create_dataset_scene_train, 
                              create_dataset_mediamill_subset_train]
create_dataset_test_funcs  = [create_dataset_yeast_test,
                              create_dataset_scene_test,
                              create_dataset_mediamill_subset_test]

In [ ]:
data_ix = 1  # select dataset: 0 = yeast, 1 = scene, 2 = mediamill

In [ ]:
dataset_name = datasets[data_ix]
nLabels = num_labels[data_ix]
create_dataset_train = create_dataset_train_funcs[data_ix]
create_dataset_test  = create_dataset_test_funcs [data_ix]

The sigmoid function $\sigma(x) = 1 / (1 + e^{-x})$.


In [ ]:
def sigmoid(x):
    """Logistic sigmoid; note that np.exp(-x) can overflow for large negative x."""
    return 1.0 / (1.0 + np.exp(-x))
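
For inputs that are large in magnitude, np.exp overflows and emits warnings. An overflow-safe drop-in (a minimal sketch; scipy.special.expit computes the same function and scipy is already a dependency here):


In [ ]:
from scipy.special import expit

# expit(x) == 1 / (1 + exp(-x)), evaluated without overflow for large |x|,
# e.g. expit(np.array([-1000., 0., 1000.])) -> array([0. , 0.5, 1. ])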

Ranking loss

Multi-label learning with a pairwise ranking loss: for each example, every positive label should be scored higher than every negative label.
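
Concretely, for example $n$ with feature vector $x_n$, positive label set $P_n$ and negative label set $Q_n$, the per-example loss used below averages a logistic surrogate over all (positive, negative) label pairs:

$$
\ell_n = \frac{1}{|P_n|\,|Q_n|} \sum_{p \in P_n} \sum_{q \in Q_n} \log\!\left(1 + e^{-(w_p - w_q)^\top x_n}\right),
$$

where $w_\ell$ is the weight vector for label $\ell$ (a row of $W \in \mathbb{R}^{L \times D}$). The full objective adds an L2 regulariser, scaled to match scikit-learn's convention $C = 1 / (N \lambda)$:

$$
J(w) = \frac{1}{2C}\, w^\top w + \frac{1}{N} \sum_{n=1}^{N} \ell_n .
$$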


In [ ]:
def obj_ranking(w, X, Y, C):
    """
        Objective with L2 regularisation and ranking loss
        
        Input:
            - w: current weight vector, flattened L x D
            - X: feature matrix, N x D
            - Y: label matrix,   N x L
            - C: regularisation constant, consistent with scikit-learn's C = 1 / (N * \lambda)
    """
    N, D = X.shape
    L = Y.shape[1]
    assert w.shape[0] == L * D
    
    W = w.reshape(L, D)  # reshape weight matrix    
    
    J = 0.0  # cost
    G = np.zeros_like(W)  # gradient matrix
    
    for n in range(N):
        x = X[n, :]
        y = Y[n, :]
        nPos = np.sum(y)   # number of positive labels for this example
        nNeg = L - nPos    # number of negative labels for this example
        denom = nPos * nNeg
        if denom == 0:     # all labels positive or all negative: no pairs to rank
            continue
        
        ixPos = np.nonzero(y)[0].tolist()               # indices of positive labels
        ixNeg = list(set(np.arange(L)) - set(ixPos))    # indices of negative labels
        
        ixmat = np.array(list(itertools.product(ixPos, ixNeg)))  # shape: (nPos * nNeg, 2)
        dW = W[ixmat[:, 0], :] - W[ixmat[:, 1], :]
        sVec = np.dot(dW, x)                 # score differences s_pq = (w_p - w_q)^T x
        Jn = np.sum(np.logaddexp(0, -sVec))  # sum of log(1 + exp(-s_pq)), overflow-safe
        
        coeffVec = sigmoid(-sVec)            # -d/ds log(1 + exp(-s)) = sigmoid(-s)
        coeffPos = pd.DataFrame(coeffVec)
        coeffPos['gid'] = ixmat[:, 0]
        coeffPos = coeffPos.groupby('gid', sort=False).sum()  # sum over negatives, per positive label
        coeffNeg = pd.DataFrame(coeffVec)
        coeffNeg['gid'] = ixmat[:, 1]
        coeffNeg = coeffNeg.groupby('gid', sort=False).sum()  # sum over positives, per negative label
        
        coeffs = np.zeros(L)
        coeffs[ixPos] = -coeffPos.loc[ixPos].values.squeeze()
        coeffs[ixNeg] = coeffNeg.loc[ixNeg].values.squeeze()
        
        Gn = np.tile(x, (L, 1)) * coeffs[:, None]  # per-label gradient rows: coeffs outer x
                        
        J += Jn / denom
        G = G + Gn / denom
        
    # scale the regulariser by 1/C to be consistent with scikit-learn's C = 1 / (N * \lambda)
    J = np.dot(w, w) / (2.0 * C) + J / N
    G = W / C + G / N
    
    return (J, G.ravel())
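
For reference, the per-example gradient that the groupby bookkeeping above accumulates is, for a positive label $p \in P_n$,

$$
\frac{\partial \ell_n}{\partial w_p}
= -\frac{1}{|P_n|\,|Q_n|} \sum_{q \in Q_n} \sigma\!\left(-(w_p - w_q)^\top x_n\right) x_n,
$$

and symmetrically for a negative label $q \in Q_n$ the sum runs over $p \in P_n$ with a positive sign, where $\sigma$ is the sigmoid. The coeffs vector holds exactly these per-label sums of $\pm\,\sigma(-s_{pq})$, and multiplying row-wise by $x_n$ gives the gradient matrix Gn.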

Check gradient

Verify the analytical gradient of obj_ranking against a finite-difference approximation on a small subsample.


In [ ]:
X_train, Y_train = create_dataset_train()
X_test,  Y_test  = create_dataset_test()

In [ ]:
C = 1
w0 = np.random.rand(X_train.shape[1] * nLabels)
check_grad(lambda w: obj_ranking(w, X_train[:10], Y_train[:10], C)[0], \
           lambda w: obj_ranking(w, X_train[:10], Y_train[:10], C)[1], w0)
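
check_grad returns the 2-norm of the difference between the analytical and finite-difference gradients. A scale-free variant (a sketch using scipy's approx_fprime on the same subsample):


In [ ]:
from scipy.optimize import approx_fprime

eps = np.sqrt(np.finfo(float).eps)  # step size for finite differences
g_num = approx_fprime(w0, lambda w: obj_ranking(w, X_train[:10], Y_train[:10], C)[0], eps)
g_ana = obj_ranking(w0, X_train[:10], Y_train[:10], C)[1]
np.linalg.norm(g_num - g_ana) / np.linalg.norm(g_ana)  # small relative error indicates a correct gradient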

In [ ]:
class MLC_ranking(BaseEstimator):
    """All methods are necessary for a scikit-learn estimator"""
    
    def __init__(self, C=1):
        """Initialisation"""
        
        assert C > 0
        self.C = C
        self.trained = False
        
    def fit(self, X_train, Y_train):
        """Model fitting by optimising the objective"""
        
        opt_method = 'BFGS' #'Newton-CG'
        options = {'disp': True}
        if options['disp']: 
            print('\nC: %g' % self.C)
            
        D = X_train.shape[1]
        L = Y_train.shape[1]
        w0 = np.random.rand(L * D)  # initial guess
        opt = minimize(obj_ranking, w0, args=(X_train, Y_train, self.C), \
                       method=opt_method, jac=True, options=options)
        if opt.success:
            self.w = opt.x
            self.trained = True
        else:
            sys.stderr.write('Optimisation failed\n')
            self.trained = False
        return self  # scikit-learn convention: fit() returns the estimator
    
            
    def decision_function(self, X_test):
        """Make predictions (scores are real numbers): returns an N x L matrix of label scores"""
        
        assert self.trained, "Can't make prediction before training"
        D = X_test.shape[1]
        return np.dot(X_test, self.w.reshape(-1, D).T)  # X (N x D) times W^T (D x L)
        
    
    def predict(self, X_test):
        """Make predictions (score is boolean)"""
        
        preds = self.decision_function(X_test)
        return (preds > 0)
    
    
    def score(self, X, Y):
        """Compute scoring metric"""
        
        allPreds = self.decision_function(X)
        return avgPrecisionK(Y, allPreds)
    
    # get_params() and set_params() are inherited from BaseEstimator,
    # so there is no need to re-implement them here.
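
Before the full grid search, a quick single-model sanity check (a sketch; the subsample size and C value here are arbitrary choices) confirms that a single fit converges:


In [ ]:
# Fit one model with a fixed C on a small subsample, then score it.
model = MLC_ranking(C=1)
model.fit(X_train[:200], Y_train[:200])
if model.trained:
    print('Train score: %.3f' % model.score(X_train[:200], Y_train[:200]))
    print('Test score:  %.3f' % model.score(X_test, Y_test))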

In [ ]:
#parameters = [{'C': [10**(e) for e in range(-6,7)]}]
parameters = [{'C': [10**(e) for e in range(-6,1)]}]

clf = GridSearchCV(MLC_ranking(), parameters, cv=5)
clf.fit(X_train, Y_train)

print("\nBest parameters set found on development set:")
print(clf.best_params_)

In [ ]:
for mean, std, params in zip(clf.cv_results_['mean_test_score'], clf.cv_results_['std_test_score'], \
                             clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

In [ ]:
preds_train = clf.decision_function(X_train)
preds_test  = clf.decision_function(X_test)

In [ ]:
print('Training set:')
printEvaluation(Y_train, preds_train)
print()
print('Test set:')
printEvaluation(Y_test, preds_test)

Result analysis
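
Plot Precision@k for each cutoff k = 1, ..., L, together with the flat Precision@K reference lines computed by avgPrecisionK (see src/evaluate for the exact definitions of both metrics).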


In [ ]:
precisions_train = [avgPrecision(Y_train, preds_train, k) for k in range(1, nLabels+1)]
precisions_test  = [avgPrecision(Y_test,  preds_test,  k) for k in range(1, nLabels+1)]

In [ ]:
precisionK_train = avgPrecisionK(Y_train, preds_train)
precisionK_test  = avgPrecisionK(Y_test,  preds_test)

In [ ]:
plt.figure(figsize=[10,5])
plt.plot(precisions_train, ls='--', c='r', label='Train')
plt.plot(precisions_test,  ls='-',  c='g', label='Test')
plt.plot([precisionK_train for k in range(nLabels)], ls=':', c='r', label='Train, Precision@K')
plt.plot([precisionK_test  for k in range(nLabels)], ls=':', c='g', label='Test, Precision@K')
plt.xticks(np.arange(nLabels), np.arange(1,nLabels+1))
plt.xlabel('k')
plt.ylabel('Precision@k')
plt.legend(loc='best')
plt.title('MLC w. Rank Loss on ' + dataset_name + ' dataset')
plt.savefig(dataset_name + '_rank.svg')