Multi-label classification -- p-classification loss


In [ ]:
%matplotlib inline
%load_ext line_profiler
%load_ext autoreload
%autoreload 2

import os, sys, time
import pickle as pkl
import numpy as np
import pandas as pd

from scipy.optimize import minimize
from scipy.optimize import check_grad
from scipy.special import expit as sigmoid

from sklearn.base import BaseEstimator
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, make_scorer, label_ranking_loss

import matplotlib.pyplot as plt
import seaborn as sns
from joblib import Parallel, delayed

In [ ]:
sys.path.append('src')
from evaluate import avgPrecisionK, evaluatePrecision, evaluateRankingLoss, f1_score_nowarn
#from datasets import create_dataset, dataset_names, nLabels_dict
from models import obj_pclassification
from tools import create_dataset, dataset_names, nLabels_dict

In [ ]:
dataset_names

In [ ]:
data_ix = 0

In [ ]:
dataset_name = dataset_names[data_ix]
nLabels = nLabels_dict[dataset_name]
print(dataset_name, nLabels)

In [ ]:
data_dir = 'data'
SEED = 918273645
fmodel_base = os.path.join(data_dir, 'pc-' + dataset_name + '-base.pkl')
fmodel_prec = os.path.join(data_dir, 'pc-' + dataset_name + '-prec.pkl')
fmodel_f1 = os.path.join(data_dir, 'pc-' + dataset_name + '-f1.pkl')

Load data.


In [ ]:
X_train, Y_train = create_dataset(dataset_name, train_data=True, shuffle=True, random_state=SEED)
X_test,  Y_test  = create_dataset(dataset_name, train_data=False)

Feature normalisation.


In [ ]:
X_train_mean = np.mean(X_train, axis=0).reshape((1, -1))
X_train_std = np.std(X_train, axis=0).reshape((1, -1)) + 10 ** (-6)
X_train -= X_train_mean
X_train /= X_train_std
X_test  -= X_train_mean
X_test  /= X_train_std

In [ ]:
def print_dataset_info(X_train, Y_train, X_test, Y_test):
    N_train, D = X_train.shape
    K = Y_train.shape[1]
    N_test = X_test.shape[0]
    print('%-45s %s' % ('Number of training examples:', '{:,}'.format(N_train)))
    print('%-45s %s' % ('Number of test examples:', '{:,}'.format(N_test)))
    print('%-45s %s' % ('Number of features:', '{:,}'.format(D)))
    print('%-45s %s' % ('Number of labels:', '{:,}'.format(K)))
    avgK_train = np.mean(np.sum(Y_train, axis=1))
    avgK_test  = np.mean(np.sum(Y_test, axis=1))
    print('%-45s %.3f (%.2f%%)' % ('Average number of positive labels (train):', avgK_train, 100*avgK_train / K))
    print('%-45s %.3f (%.2f%%)' % ('Average number of positive labels (test):', avgK_test, 100*avgK_test / K))
    #print('%-45s %.4f%%' % ('Average label occurrence (train):', np.mean(np.sum(Y_train, axis=0)) / N_train))
    #print('%-45s %.4f%%' % ('Average label occurrence (test):', np.mean(np.sum(Y_test, axis=0)) / N_test))
    print('%-45s %.3f%%' % ('Sparsity (percent) (train):', 100 * np.sum(Y_train) / np.prod(Y_train.shape)))
    print('%-45s %.3f%%' % ('Sparsity (percent) (test):', 100 * np.sum(Y_test) / np.prod(Y_test.shape)))

In [ ]:
print('%-45s %s' % ('Dataset:', dataset_name))
print_dataset_info(X_train, Y_train, X_test, Y_test)

Check gradient.


In [ ]:
PU = np.zeros((Y_train.shape[0], 3), dtype=Y_train.dtype)
PU[[0, 1, 2, 10], [0, 1, 1, 2]] = 1
upl_ix = [[2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13, 14, 15]]
w0 = 0.001 * np.random.randn((Y_train.shape[1] + 3) * X_train.shape[1] + 1)
loss = 'both'
check_grad(
    lambda w: obj_pclassification(w, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss,
                                  PU=PU, user_playlist_indices=upl_ix)[0],
    lambda w: obj_pclassification(w, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss,
                                  PU=PU, user_playlist_indices=upl_ix)[1],
    w0)

p-classification loss

Multi-label learning with p-classification loss.

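The vectorised implementation below computes the following objective (a transcription of the code, with $N$ examples, $D$ features, $K$ labels, scores $f_{nk} = \mathbf{w}_k^\top \mathbf{x}_n + b$, and $K_n^+$, $K_n^-$ the numbers of positive and negative labels of example $n$; both weights are set to $1$ when weighting=False):

$$
J(W, b) = \frac{1}{2C}\|W\|_F^2 + \frac{1}{N}\sum_{n=1}^{N}\Bigg[\sum_{k:\,y_{nk}=1}\frac{e^{-f_{nk}}}{K_n^+} + \frac{1}{p}\sum_{k:\,y_{nk}=0}\frac{e^{p f_{nk}}}{K_n^-}\Bigg]
$$

with gradients

$$
\frac{\partial J}{\partial \mathbf{w}_k} = \frac{\mathbf{w}_k}{C} + \frac{1}{N}\sum_{n=1}^{N}\Big[(1-y_{nk})\frac{e^{p f_{nk}}}{K_n^-} - y_{nk}\frac{e^{-f_{nk}}}{K_n^+}\Big]\mathbf{x}_n,
\qquad
\frac{\partial J}{\partial b} = \frac{1}{N}\sum_{n=1}^{N}\sum_{k=1}^{K}\Big[(1-y_{nk})\frac{e^{p f_{nk}}}{K_n^-} - y_{nk}\frac{e^{-f_{nk}}}{K_n^+}\Big].
$$
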

In [ ]:
def obj_pclassification(w, X, Y, C, p, weighting=True):
    """
        Objective with L2 regularisation and p-classification loss
        
        Input:
            - w: current weight vector, flattened L x D + 1 (bias)
            - X: feature matrix, N x D
            - Y: label matrix,   N x L
            - C: regularisation constant, is consistent with scikit-learn C = 1 / (N * \lambda)
            - p: constant for p-classification push loss
    """
    N, D = X.shape
    K = Y.shape[1]
    assert(w.shape[0] == K * D + 1)
    assert(p >= 1)
    assert(C > 0)
    
    W = w[1:].reshape(K, D)  # reshape weight matrix
    b = w[0]           # bias
    OneN = np.ones(N)  # N by 1
    OneK = np.ones(K)  # K by 1
    
    if weighting is True:
        #KPosAll = np.sum(Y, axis=1)  # number of positive labels for each example, N by 1
        KPosAll = np.dot(Y, OneK)
        KNegAll = K - KPosAll        # number of negative labels for each example, N by 1
    else:
        KPosAll = np.ones(N)
        KNegAll = np.ones(N)
    A_diag = np.divide(1, KPosAll)  # N by 1
    P_diag = np.divide(1, KNegAll)  # N by 1
    
    #T1 = np.dot(X, W.T)  # N by K
    T1 = np.dot(X, W.T) + b # N by K
    
    T1p = np.multiply(Y, T1)
    T2 = np.multiply(Y, np.exp(-T1p))  # N by K
    T3 = T2 * A_diag[:, None]  # N by K
    
    #T1n = np.multiply(1-Y, T1)
    T1n = T1 - T1p
    T4 = np.multiply(1-Y, np.exp(p * T1n))  # N by K
    T5 = T4 * P_diag[:, None]  # N by K
    
    J = np.dot(W.ravel(), W.ravel()) * 0.5 / C 
    J += (np.dot(OneN, np.dot(T3, OneK)) + np.dot(OneN, np.dot(T5/p, OneK))) / N
    #J = np.dot(W.ravel(), W.ravel()) * 0.5 / C + (np.dot(OneN, np.dot(T3 + T5/p, OneK))) / N  # not as efficient
    
    #G = W / C + (np.dot(T3.T, -X) + np.dot(T5.T, X)) / N
    G = W / C + (np.dot((-T3 + T5).T, X)) / N   # more efficient
    
    db = np.dot(OneN, np.dot(-T3 + T5, OneK)) / N
    
    gradients = np.concatenate(([db], G.ravel()), axis=0) 
    
    return (J, gradients)

In [ ]:
def loss_pclassification(X, Y, p, W, b, weighting=True):
    """
        Accumulated loss for p-classification, for test tightness of bound.
        
        Input:
            - w: current weight vector, flattened L x D
            - X: feature matrix, N x D
            - Y: label matrix,   N x L
            - p: constant for p-classification push loss
    """
    N, D = X.shape
    K = Y.shape[1]
    assert W.shape == (K, D)
    assert p >= 1
    
    OneN = np.ones(N)  # N by 1
    OneK = np.ones(K)  # K by 1
    
    if weighting is True:
        #KPosAll = np.sum(Y, axis=1)  # number of positive labels for each example, N by 1
        KPosAll = np.dot(Y, OneK)
        KNegAll = K - KPosAll        # number of negative labels for each example, N by 1
    else:
        KPosAll = np.ones(N)
        KNegAll = np.ones(N)
    A_diag = np.divide(1, KPosAll)  # N by 1
    P_diag = np.divide(1, KNegAll)  # N by 1
    
    T1 = np.dot(X, W.T) + b  # N by K
    
    T1p = np.multiply(Y, T1)
    T2 = np.multiply(Y, np.exp(-T1p))  # N by K
    T3 = T2 * A_diag[:, None]  # N by K
    
    #T1n = np.multiply(1-Y, T1)
    T1n = T1 - T1p
    T4 = np.multiply(1-Y, np.exp(p * T1n))  # N by K
    T5 = T4 * P_diag[:, None]  # N by K
    
    return np.dot(T3 + T5/p, OneK)

In [ ]:
def obj_pclassification_loop(w, X, Y, C, p, weighting=True):
    """
        Objective with L2 regularisation and p-classification loss
        
        Input:
            - w: current weight vector, flattened L x D
            - X: feature matrix, N x D
            - Y: label matrix,   N x L
            - C: regularisation constant, is consistent with scikit-learn C = 1 / (N * \lambda)
            - p: constant for p-classification push loss
    """
    N, D = X.shape
    L = Y.shape[1]
    assert(w.shape[0] == L * D + 1)
    assert(p >= 1)
    assert(C > 0)
    
    W = w[1:].reshape(L, D)  # reshape weight matrix
    b = w[0]
    
    J = 0.0  # cost
    G = np.zeros_like(W)  # gradient matrix
    db = 0.0
    if weighting is True:
        nPosAll = np.sum(Y, axis=1)  # number of positive labels for each example, N by 1
        nNegAll = L - nPosAll        # number of negative labels for each example, N by 1
    else:
        nPosAll = np.ones(N)
        nNegAll = np.ones(N)
    
    for k in range(L):
        wk = W[k, :]
        Yk = Y[:, k]
        sPosVec = np.dot(X[Yk == 1, :], wk) + b  # Nk+ by 1
        sNegVec = np.dot(X[Yk == 0, :], wk) + b  # NK- by 1
        nPosVec = nPosAll[Yk == 1]               # Nk+ by 1
        nNegVec = nNegAll[Yk == 0]               # NK- by 1
        
        #nPosVec = np.sum(Y[Yk == 1, :], axis=1)  # Nk+ by 1
        #nNegVec = np.sum(Y[Yk == 0, :], axis=1)  # NK- by 1
        
        #nPosVec = np.sum(Y[Yk == 1, :], axis=1) + 0.01 # Nk+ by 1 with smoothing
        #nNegVec = np.sum(Y[Yk == 0, :], axis=1) + 0.01 # NK- by 1 with smoothing
        
        #nPosVec = np.ones_like(sPosVec) * N
        #nNegVec = np.ones_like(sNegVec) * N
        
        lossPos = np.divide(np.exp(-sPosVec), nPosVec)     # NK+ by 1
        lossNeg = np.divide(np.exp(p * sNegVec), nNegVec)  # NK- by 1
        
        J += np.sum(lossPos) + np.sum(lossNeg) / p
        db += -np.sum(lossPos) + np.sum(lossNeg)
        #print(X[Yk == 0, :][0])
        #print(np.exp(np.dot(X[Yk == 0, :][0], wk)))
        
        GradPos = -X[Yk == 1, :] * lossPos[:, None]
        GradNeg =  X[Yk == 0, :] * lossNeg[:, None]
        
        G[k, :] = np.sum(GradPos, axis=0) + np.sum(GradNeg, axis=0)
                
    #J = 0.5 * C * np.dot(w, w) + J / N
    #G = C * W + G / N
    
    # be consistent with scikit-learn C = 1 / (N * \lambda)
    # normalise the objective J by dividing it C 
    J = np.dot(W.ravel(), W.ravel()) / (2.0 * C) + J / N
    G = W / C + G / N
    
    gradients = np.concatenate(([db/N], G.ravel()), axis=0)
    
    return (J, gradients)

In [ ]:
def obj_pclassification_loop0(w, X, Y, C, p, weighting=True):
    """
        Objective with L2 regularisation and p-classification loss
        
        Input:
            - w: current weight vector, flattened L x D
            - X: feature matrix, N x D
            - Y: label matrix,   N x L
            - C: regularisation constant, is consistent with scikit-learn C = 1 / (N * \lambda)
            - p: constant for p-classification push loss
    """
    N, D = X.shape
    K = Y.shape[1]
    assert(w.shape[0] == K * D + 1)
    assert(p >= 1)
    assert(C > 0)
    
    W = w[1:].reshape(K, D)  # reshape weight matrix
    b = w[0]
    
    J = 0.0  # cost
    G = np.zeros_like(W)  # gradient matrix
    db = 0.0
    if weighting is True:
        KPosAll = np.sum(Y, axis=1)  # number of positive labels for each example, N by 1
        KNegAll = K - KPosAll        # number of negative labels for each example, N by 1
    else:
        KPosAll = np.ones(N)
        KNegAll = np.ones(N)
    
    for k in range(K):
        for n in range(N):
            score = np.dot(W[k, :], X[n, :]) + b
            if Y[n, k] == 1:
                t1 = np.exp(-score) / KPosAll[n]
                J += t1
                db -= t1
                G[k, :] = G[k, :] - X[n, :] * t1
            else:
                t2 = np.exp(p * score) / KNegAll[n]
                J += t2 / p
                db += t2
                G[k, :] = G[k, :] + X[n, :] * t2
                
    J = np.dot(W.ravel(), W.ravel()) * 0.5 / C + J / N
    db = db / N
    G = W / C + G / N
    
    gradients = np.concatenate(([db], G.ravel()), axis=0)  # db was already divided by N above
    
    return (J, gradients)

Check gradient.


In [ ]:
#w0 = 0.001 * np.random.randn(Y_train.shape[1] * X_train.shape[1] + 1)
#check_grad(lambda w: obj_pclassification(w, X_train, Y_train, C=1, p=8)[0], 
#           lambda w: obj_pclassification(w, X_train, Y_train, C=1, p=8)[1], w0)

In [ ]:
#w0 = 0.001 * np.random.randn(Y_train.shape[1] * X_train.shape[1] + 1)
#check_grad(lambda w: obj_pclassification_loop(w, X_train, Y_train, C=1, p=8)[0], 
#           lambda w: obj_pclassification_loop(w, X_train, Y_train, C=1, p=8)[1], w0)

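A minimal, runnable variant of the checks above on a random subsample of examples, so the numerical check finishes quickly (a sketch; n_sub, sub_ix, Xs and Ys are illustrative names not in the original notebook, and for datasets with many features and labels check_grad is still expensive):


In [ ]:
# Gradient check of obj_pclassification (defined above) on a small random subsample.
n_sub = 200
rnd = np.random.RandomState(SEED)
pos_rows = np.where(Y_train.sum(axis=1) > 0)[0]  # avoid examples without any positive label
sub_ix = rnd.choice(pos_rows, size=min(n_sub, pos_rows.shape[0]), replace=False)
Xs, Ys = X_train[sub_ix, :], Y_train[sub_ix, :]
w0 = 0.001 * rnd.randn(Ys.shape[1] * Xs.shape[1] + 1)
err = check_grad(lambda w: obj_pclassification(w, Xs, Ys, C=1, p=2)[0],
                 lambda w: obj_pclassification(w, Xs, Ys, C=1, p=2)[1], w0)
print('check_grad difference: %g' % err)
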
In [ ]:
def cmp_loop_vec(func_loop, func_vec, X_train, Y_train, p=1):
    print('%15s %15s %15s %15s %15s' % ('C','J_Diff', 'J_loop', 'J_vec', 'G_Diff'))
    w0 = 0.001 * np.random.randn(Y_train.shape[1] * X_train.shape[1] + 1)
    for e in range(-6, 10):
        C = 10**(e)
        J,  G  = func_loop(w0, X_train, Y_train, C, p=p)
        J1, G1 = func_vec( w0, X_train, Y_train, C, p=p)
        Gdiff = G1 - G
        print('%15g %15g %15g %15g %15g' % (C, J1 - J, J, J1, np.dot(Gdiff, Gdiff)))

In [ ]:
#cmp_loop_vec(obj_pclassification_loop, obj_pclassification, X_train, Y_train, p=8)

Line profiling


In [ ]:
#C = 10; p = 2
#w0 = np.random.rand(X_train.shape[1] * nLabels + 1)
#%lprun -f obj_pclassification check_grad(lambda w: obj_pclassification(w, X_train, Y_train, C, p)[0], \
#                                         lambda w: obj_pclassification(w, X_train, Y_train, C, p)[1], w0)

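A lighter-weight, wall-clock comparison of the two objective implementations (a sketch, assuming obj_pclassification and obj_pclassification_loop are defined above; absolute timings depend on the dataset, C and p):


In [ ]:
# Rough timing of the vectorised and per-label-loop objectives;
# use the %lprun cell above for a per-line breakdown.
w0 = 0.001 * np.random.randn(Y_train.shape[1] * X_train.shape[1] + 1)
for name, func in [('vectorised', obj_pclassification), ('loop', obj_pclassification_loop)]:
    t0 = time.time()
    func(w0, X_train, Y_train, C=10, p=2)
    print('%-12s %.3f seconds' % (name, time.time() - t0))
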
Class definition.


In [ ]:
class MLC_pclassification(BaseEstimator):
    """All methods are necessary for a scikit-learn estimator"""
    
    def __init__(self, C=1, p=1, weighting=True):
        """Initialisation"""
        
        assert C >  0
        assert p >= 1
        self.C = C
        self.p = p
        self.weighting = weighting
        self.obj_func = obj_pclassification
        self.trained = False
        
    def fit(self, X_train, Y_train):
        """Model fitting by optimising the objective"""
        opt_method = 'L-BFGS-B' #'BFGS' #'Newton-CG'
        options = {'disp': 1, 'maxiter': 10**5, 'maxfun': 10**5} # , 'iprint': 99}
        sys.stdout.write('\nC: %g, p: %g, weighting: %s\n' % (self.C, self.p, self.weighting))
        sys.stdout.flush()
            
        N, D = X_train.shape
        K = Y_train.shape[1]
        #w0 = np.random.rand(K * D + 1) - 0.5  # initial guess in range [-0.5, 0.5]
        w0 = 0.001 * np.random.randn(K * D + 1)
        opt = minimize(self.obj_func, w0, args=(X_train, Y_train, self.C, self.p, self.weighting), \
                       method=opt_method, jac=True, options=options)
        if opt.success is True:
            self.b = opt.x[0]
            self.W = np.reshape(opt.x[1:], (K, D))
            self.trained = True
        else:
            sys.stderr.write('Optimisation failed\n')
            print(opt.items())
            self.trained = False
            
            
    def decision_function(self, X_test):
        """Make predictions (score is real number)"""
        
        assert self.trained is True, "Can't make prediction before training"
        return np.dot(X_test, self.W.T) + self.b  # raw (linear) prediction scores
        
    
    def predict(self, X_test):
        return self.decision_function(X_test)
    #    """Make predictions (score is boolean)"""   
    #    preds = sigmoid(self.decision_function(X_test))
    #    #return (preds >= 0)
    #    assert self.TH is not None
    #    return preds >= self.TH        
        
    # inherit from BaseEstimator instead of re-implement
    #
    #def get_params(self, deep = True):
    #def set_params(self, **params):

In [ ]:
def dump_results(predictor, X_train, Y_train, X_test, Y_test, rankingLoss=False):
    """
        Compute and save performance results
    """
    preds_train = predictor.decision_function(X_train)
    preds_test  = predictor.decision_function(X_test)
    
    print('Training set:')
    perf_dict_train = evaluatePrecision(Y_train, preds_train, verbose=1)
    print()
    print('Test set:')
    perf_dict_test = evaluatePrecision(Y_test, preds_test, verbose=1)
    
    if rankingLoss is True:
        print()
        print('Training set:')
        perf_dict_train.update(evaluateRankingLoss(Y_train, preds_train))
        print(label_ranking_loss(Y_train, preds_train))
        print()
        print('Test set:')
        perf_dict_test.update(evaluateRankingLoss(Y_test, preds_test))
        print(label_ranking_loss(Y_test, preds_test))
    
    return perf_dict_train, perf_dict_test

In [ ]:
def avgF1(Y_true, Y_pred):
    #THs = [0, 0.05, 0.10, 0.15, 0.2, 0.25, 0.30, 0.35, 0.4, 0.45, 0.5, 0.55, 0.60, 0.65, 0.70, 0.75]  # SPEN THs
    THs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85]
    F1 = Parallel(n_jobs=-1)(delayed(f1_score_nowarn)(Y_true, Y_pred >= th, average='samples') for th in THs)
    bestix = np.argmax(F1)
    print('best threshold: %g, best F1: %g, #examples: %g' % (THs[bestix], F1[bestix], Y_true.shape[0]))
    return F1[bestix]

In [ ]:
def avgF1_0(Y_true, Y_pred):
    F1 = f1_score_nowarn(Y_true, Y_pred >= 0, average='samples')
    print('F1: %g, #examples: %g' % (F1, Y_true.shape[0]))
    return F1

In [ ]:
if os.path.exists(fmodel_f1):
    clf = pkl.load(open(fmodel_f1, 'rb'))

In [ ]:
avgF1(Y_test, clf.decision_function(X_test))

In [ ]:
avgF1(Y_train, clf.decision_function(X_train))

In [ ]:
clf.best_threshold = 0.7

In [ ]:
f1_score_nowarn(Y_test, clf.decision_function(X_test) >= clf.best_threshold, average='samples')

In [ ]:
f1_score_nowarn(Y_test, clf.decision_function(X_test) >= clf.best_threshold, average='macro')

In [ ]:
pkl.dump(clf, open(fmodel_f1, 'wb'))

In [ ]:
clf = MLC_pclassification(C=100, p=2, weighting=True)
clf.fit(X_train, Y_train)
print(avgF1(Y_train, clf.decision_function(X_train)))
print(avgF1(Y_test, clf.decision_function(X_test)))

In [ ]:


In [ ]:
C_set = [0.01, 0.1, 1, 10, 100, 1000]  # bibtex, bookmarks level 1
p_set = [1, 2, 3, 4, 5, 6]
parameters = [{'C': C_set, 'p': p_set, 'weighting': [True]}]
#scorer = {'Prec': make_scorer(avgPrecisionK)}
scorer = {'F1': make_scorer(avgF1)}

In [ ]:
clf = GridSearchCV(MLC_pclassification(), parameters, scoring=scorer, cv=5, n_jobs=1, refit='F1')
clf.fit(X_train, Y_train)
#pkl.dump(clf, open(fmodel_f1, 'wb'))

In [ ]:
clf.cv_results_['mean_test_F1'].reshape(len(C_set), len(p_set))

In [ ]:
clf.best_params_

In [ ]:
C_set = [150, 200, 250, 500, 750] # bookmarks level 2
p_set = [2, 3, 4, 5, 6]
parameters = [{'C': C_set, 'p': p_set, 'weighting': [True]}]

In [ ]:
clf2 = GridSearchCV(MLC_pclassification(), parameters, scoring=scorer, cv=5, n_jobs=1, refit='F1')
clf2.fit(X_train, Y_train)
#pkl.dump(clf, open(fmodel_f1, 'wb'))

In [ ]:
clf2.cv_results_['mean_test_F1'].reshape(len(C_set), len(p_set))

In [ ]:
print(avgF1(Y_train, clf2.decision_function(X_train)))
print(avgF1(Y_test, clf2.decision_function(X_test)))

In [ ]:
pkl.dump(clf2, open(fmodel_f1, 'wb'))

In [ ]:
print('Train (' + dataset_name + '):', avgF1(Y_train, clf.decision_function(X_train))); print()
print('Test (' + dataset_name + '):', avgF1(Y_test, clf.decision_function(X_test)))

In [ ]:
# use the test-time threshold of the best hyper-parameters from the cross validation above
# it is 0.8 for both the bibtex and bookmarks datasets
threshold = 0.7
print('average F1:', f1_score_nowarn(Y_test, clf2.decision_function(X_test) >= threshold, average='samples'))

Results


In [ ]:
#ysum = Y_train.sum(axis=1)

In [ ]:
#ax = plt.subplot('111')
#ax.hist(ysum, bins=20)
#ax.set_yscale('log')

In [ ]:
def calcF1(Y_true, Y_pred):
    """
    Compute F1 scores for multilabel prediction, one score for each example.
    precision = true_positive / n_true
    recall = true_positive / n_positive
    f1 = (2 * precision * recall) / (precision + recall) = 2 * true_positive / (n_true + n_positive)
    """
    assert Y_true.shape == Y_pred.shape
    N, K = Y_true.shape
    OneK = np.ones(K)
    
    n_true = np.dot(Y_true, OneK)
    n_positive = np.dot(Y_pred, OneK)
    true_positive = np.dot(np.multiply(Y_true, Y_pred), OneK)
    
    numerator = 2 * true_positive
    denominator = n_true + n_positive
    nonzero_ix = np.nonzero(denominator)[0]
    
    f1 = np.zeros(N)
    f1[nonzero_ix] = np.divide(numerator[nonzero_ix], denominator[nonzero_ix])
    
    return f1

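A small hand-checked example of calcF1 (toy values, chosen for illustration): the first row has 2 true labels, 2 predicted labels and 1 in common, so F1 = 2*1/(2+2) = 0.5; the second row is a perfect single-label prediction with F1 = 1.


In [ ]:
# Toy sanity check for calcF1; expected output: [0.5, 1.0]
Y_true_toy = np.array([[1, 0, 1], [0, 1, 0]])
Y_pred_toy = np.array([[1, 1, 0], [0, 1, 0]])
print(calcF1(Y_true_toy, Y_pred_toy))
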
In [ ]:
f1_score_nowarn(Y_train, clf.decision_function(X_train) > 0.9, average='samples')

In [ ]:
np.mean(calcF1(Y_train, clf.decision_function(X_train) > 0.9))

In [ ]:
from util import plot_loss
xlabel = 'P-Classification Loss'
ylabel = '1 - F1'

In [ ]:
losses = loss_pclassification(X=X_train, Y=Y_train, \
                              W=clf.best_estimator_.W, b=clf.best_estimator_.b, p=clf.best_params_['p'])

In [ ]:
f11 = 1 - calcF1(Y_train, clf.decision_function(X_train) >= 0)

In [ ]:
f11

In [ ]:
np.nonzero(losses < f11)

In [ ]:
ind = 2666
X_train[ind]

In [ ]:
losses[ind]

In [ ]:
f11[ind]

In [ ]:
pred = clf.decision_function(X_train)[ind] < 0

In [ ]:
Y_train[ind]

In [ ]:
np.multiply(Y_train[ind], pred).sum()

In [ ]:
th_train = 0
title = 'Train (' + dataset_name + ')'
loss_train = loss_pclassification(X=X_train, Y=Y_train, \
                                  W=clf.best_estimator_.W, b=clf.best_estimator_.b, p=clf.best_params_['p'])
plot_loss(loss_train, 1-calcF1(Y_train, clf.decision_function(X_train) >= th_train), xlabel, ylabel, title)

In [ ]:
th_test = 0
title = 'Test (' + dataset_name + ')'
loss_test = loss_pclassification(X=X_test, Y=Y_test, \
                                 W=clf.best_estimator_.W, b=clf.best_estimator_.b, p=clf.best_params_['p'])
plot_loss(loss_test, 1-calcF1(Y_test, clf.decision_function(X_test) >= th_test), xlabel, ylabel, title)

In [ ]:
max(loss_test)

In [ ]:
indices = pd.MultiIndex.from_product([['yeast', 'bibtex'], ['Logistic Regression', 'P-Classification'], 
                                      ['Train', 'Test']], names=['Dataset', 'Method', 'Split'])

In [ ]:
result = pd.DataFrame(index=indices, columns=['F1', 'Precision@K', 'P-Classification Loss'])

In [ ]:
#result = pkl.load(open('tmp.tmp', 'rb'))

In [ ]:
result.loc[(dataset_name, 'P-Classification', 'Train'), 'F1'] = avgF1(Y_train, clf.decision_function(X_train))
result.loc[(dataset_name, 'P-Classification', 'Test'), 'F1'] = avgF1(Y_test, clf.decision_function(X_test))

result.loc[(dataset_name, 'P-Classification', 'Train'), 'Precision@K'] = avgPrecisionK(Y_train, 
                                                                                   clf.decision_function(X_train))
result.loc[(dataset_name, 'P-Classification', 'Test'), 'Precision@K'] = avgPrecisionK(Y_test, 
                                                                                  clf.decision_function(X_test))

result.loc[(dataset_name, 'P-Classification', 'Train'), 'P-Classification Loss'] = \
    np.mean(loss_pclassification(W=clf.best_estimator_.W, b=clf.best_estimator_.b,
                                 X=X_train, Y=Y_train, p=clf.best_params_['p']))
result.loc[(dataset_name, 'P-Classification', 'Test'), 'P-Classification Loss'] = \
    np.mean(loss_pclassification(W=clf.best_estimator_.W, b=clf.best_estimator_.b,
                                 X=X_test, Y=Y_test, p=clf.best_params_['p']))

In [ ]:
from BinaryRelevance import BinaryRelevance
br_clf = pkl.load(open(os.path.join(data_dir, 'br-' + dataset_name + '-f1.pkl'), 'rb'))

In [ ]:
br_clf.best_estimator_.estimator.coef_.shape

In [ ]:
# assumes the BR base estimator exposes a scikit-learn style intercept_ for the bias term
losses = loss_pclassification(W=br_clf.best_estimator_.estimator.coef_, b=br_clf.best_estimator_.estimator.intercept_,
                              X=X_train, Y=Y_train, p=2)
#np.mean(losses[losses < 1000])
#np.sum(losses > 1000)

In [ ]:
losses.min()

In [ ]:
print('%g' % losses.max())

In [ ]:
45 / losses.shape[0]
losses.shape

In [ ]:
p = 2  # dataset specific from cross validation of P-Classification

In [ ]:
result.loc[(dataset_name, 'Logistic Regression', 'Train'), 'F1'] = \
    f1_score_nowarn(Y_train, br_clf.decision_function(X_train) >= 0, average='samples')
result.loc[(dataset_name, 'Logistic Regression', 'Test'), 'F1'] = \
    f1_score_nowarn(Y_test, br_clf.decision_function(X_test) >= 0, average='samples')

result.loc[(dataset_name, 'Logistic Regression', 'Train'), 'Precision@K'] = \
    avgPrecisionK(Y_train, br_clf.decision_function(X_train))
result.loc[(dataset_name, 'Logistic Regression', 'Test'), 'Precision@K'] = \
    avgPrecisionK(Y_test, br_clf.decision_function(X_test))

result.loc[(dataset_name, 'Logistic Regression', 'Train'), 'P-Classification Loss'] = \
    np.mean(loss_pclassification(W=br_clf.best_estimator_.estimator.coef_, b=br_clf.best_estimator_.estimator.intercept_,
                                 X=X_train, Y=Y_train, p=p))
result.loc[(dataset_name, 'Logistic Regression', 'Test'), 'P-Classification Loss'] = \
    np.mean(loss_pclassification(W=br_clf.best_estimator_.estimator.coef_, b=br_clf.best_estimator_.estimator.intercept_,
                                 X=X_test, Y=Y_test, p=p))

In [ ]:
pkl.dump(result, open('tmp.tmp', 'wb'))

In [ ]:
rstr = result.to_latex(float_format=lambda x: '$%.4f$' % x, na_rep='-', multirow=True, escape=False)
print('\\begin{table}[!h]')
print('\\centering')
#print('\\caption{Performance on test set}')
print('\\label{tab:perf}')
print(rstr)
print('\\end{table}')