Ensemble learning


In [1]:
model_names = [
    'vgg16-keras', 
    'vgg19-keras', 
    'resnet50-keras',
    'incv3-keras',   
    'Inception_v3'
]

# out best classifiers build on top of pretrained models
classifier_filepath = {
    'incv3-keras'    : 'classifiers/7577-incv3-keras.pkl',
    'vgg16-keras'    : 'classifiers/8515-vgg16-keras.pkl',
    'vgg19-keras'    : 'classifiers/8654-vgg19-keras.pkl',
    'Inception_v3'   : 'classifiers/9061-Inception_v3.pkl',
    'resnet50-keras' : 'classifiers/9158-resnet50-keras.pkl'
}

import numpy as np
data = dict()
for model_name in model_names:
    data[model_name] = np.load('features/CIFAR10_{model}_features.npz'.format(model=model_name))

In [2]:
!ls -1 classifiers/*.pkl


classifiers/7577-incv3-keras.pkl
classifiers/8515-vgg16-keras.pkl
classifiers/8654-vgg19-keras.pkl
classifiers/9061-Inception_v3.pkl
classifiers/9158-resnet50-keras.pkl

In [3]:
from sklearn.externals import joblib

clf = dict()
for name in model_names:
    clf[name] = joblib.load(classifier_filepath[name])

In [4]:
clf['resnet50-keras'].get_params()


Out[4]:
{'C': 0.001,
 'class_weight': None,
 'dual': True,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'loss': 'squared_hinge',
 'max_iter': 1000,
 'multi_class': 'ovr',
 'penalty': 'l2',
 'random_state': None,
 'tol': 0.0001,
 'verbose': 0}

Let us remind the classifiers scores on training dataset from CIFAR10

incv3    => 7577
vgg16    => 8515
vgg19    => 8654
Incv3    => 9061
resnet50 => 9158

Majority voting


In [5]:
import myutils
import numpy as np

In [6]:
_, data_testing = myutils.load_CIFAR_dataset(shuffle=False)

y_testing = np.array( data_testing )[:,1]
n_testing = y_testing.shape[0]

In [7]:
from collections import Counter

def majority_vote(i):
    votes = np.zeros(10);
    for name in model_names:
        y = y_predictions[name][i]
        votes[y] += 1
    return votes.argmax()

y_predictions = dict()
for name in model_names:
    y_predictions[name] = clf[name].predict( data[name]['features_testing'] )
    
y_ensembled = [ majority_vote(i) for i in range(n_testing) ]

In [8]:
np.sum(y_ensembled == y_testing)


Out[8]:
9184

Simple majority voting increases our best result to 91.84%

Weighted voting


In [87]:
# Assume, we know how good are our models. We can give some weight to their votes.
classifier_weights = {
    'incv3-keras'    : 2,
    'vgg16-keras'    : 3,
    'vgg19-keras'    : 4,
    'Inception_v3'   : 7,
    'resnet50-keras' : 7
}

def weighted_vote(i):
    votes = np.zeros(10);
    for name in model_names:
        y = y_predictions[name][i]
        votes[y] = votes[y] + classifier_weights[name]
    return votes.argmax()

y_ensembled = [ weighted_vote(i) for i in range(n_testing) ]

np.sum( y_ensembled == y_testing )


Out[87]:
9284

Dynamic voting

Another idea is as follows

  1. Take the image to classify
  2. Find its K nearest neighbors
  3. Classify the neighbor using all classifiers
  4. Use weighted voting

Adaboost

In case of our classiers we cannot use boosting techniques. However, one can try with svm.SVC(kernel='linear', probability=True) in order to boost the model.

#  base_clf = clf['resnet50-keras']
from sklearn import svm
base_clf = svm.SVC(probability=True,kernel='linear')

n_training = 1000 # TODO: try with bigger testing data

X_train = data['resnet50-keras']['features_training'][:n_training]
y_train = data['resnet50-keras']['labels_training'][:n_training]

base_clf.fit( X_train, y_train )
base_clf.score( data['resnet50-keras']['features_testing'], data['resnet50-keras']['labels_testing'] )

from sklearn.ensemble import AdaBoostClassifier
boosted_model = AdaBoostClassifier(base_estimator = base_clf)
boosted_model.fit( X_train, y_train )
boosted_model.score( data['resnet50-keras']['features_testing'], data['resnet50-keras']['labels_testing'] )

Adaboost did not boost.

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

0.83030000000000004

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          learning_rate=1.0, n_estimators=50, random_state=None)

0.78369999999999995

Model stacking

Here we present the reults with stacking our two best models. One can try with stacking more models.


In [2]:
model_params = {
    'vgg16-keras':    [ {'C':0.0001} ],
    'vgg19-keras':    [ {'C':0.001}  ],
    'resnet50-keras': [ {'C':0.001}   ],
    'Inception_v3':   [ {'C':0.01}   ],
    'incv3-keras':    [ {'C':0.001}  ]
}

So we choose Inception v3 from Tensorflow and ResNET50 from keras.applications


In [3]:
model1_name = 'Inception_v3'
model2_name = 'resnet50-keras'
n_models   = 2

In [4]:
from sklearn.svm import LinearSVC
model1 = LinearSVC( **model_params[model1_name][0] )
model2 = LinearSVC( **model_params[model2_name][0] )

We are not looking at the testing data from CIFAR10 database, yet.


In [5]:
X1_training = data[model1_name]['features_training']
y1_training = data[model1_name]['labels_training']

X2_training = data[model2_name]['features_training']
y2_training = data[model2_name]['labels_training']

n_training = X1_training.shape[0]

We build training data for the stacked model


In [6]:
X_cross_training = np.zeros( (n_training, n_models) )

In [7]:
X1_training.shape,  X2_training.shape


Out[7]:
((50000, 2048), (50000, 2048))

In [8]:
# y_cross_training[ [3,4,5], 0 ] = [ 6,7,2 ]

Let's build X_cross_training


In [9]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=False)

X = X1_training
y = y1_training
for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model1.fit( X_train, y_train )
    y_predict = model1.predict( X_test )
    X_cross_training[test_index,0] = y_predict


TRAIN: [10000 10001 10002 ..., 49997 49998 49999] TEST: [   0    1    2 ..., 9997 9998 9999]
TRAIN: [    0     1     2 ..., 49997 49998 49999] TEST: [10000 10001 10002 ..., 19997 19998 19999]
TRAIN: [    0     1     2 ..., 49997 49998 49999] TEST: [20000 20001 20002 ..., 29997 29998 29999]
TRAIN: [    0     1     2 ..., 49997 49998 49999] TEST: [30000 30001 30002 ..., 39997 39998 39999]
TRAIN: [    0     1     2 ..., 39997 39998 39999] TEST: [40000 40001 40002 ..., 49997 49998 49999]

In [10]:
X = X2_training
y = y2_training
for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model2.fit( X_train, y_train )
    y_predict = model1.predict( X_test )
    X_cross_training[test_index,1] = y_predict


TRAIN: [10000 10001 10002 ..., 49997 49998 49999] TEST: [   0    1    2 ..., 9997 9998 9999]
TRAIN: [    0     1     2 ..., 49997 49998 49999] TEST: [10000 10001 10002 ..., 19997 19998 19999]
TRAIN: [    0     1     2 ..., 49997 49998 49999] TEST: [20000 20001 20002 ..., 29997 29998 29999]
TRAIN: [    0     1     2 ..., 49997 49998 49999] TEST: [30000 30001 30002 ..., 39997 39998 39999]
TRAIN: [    0     1     2 ..., 39997 39998 39999] TEST: [40000 40001 40002 ..., 49997 49998 49999]

In [12]:
X_cross_training[:10,0:n_models]


Out[12]:
array([[ 6.,  6.],
       [ 9.,  6.],
       [ 8.,  6.],
       [ 4.,  2.],
       [ 1.,  1.],
       [ 1.,  1.],
       [ 2.,  4.],
       [ 7.,  1.],
       [ 8.,  6.],
       [ 3.,  1.]])

In [13]:
from sklearn.svm import SVR, SVC
stacked_model = SVC()

In [14]:
# Training stacked model
# One can include some training features from model1 and model1   # TODO
y_training = data[model2_name]['labels_training']  # one can take any model
stacked_model.fit( X_cross_training, y_training )


Out[14]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [15]:
model1.fit( X1_training , y1_training )
X1_testing = data[model1_name]['features_testing']
y1_predictions = model1.predict( X1_testing )

In [16]:
model2.fit( X2_training , y2_training )
X2_testing = data[model2_name]['features_testing']
y2_predictions = model2.predict( X2_testing )

In [17]:
y1_predictions, y2_predictions


Out[17]:
(array([3, 8, 8, ..., 5, 0, 7]), array([3, 8, 8, ..., 5, 1, 7], dtype=uint8))

In [18]:
y_predictions = stacked_model.predict( np.column_stack( (y1_predictions,y2_predictions) ) )

This is the moment to look at the testing data from CIFAR10 database.


In [19]:
y_testing = data[model2_name]['labels_testing']  # one can take any model

In [20]:
np.sum( y_predictions == y_testing )


Out[20]:
9061

Stacking model accuracy is 90.61%, which is not better than the accuracy gained by the base models.

Let us remind the accuracies of the base models:


In [21]:
model1.score( X1_testing, y_testing ),  model2.score( X2_testing, y_testing )


Out[21]:
(0.90610000000000002, 0.91579999999999995)

In [ ]: