I want to test the following hypothesis: my ResNet is learning to predict subposes correctly, but is only predicting biposelets by relying on irrelevant cues present in the training set (i.e. it is overfitting to biposelets, but not necessarily to subposes). Skip to the "Interesting stuff starts here" heading for actual results.
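
Throughout this notebook I assume the flat label layout used by the training code: class 0 is background, and each subpose then owns a contiguous block of 100 biposelet classes, so subpose s covers combined labels 100*s + 1 to 100*s + 100. The helper below is a minimal sketch of that decoding (the name and constant are mine, purely for reference); nothing later depends on it.


In [ ]:
# Hypothetical helper illustrating the assumed label layout:
# one background class followed by 100 biposelet classes per subpose.
NUM_BIPOSELETS = 100

def decode_label(label):
    """Map a combined class index to a (subpose, biposelet) pair.
    Returns (None, None) for the background class."""
    if label == 0:
        return None, None
    return (label - 1) // NUM_BIPOSELETS, (label - 1) % NUM_BIPOSELETS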


In [ ]:
%matplotlib inline

import h5py

from keras.optimizers import SGD
from keras.utils.io_utils import HDF5Matrix

import matplotlib.pyplot as plt

import numpy as np

from scipy.stats import entropy

import sklearn.metrics as skmetrics

import cPickle as pickle

from os import path

import train
import evaluate
from train import infer_sizes
import models

In [ ]:
# Load data and get a model
# Set no_preds = True to recompute and cache predictions below; False loads the cached .npy files
no_preds = False
dest_dir = '/home/sam/delete-me/resnet-preds/'
cache_dir = '../cache/mpii-cooking/'
train_h5_path = path.join(cache_dir, 'train-patches/samples-000001.h5')
train_neg_h5_path = path.join(cache_dir, 'train-patches/negatives.h5')
val_h5_path = path.join(cache_dir, 'val-patches/samples-000001.h5')
val_neg_h5_path = path.join(cache_dir, 'val-patches/negatives.h5')
train_h5 = h5py.File(train_h5_path, 'r')
train_neg_h5 = h5py.File(train_neg_h5_path, 'r')
val_h5 = h5py.File(val_h5_path, 'r')
val_neg_h5 = h5py.File(val_neg_h5_path, 'r')
train_images, train_flow = train_h5['images'], train_h5['flow']
train_neg_images, train_neg_flow = train_neg_h5['images'], train_neg_h5['flow']
val_images, val_flow = val_h5['images'], val_h5['flow']
val_neg_images, val_neg_flow = val_neg_h5['images'], val_neg_h5['flow']
ds_shape = infer_sizes(train_h5_path)
mp_path = '../cache/mpii-cooking/mean_pixel.mat'

In [ ]:
sgd = SGD(lr=0.0001, nesterov=True, momentum=0.9)
poselet_model = models.resnet34_poselet_class(ds_shape, sgd, 'glorot_normal')
poselet_model.load_weights('../cache/mpii-cooking/keras-checkpoints-resnet-from-3582/checkpoints/model-iter-23808-r604131.h5')

In [ ]:
evaluate = reload(evaluate)
def test_on_data(images, batch_size=32, model=poselet_model, mp_path=mp_path):
    """Test on an HDF5 file of training or validation data.
    Return a matrix with each row giving the predicted output
    distribution."""
    return evaluate.get_predictions(model, mp_path, {'images': images}, batch_size)

def save_preds(preds, dest_name):
    assert len(preds.keys()) == 1 and 'poselet' in preds.keys()
    dest_file = path.join(dest_dir, dest_name + '.npy')
    np.save(dest_file, preds['poselet'])
    print('Saved to ' + dest_file)

def load_preds(dest_name):
    dest_file = path.join(dest_dir, dest_name + '.npy')
    return np.load(dest_file)

In [ ]:
if no_preds:
    val_neg_preds = test_on_data(val_neg_images)
    save_preds(val_neg_preds, 'val_neg_preds')

    val_preds = test_on_data(val_images)
    save_preds(val_preds, 'val_preds')

    train_neg_preds = test_on_data(train_neg_images)
    save_preds(train_neg_preds, 'train_neg_preds')

    train_preds = test_on_data(train_images)
    save_preds(train_preds, 'train_preds')
else:
    val_neg_preds = load_preds('val_neg_preds')
    val_preds = load_preds('val_preds')
    train_neg_preds = load_preds('train_neg_preds')
    train_preds = load_preds('train_preds')

In [ ]:
val_neg_gt = np.array(val_neg_h5['poselet']).astype('float32')
val_gt = np.array(val_h5['poselet']).astype('float32')
train_neg_gt = np.array(train_neg_h5['poselet']).astype('float32')
train_gt = np.array(train_h5['poselet']).astype('float32')

In [ ]:
print(val_neg_gt.shape, val_gt.shape)
all_val_preds = np.concatenate((val_preds, val_neg_preds))
all_train_preds = np.concatenate((train_preds, train_neg_preds))
all_val_gt = np.concatenate((val_gt, val_neg_gt))
all_train_gt = np.concatenate((train_gt, train_neg_gt))

In [ ]:
target_names = ('bg', 'shols', 'luarm', 'lmarm', 'llarm', 'lhand', 'ruarm', 'rmarm', 'rlarm', 'rhand')

# From http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=45)
    plt.yticks(tick_marks, target_names)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
def eval_sp(gt_bp_dist, pred_bp_dist):
    gt_bp_labels = argmax_preds(gt_bp_dist)
    pred_bp_labels = argmax_preds(pred_bp_dist)
    print('Biposelet accuracy: %.4f' % skmetrics.accuracy_score(gt_bp_labels, pred_bp_labels))
    
    entropies = entropy(pred_bp_dist.T, base=2)
    # Make sure that we did that over the right axis
    assert entropies.ndim == 1 and len(entropies) == pred_bp_dist.shape[0]
    print(u'Prediction entropy: %.2f bits (+- %.2f)' % (entropies.mean(), entropies.std()))
    print('Uniform entropy would be %.2f bits' % entropy(np.ones((pred_bp_dist.shape[1],)) / pred_bp_dist.shape[1], base=2))

def vis_label_skew(label_mat, title='Label skew'):
    max_inds = argmax_preds(label_mat)
    # Count instances per class, including classes that never appear at all
    counts = np.bincount(max_inds, minlength=label_mat.shape[1])
    plt.hist(np.log(counts + 1), 20)
    plt.xlabel('log(#instances+1)')
    plt.ylabel('Classes with that #instances')
    plt.title(title)
    for i in range(11):
        print('#classes with <=%i instances: %i' % (i, (counts <= i).sum()))
    
def evaluate_preds(preds, gts):
    # Visualise skew in ground truth and predicted labels
    vis_label_skew(preds, title='Predicted label skew')
    plt.show()
    vis_label_skew(gts, title='True label skew')
    plt.show()
    
    # Visualise subpose classification accuracy
    sp_marginal_gt, bp_dist_gt = split_preds(gts)
    sp_marginal_pred, bp_dist_pred = split_preds(preds)
    sp_label_gt = argmax_preds(sp_marginal_gt)
    sp_label_pred = argmax_preds(sp_marginal_pred)
    matrix = skmetrics.confusion_matrix(sp_label_gt, sp_label_pred)
    plot_confusion_matrix(matrix, title='Subpose confusion matrix')
    plt.show()
    
    print('\n\nSubpose classification report:')
    print(skmetrics.classification_report(sp_label_gt, sp_label_pred, target_names=target_names))
    
    # Examine biposelet classification accuracy
    num_sp = len(bp_dist_gt)
    for sp_idx in range(num_sp):
        print('\n\nWorking on subpose %i/%i (%s)' % (sp_idx+1, num_sp, target_names[sp_idx+1]))
        relevant = sp_label_gt == (sp_idx + 1)
        eval_sp(bp_dist_gt[sp_idx][relevant, :], bp_dist_pred[sp_idx][relevant, :])

def split_preds(preds, num_bp=100):
    # Split the preds according to both subpose class (+ background)
    # vs. biposelet class
    assert preds.ndim == 2
    num_sp = int((preds.shape[1] - 1) / num_bp)
    assert preds.shape[1] == num_bp * num_sp + 1
    subpose_preds = np.zeros((preds.shape[0], num_sp + 1))
    biposelet_preds = tuple()
    subpose_preds[:, 0] = preds[:, 0]
    for sp_idx in range(num_sp):
        start_idx = sp_idx * num_bp + 1
        end_idx = start_idx + num_bp
        this_block = preds[:, start_idx:end_idx]
        sums = np.sum(this_block, axis=1)
        subpose_preds[:, sp_idx+1] = sums
        # Guard against division by zero when a block has no probability mass
        sums[sums <= 0] = 1
        norm_block = this_block / sums.reshape((sums.size, 1))
        biposelet_preds += (norm_block,)
    return subpose_preds, biposelet_preds

def argmax_preds(preds):
    # Just compute argmax of predictions matrix
    assert preds.ndim == 2
    return np.argmax(preds, axis=1)
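
A quick sanity check of split_preds on a small synthetic distribution (two subposes, so 1 + 2*100 = 201 classes) confirms that the subpose marginals and the renormalised per-subpose blocks each sum to one. This cell is purely illustrative.


In [ ]:
# Illustrative sanity check for split_preds on fake data
fake_preds = np.random.rand(5, 201).astype('float32')
fake_preds /= fake_preds.sum(axis=1, keepdims=True)
fake_sp, fake_bp = split_preds(fake_preds)
assert fake_sp.shape == (5, 3) and len(fake_bp) == 2
assert np.allclose(fake_sp.sum(axis=1), 1)
assert all(np.allclose(block.sum(axis=1), 1) for block in fake_bp)
print('split_preds sanity check passed')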

Interesting stuff starts here


In [ ]:
print('On validation set:')
evaluate_preds(all_val_preds, all_val_gt)
print('\n\n\n\nOn training set:')
evaluate_preds(all_train_preds, all_train_gt)

In [ ]:
# Plot one instance of each subpose just to confirm that the labels are correct
am_gt = argmax_preds(train_gt)
for sp_idx, sp_name in enumerate(target_names[1:]):
    print('Instance of subpose %i (purported type %s):' % (sp_idx, sp_name))
    start = 100 * sp_idx + 1
    end = start + 100
    matching_idxs, = np.nonzero((am_gt >= start) & (am_gt < end))
    # Convert from a numpy integer to a plain Python int for scalar indexing
    to_get = int(matching_idxs[0])
    im = train_h5['images'][to_get, 0:3, :, :].transpose((1, 2, 0))
    plt.imshow(im)
    plt.show()