In [17]:
%pylab inline

import os
import cv2
import config
from datasets import lfw
from tools import *
from stats import *
from learning.pca import *
from cpp_wrapper.descriptors import *


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['pylab', 'draw_if_interactive', 'random']
`%matplotlib` prevents importing * from pylab and numpy

In [5]:
# LFW development splits: "train" for learning projections, "test" for evaluation.
training_set, _ = lfw.loadDevData("train")
test_set, test_set_ground_truth = lfw.loadDevData("test")
# Labelled training samples restricted to identities with >= 10 images each,
# used below for the supervised steps (LDA / Joint Bayesian).
descs_id_for_lda, y = lfw.loadTrainingDataLabels(training_set, min_nb_samples_per_class=10)


def reindex(indexes, ground_truth_mapping):
    """Re-key ground-truth pair mappings by position instead of raw id.

    Args:
        indexes: sequence of sample ids; each id is replaced by its (first)
            position in this sequence. Ids must be hashable.
        ground_truth_mapping: iterable of dicts mapping an id to a list of
            related ids.

    Returns:
        A tuple of dicts with every id replaced by its position in `indexes`.
    """
    # Precompute id -> first position once. The previous implementation
    # called list.index() for every id, which is O(len(indexes)) per lookup
    # and made the whole function quadratic.
    position = {}
    for i, e in enumerate(indexes):
        if e not in position:  # keep the first occurrence, like list.index()
            position[e] = i

    result_mapping = []
    for mapping in ground_truth_mapping:
        new_mapping = {}
        for k, related in mapping.items():
            # NOTE: an id missing from `indexes` now raises KeyError instead
            # of the ValueError list.index() raised — both are error paths.
            new_mapping[position[k]] = [position[e] for e in related]
        result_mapping.append(new_mapping)
    return tuple(result_mapping)

# Re-key the ground truth by position within test_set, so it lines up with
# the descriptor rows selected below via descriptors[test_set].
test_set_ground_truth = reindex(test_set, test_set_ground_truth)


# Precomputed ULBP descriptors for LFW-a — presumably one row per image id,
# since they are indexed with the id arrays below (TODO confirm).
descriptors = np.load(os.path.join(config.descriptors_path, "ulbp_not_normalized_lfwa.npy"))

In [6]:
from scipy.sparse import lil_matrix
from sklearn.decomposition import PCA
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.lda import LDA


class AnalysisFramework:
    def __init__(self, ground_truth):
        self.descriptors = None
        self.compressed_descriptors = None
        self.ground_truth = ground_truth
        
    
    def computeStats(self, descriptors, distance=cosineDistance):
        scores = computeDistanceMatrix(descriptors, self.ground_truth, distance)
        acc = computeAccuracy(scores)
        roc = computeROC(scores)
        return acc, roc
    
    
    def displayStats(self, labels, accs, rocs):
        for label, acc in zip(labels, accs):
            print "%s: %0.4f"%(label, acc)

        plotROC(rocs, labels, "roc")
    
    
    def computeDescriptors(self, descriptor_func, data):
        if self.descriptors is not None:
            del self.descriptors
            self.descriptors = None
            
        self.descriptors = descriptor_func(data)
        return self.computeStats(self.descriptors)
    
    
    def compressDescriptors(self, method="pca", dim=500, training_data=None):
        if self.compressed_descriptors is not None:
            del self.compressed_descriptors
            self.compressed_descriptors = None
        
        if method == "pca":
            self.compression = computeProbabilisticPCA(training_data, dim=dim)
            whitening_ = np.diag(np.power(self.compression.explained_variance_, -0.5))
        elif method == "rp":
            X_ = lil_matrix((len(self.descriptors), self.descriptors.shape[1]))
            self.compression = GaussianRandomProjection(n_components=dim)
            self.compression.fit(X_)
            whitening_ = np.eye(dim)
        elif method == "srp":
            X_ = lil_matrix((len(self.descriptors), self.descriptors.shape[1]))
            self.compression = SparseRandomProjection(n_components=dim, dense_output=True)
            self.compression.fit(X_)
            whitening_ = np.eye(dim)
        else:
            raise Exception("Compression method unknown")
        
        self.compressed_descriptors = self.compression.transform(self.descriptors)
        
        descriptors_ = np.copy(self.compressed_descriptors)
        for i in xrange(len(descriptors_)):
            descriptors_[i] = np.dot(whitening_, descriptors_[i])
        return self.computeStats(descriptors_)
    
    
    def supervisedLearning(self, method="lda", dim=50, training_data=None):
        if method == "lda":
            self.supervised_learning = LDA(dim)
            self.supervised_learning.fit(training_data[0], training_data[1])
        elif method == "joint_bayesian":
            self.supervised_learning = JointBayesian()
            self.supervised_learning.fit(training_data[0], training_data[1])
        else:
            raise Exception("Supervised learning method unknown")
            
        descriptors_ = self.supervised_learning.transform(self.compressed_descriptors)
        return self.computeStats(descriptors_)

In [7]:
# Evaluation pipeline: baseline ULBP, then + whitened PCA, then + LDA.
analysis = AnalysisFramework(test_set_ground_truth)

# NOTE(review): random.sample is unseeded, so this subset — and every
# accuracy downstream of the PCA fit — changes between runs.
random_subset = random.sample(training_set, 2000)
supervised_learning_subset = descs_id_for_lda

# Parallel result lists consumed by displayStats in later cells.
labels = []
accs = []
rocs = []

# Baseline: raw ULBP descriptors with the default (cosine) distance.
labels.append("ULBP original descriptor")
analysis.descriptors = descriptors[test_set]
acc, roc = analysis.computeStats(descriptors[test_set])
accs.append(acc)
rocs.append(roc)

# Whitened PCA to 200 dims, fitted on the random training subset.
labels.append("ULBP + WPCA")
acc, roc = analysis.compressDescriptors(method="pca", dim=200, training_data=descriptors[random_subset])
# Project the labelled training descriptors into the same PCA space for the
# supervised steps (LDA here, Joint Bayesian in later cells).
supervised_learning_training_data = analysis.compression.transform(descriptors[training_set][supervised_learning_subset])
accs.append(acc)
rocs.append(roc)

# LDA (50 dims) on top of the PCA-compressed descriptors.
labels.append("ULBP + PCA + LDA")
acc, roc = analysis.supervisedLearning(method="lda", dim=50, training_data=(supervised_learning_training_data, y))
accs.append(acc)
rocs.append(roc)

In [8]:
# Accuracies and ROC curves for the three pipelines evaluated so far.
analysis.displayStats(labels, accs, rocs)


ULBP original descriptor: 0.7110
ULBP + WPCA: 0.7770
ULBP + PCA + LDA: 0.8170

In [14]:
from learning.joint_bayesian import *

In [11]:
# NOTE(review): JointBayesian comes from learning.joint_bayesian, imported in
# the In [14] cell just above — execution counts (14 > 11) show this cell was
# run out of order; on a fresh Run-All the order on the page is what matters.
print "Starting Joint Bayesian..."
sys.stdout.flush()

joint_bayesian = JointBayesian()
# Fit on the PCA-projected labelled training descriptors defined in In [7].
joint_bayesian.fit(supervised_learning_training_data, y)


Starting Joint Bayesian...
Finished iteration #1 with error reduction: 217.379351 834.424542
Finished iteration #2 with error reduction: 51.406102 15.723811
Finished iteration #3 with error reduction: 24.591806 13.907304
Finished iteration #4 with error reduction: 14.551673 11.361848
Finished iteration #5 with error reduction: 9.872998 8.981031
Finished iteration #6 with error reduction: 7.457357 7.237352
Finished iteration #7 with error reduction: 6.062262 6.039721
Finished iteration #8 with error reduction: 5.138910 5.293729
Finished iteration #9 with error reduction: 4.519652 4.505311
Finished iteration #10 with error reduction: 3.987041 4.324048
Finished iteration #11 with error reduction: 3.190325 4.775307
Finished iteration #12 with error reduction: 2.366270 4.418294
Finished iteration #13 with error reduction: 1.974082 3.354190
Finished iteration #14 with error reduction: 1.834167 2.963023
Finished iteration #15 with error reduction: 1.771914 2.864526
Finished iteration #16 with error reduction: 1.868400 2.032806
Finished iteration #17 with error reduction: 1.982533 1.943115
Finished iteration #18 with error reduction: 1.954239 2.499514
Finished iteration #19 with error reduction: 1.623969 2.948244
Finished iteration #20 with error reduction: 0.970848 2.681647
Finished iteration #21 with error reduction: 0.466461 1.399972
Finished iteration #22 with error reduction: 0.334239 0.282680
Finished iteration #23 with error reduction: 0.317318 0.153975
Finished iteration #24 with error reduction: 0.325094 0.166700

In [12]:
# Score the PCA-compressed test descriptors with the fitted model's own
# distance, and overlay the two score histograms.
scores = computeDistanceMatrix(analysis.compressed_descriptors, test_set_ground_truth, distance=joint_bayesian.mesureDistance)
_ = hist(scores[0], bins=80, histtype="step", color="g")  # scores[0]: positive (same-identity) pairs
_ = hist(scores[1], bins=80, histtype="step", color="r")  # scores[1]: negative pairs
roc = computeROC(scores)
acc = computeAccuracy(scores)
labels.append("ULBP + PCA + Joint Bayesian")
rocs.append(roc)
accs.append(acc)



In [15]:
# Same evaluation, but with the descriptors first mapped into the model's
# transformed space, where the generic jointBayesianDistance applies.
transformed_descs = joint_bayesian.transform(analysis.compressed_descriptors)
transformed_scores = computeDistanceMatrix(transformed_descs, test_set_ground_truth, distance=jointBayesianDistance)
roc = computeROC(transformed_scores)
acc = computeAccuracy(transformed_scores)
labels.append("ULBP + PCA + Joint Bayesian (transformed)")
rocs.append(roc)
accs.append(acc)

In [10]:
from lda import loadLDA

# Externally-trained (WLFDB) 200-dim PCA + Joint Bayesian models from disk.
# NOTE(review): this rebinds `joint_bayesian`, shadowing the model fitted above.
pca = Pca(filename="PCA/ulbp_wlfdb_PCA_200_dim.txt")
joint_bayesian = loadLDA("JB/wlfdb_JB_200_dim.txt")

# Project every raw descriptor through the loaded PCA, then Joint Bayesian.
compressed_descs = [pca.project(desc) for desc in analysis.descriptors]
jb_descs_200 = joint_bayesian.transform(np.asarray(compressed_descs))

In [11]:
# Evaluate the WLFDB 200-dim model with its transformed-space distance.
jb_descs_200_scores = computeDistanceMatrix(jb_descs_200, test_set_ground_truth, distance=joint_bayesian.mesureTransformedDistance)
_ = hist(jb_descs_200_scores[0], bins=80, histtype="step", color="g")  # positive pairs
_ = hist(jb_descs_200_scores[1], bins=80, histtype="step", color="r")  # negative pairs
roc = computeROC(jb_descs_200_scores)
acc = computeAccuracy(jb_descs_200_scores)
labels.append("ULBP + PCA + Joint Bayesian 200 (WLFDB)")
rocs.append(roc)
accs.append(acc)



In [12]:
# Same as the 200-dim cell above, but with the 2000-dim WLFDB models.
pca = Pca(filename="PCA/ulbp_wlfdb_PCA_2000_dim.txt")
joint_bayesian = loadLDA("JB/wlfdb_JB_2000_dim.txt")

compressed_descs = [pca.project(desc) for desc in analysis.descriptors]
jb_descs_2000 = joint_bayesian.transform(np.asarray(compressed_descs))

In [13]:
# Evaluate the WLFDB 2000-dim model with its transformed-space distance.
jb_descs_2000_scores = computeDistanceMatrix(jb_descs_2000, test_set_ground_truth, distance=joint_bayesian.mesureTransformedDistance)
_ = hist(jb_descs_2000_scores[0], bins=80, histtype="step", color="g")  # positive pairs
_ = hist(jb_descs_2000_scores[1], bins=80, histtype="step", color="r")  # negative pairs
roc = computeROC(jb_descs_2000_scores)
acc = computeAccuracy(jb_descs_2000_scores)
labels.append("ULBP + PCA + Joint Bayesian 2000 (WLFDB)")
rocs.append(roc)
accs.append(acc)



In [16]:
# Summary including the locally-fitted and WLFDB Joint Bayesian variants.
analysis.displayStats(labels, accs, rocs)


ULBP original descriptor: 0.7110
ULBP + WPCA: 0.7770
ULBP + PCA + LDA: 0.8170
ULBP + PCA + Joint Bayesian: 0.8140
ULBP + PCA + Joint Bayesian (transformed): 0.8140

In [15]:
# Singular-value spectrum of the fitted model's S_mu matrix (svd returns
# (U, s, Vh); index [1] selects the singular values).
plot(np.linalg.svd(joint_bayesian.S_mu)[1])


Out[15]:
[<matplotlib.lines.Line2D at 0x3f2efd0>]

In [16]:
# Cumulative fraction of S_mu spectral energy per singular value.
# Compute the SVD once: the original expression decomposed the same matrix
# twice (once for the cumsum, once for the normalizer).
singular_values = np.linalg.svd(joint_bayesian.S_mu)[1]
cumsum = np.cumsum(singular_values) / np.sum(singular_values)
plot(cumsum)


Out[16]:
[<matplotlib.lines.Line2D at 0x3e8ff50>]

In [17]:
# Total number of singular values vs. how many fall before the cumulative
# ratio reaches 1 (an effective-rank estimate for S_mu).
print len(cumsum), np.sum(cumsum < 1)


2000 346

In [18]:
# Same cumulative-spectrum plot for S_eps. Compute the SVD once instead of
# twice as in the original expression.
singular_values = np.linalg.svd(joint_bayesian.S_eps)[1]
cumsum = np.cumsum(singular_values) / np.sum(singular_values)
plot(cumsum)


Out[18]:
[<matplotlib.lines.Line2D at 0x3eddf90>]

In [19]:
# W: second matrix returned by the (private) covariance computation —
# presumably the within-class covariance, since it feeds the WCCN-labelled
# whitening below; confirm against learning.joint_bayesian.
W = joint_bayesian._computeCovarianceMatrices(analysis.supervised_learning.transform(supervised_learning_training_data), y)[1]

In [20]:
# WCCN-style projection: LDA-transform each compressed descriptor, then
# multiply by W^-1. Hoist the inverse out of the comprehension — the
# original recomputed np.linalg.inv(W) for every single descriptor.
W_inv = np.linalg.inv(W)
wccn_descriptors = np.asarray([np.dot(W_inv, analysis.supervised_learning.transform(desc).ravel())
                               for desc in analysis.compressed_descriptors])

In [21]:
# Evaluate the WCCN descriptors with the default distance.
scores = computeDistanceMatrix(wccn_descriptors, test_set_ground_truth)
positive, _, _ = hist(scores[0], range=(-0.6, 1.0), bins=80, histtype="step", color="g")
negative, _, _ = hist(scores[1], range=(-0.6, 1.0), bins=80, histtype="step", color="r")
# Percentage overlap between the normalized positive and negative score
# histograms (lower = better separated).
print np.sum(np.min([(positive / np.sum(positive)), (negative / np.sum(negative))], axis=0))*100
roc = computeROC(scores)
acc = computeAccuracy(scores)
labels.append("ULBP + PCA + LDA + WCCN")
rocs.append(roc)
accs.append(acc)


35.6

In [22]:
# Final summary: accuracies and ROC curves for every pipeline variant.
analysis.displayStats(labels, accs, rocs)


ULBP original descriptor: 0.7110
ULBP + WPCA: 0.7740
ULBP + PCA + LDA: 0.8270
ULBP + PCA + Joint Bayesian: 0.8270
ULBP + PCA + Joint Bayesian (transformed): 0.8270
ULBP + PCA + Joint Bayesian 200 (WLFDB): 0.7960
ULBP + PCA + Joint Bayesian 2000 (WLFDB): 0.7870
ULBP + PCA + LDA + WCCN: 0.8160

In [22]: