In [17]:
%pylab inline
import os
import cv2
import config
from datasets import lfw
from tools import *
from stats import *
from learning.pca import *
from cpp_wrapper.descriptors import *
In [5]:
# Load the LFW development splits: image identifiers for training and test,
# plus the ground-truth match/mismatch mapping for the test pairs.
training_set, _ = lfw.loadDevData("train")
test_set, test_set_ground_truth = lfw.loadDevData("test")
# Keep only training identities with at least 10 samples for the supervised
# (LDA / Joint Bayesian) stage; y holds their class labels.
descs_id_for_lda, y = lfw.loadTrainingDataLabels(training_set, min_nb_samples_per_class=10)
def reindex(indexes, ground_truth_mapping):
    """Translate every identifier in ground_truth_mapping to its position
    inside `indexes`.

    Parameters
    ----------
    indexes : list
        Ordered collection; an element's (first) position becomes its new id.
    ground_truth_mapping : iterable of dict
        Each dict maps an identifier to a list of matching identifiers.

    Returns
    -------
    tuple of dict
        Same structure with every identifier replaced by its position.
    """
    # Precompute identifier -> first position once: the original called
    # list.index() for every key and value, which is O(n^2) overall.
    position = {}
    for i, value in enumerate(indexes):
        position.setdefault(value, i)
    result_mapping = []
    for mapping in ground_truth_mapping:
        new_mapping = {}
        for key, matches in mapping.items():
            new_mapping[position[key]] = [position[e] for e in matches]
        result_mapping.append(new_mapping)
    return tuple(result_mapping)
# Re-express the ground truth as positions within test_set so it can index
# directly into the descriptor matrix rows below.
test_set_ground_truth = reindex(test_set, test_set_ground_truth)
# Precomputed uniform-LBP descriptors (one row per LFW-a image), not normalized.
descriptors = np.load(os.path.join(config.descriptors_path, "ulbp_not_normalized_lfwa.npy"))
In [6]:
from scipy.sparse import lil_matrix
from sklearn.decomposition import PCA
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.lda import LDA
class AnalysisFramework:
    """Harness for evaluating face descriptors on a verification task.

    Holds the raw descriptors of the evaluation set, optionally a
    dimensionality-reduced copy, and computes accuracy / ROC statistics
    against a ground-truth match/mismatch mapping.
    """

    def __init__(self, ground_truth):
        # ground_truth: mapping of descriptor row indexes to matching row
        # indexes, as produced by reindex() above.
        self.descriptors = None
        self.compressed_descriptors = None
        self.ground_truth = ground_truth

    def computeStats(self, descriptors, distance=cosineDistance):
        """Score all ground-truth pairs with `distance`; return (accuracy, ROC)."""
        scores = computeDistanceMatrix(descriptors, self.ground_truth, distance)
        acc = computeAccuracy(scores)
        roc = computeROC(scores)
        return acc, roc

    def displayStats(self, labels, accs, rocs):
        """Print one accuracy line per label and plot all ROC curves together."""
        for label, acc in zip(labels, accs):
            # print() with a single pre-formatted string works under both
            # Python 2 and Python 3 (the original used the py2-only statement).
            print("%s: %0.4f" % (label, acc))
        plotROC(rocs, labels, "roc")

    def computeDescriptors(self, descriptor_func, data):
        """Compute raw descriptors for `data`, store them, return their stats."""
        # Drop the previous array before allocating a new one to limit peak
        # kernel memory (replaces the redundant `del` + rebind of the original).
        self.descriptors = None
        self.descriptors = descriptor_func(data)
        return self.computeStats(self.descriptors)

    def compressDescriptors(self, method="pca", dim=500, training_data=None):
        """Fit a compression model and project self.descriptors to `dim` dims.

        method: "pca" (whitened probabilistic PCA), "rp" (Gaussian random
        projection) or "srp" (sparse random projection).
        Returns (accuracy, ROC) of the whitened compressed descriptors.
        Raises ValueError for an unknown method.
        """
        # Release any previous projection before computing the new one.
        self.compressed_descriptors = None
        if method == "pca":
            self.compression = computeProbabilisticPCA(training_data, dim=dim)
            # Whitening: rescale each principal axis to unit variance.
            scale_ = np.power(self.compression.explained_variance_, -0.5)
        elif method == "rp":
            # Random projections only need the input shape to be fitted, so
            # an empty sparse matrix of the right shape is enough.
            X_ = lil_matrix((len(self.descriptors), self.descriptors.shape[1]))
            self.compression = GaussianRandomProjection(n_components=dim)
            self.compression.fit(X_)
            scale_ = np.ones(dim)
        elif method == "srp":
            X_ = lil_matrix((len(self.descriptors), self.descriptors.shape[1]))
            self.compression = SparseRandomProjection(n_components=dim, dense_output=True)
            self.compression.fit(X_)
            scale_ = np.ones(dim)
        else:
            raise ValueError("Compression method unknown")
        self.compressed_descriptors = self.compression.transform(self.descriptors)
        # Multiplying by the diagonal whitening matrix is a per-column
        # rescale; broadcasting replaces the original per-row np.dot with a
        # dense diag matrix (same result, O(n*d) instead of O(n*d^2)).
        descriptors_ = self.compressed_descriptors * scale_
        return self.computeStats(descriptors_)

    def supervisedLearning(self, method="lda", dim=50, training_data=None):
        """Fit a supervised projection on training_data=(X, y), apply it to
        the compressed descriptors and return their (accuracy, ROC).

        method: "lda" or "joint_bayesian".
        Raises ValueError for an unknown method.
        """
        if method == "lda":
            self.supervised_learning = LDA(dim)
        elif method == "joint_bayesian":
            self.supervised_learning = JointBayesian()
        else:
            raise ValueError("Supervised learning method unknown")
        # Both branches share the same fit call (deduplicated).
        self.supervised_learning.fit(training_data[0], training_data[1])
        descriptors_ = self.supervised_learning.transform(self.compressed_descriptors)
        return self.computeStats(descriptors_)
In [7]:
# Evaluation pipeline: raw ULBP -> whitened PCA -> LDA, collecting one
# (accuracy, ROC) pair per variant for the final comparison plot.
analysis = AnalysisFramework(test_set_ground_truth)
# 2000 random training images used to fit the PCA.
# NOTE(review): no random seed is set, so this subset (and hence the PCA)
# differs between runs — consider seeding for reproducibility.
random_subset = random.sample(training_set, 2000)
supervised_learning_subset = descs_id_for_lda
labels = []
accs = []
rocs = []
# Baseline: raw descriptors scored with the default (cosine) distance.
labels.append("ULBP original descriptor")
analysis.descriptors = descriptors[test_set]
acc, roc = analysis.computeStats(descriptors[test_set])
accs.append(acc)
rocs.append(roc)
# Whitened PCA compression to 200 dimensions.
labels.append("ULBP + WPCA")
acc, roc = analysis.compressDescriptors(method="pca", dim=200, training_data=descriptors[random_subset])
# Project the labelled training descriptors into the same PCA space for the
# supervised stages below.
supervised_learning_training_data = analysis.compression.transform(descriptors[training_set][supervised_learning_subset])
accs.append(acc)
rocs.append(roc)
# LDA (50 components) on top of the PCA-compressed descriptors.
labels.append("ULBP + PCA + LDA")
acc, roc = analysis.supervisedLearning(method="lda", dim=50, training_data=(supervised_learning_training_data, y))
accs.append(acc)
rocs.append(roc)
In [8]:
# Print the accuracy of each variant so far and overlay their ROC curves.
analysis.displayStats(labels, accs, rocs)
In [14]:
from learning.joint_bayesian import *
In [11]:
# Fit a Joint Bayesian model directly on the PCA-compressed training data.
# Flush stdout so the message appears before the (long) fit starts.
print "Starting Joint Bayesian..."
sys.stdout.flush()
joint_bayesian = JointBayesian()
joint_bayesian.fit(supervised_learning_training_data, y)
In [12]:
# Score all test pairs with the Joint Bayesian distance and overlay the
# match (green) / mismatch (red) score histograms.
scores = computeDistanceMatrix(analysis.compressed_descriptors, test_set_ground_truth, distance=joint_bayesian.mesureDistance)
_ = hist(scores[0], bins=80, histtype="step", color="g")
_ = hist(scores[1], bins=80, histtype="step", color="r")
roc = computeROC(scores)
acc = computeAccuracy(scores)
labels.append("ULBP + PCA + Joint Bayesian")
rocs.append(roc)
accs.append(acc)
In [15]:
# Alternative scoring: explicitly transform the descriptors with the fitted
# Joint Bayesian model, then score them with the paired distance function.
transformed_descs = joint_bayesian.transform(analysis.compressed_descriptors)
transformed_scores = computeDistanceMatrix(transformed_descs, test_set_ground_truth, distance=jointBayesianDistance)
roc = computeROC(transformed_scores)
acc = computeAccuracy(transformed_scores)
labels.append("ULBP + PCA + Joint Bayesian (transformed)")
rocs.append(roc)
accs.append(acc)
In [10]:
from lda import loadLDA
# Models trained offline on WLFDB: a 200-dim PCA and the matching Joint
# Bayesian transform, both loaded from disk.
pca = Pca(filename="PCA/ulbp_wlfdb_PCA_200_dim.txt")
joint_bayesian = loadLDA("JB/wlfdb_JB_200_dim.txt")
# Project every raw test descriptor into the PCA subspace, then apply the
# Joint Bayesian transform to the stacked result.
compressed_descs = [pca.project(desc) for desc in analysis.descriptors]
jb_descs_200 = joint_bayesian.transform(np.asarray(compressed_descs))
In [11]:
# Score the WLFDB 200-dim Joint Bayesian descriptors and overlay the match
# (green) / mismatch (red) histograms.
jb_descs_200_scores = computeDistanceMatrix(jb_descs_200, test_set_ground_truth, distance=joint_bayesian.mesureTransformedDistance)
_ = hist(jb_descs_200_scores[0], bins=80, histtype="step", color="g")
_ = hist(jb_descs_200_scores[1], bins=80, histtype="step", color="r")
roc = computeROC(jb_descs_200_scores)
acc = computeAccuracy(jb_descs_200_scores)
labels.append("ULBP + PCA + Joint Bayesian 200 (WLFDB)")
rocs.append(roc)
accs.append(acc)
In [12]:
# Same pipeline as above but with the 2000-dim WLFDB models: PCA followed
# by the matching Joint Bayesian transform, both loaded from disk.
pca = Pca(filename="PCA/ulbp_wlfdb_PCA_2000_dim.txt")
joint_bayesian = loadLDA("JB/wlfdb_JB_2000_dim.txt")
# Build the PCA projections with a comprehension, then transform in one go.
compressed_descs = [pca.project(desc) for desc in analysis.descriptors]
jb_descs_2000 = joint_bayesian.transform(np.asarray(compressed_descs))
In [13]:
# Score the WLFDB 2000-dim Joint Bayesian descriptors and overlay the match
# (green) / mismatch (red) histograms.
jb_descs_2000_scores = computeDistanceMatrix(jb_descs_2000, test_set_ground_truth, distance=joint_bayesian.mesureTransformedDistance)
_ = hist(jb_descs_2000_scores[0], bins=80, histtype="step", color="g")
_ = hist(jb_descs_2000_scores[1], bins=80, histtype="step", color="r")
roc = computeROC(jb_descs_2000_scores)
acc = computeAccuracy(jb_descs_2000_scores)
labels.append("ULBP + PCA + Joint Bayesian 2000 (WLFDB)")
rocs.append(roc)
accs.append(acc)
In [16]:
# Refresh the comparison with the WLFDB variants included.
analysis.displayStats(labels, accs, rocs)
In [15]:
# Singular-value spectrum of the between-identity covariance S_mu.
plot(np.linalg.svd(joint_bayesian.S_mu)[1])
Out[15]:
In [16]:
# Compute the singular values of S_mu once — the original ran the full SVD
# twice for the same matrix. compute_uv=False returns only the singular
# values and skips the unused U and V factors entirely.
singular_values = np.linalg.svd(joint_bayesian.S_mu, compute_uv=False)
cumsum = np.cumsum(singular_values) / np.sum(singular_values)
plot(cumsum)
Out[16]:
In [17]:
# Effective rank check: how many components the cumulative spectrum needs
# before it saturates at 1.
print len(cumsum), np.sum(cumsum < 1)
In [18]:
# Same cumulative-spectrum analysis for the within-identity covariance
# S_eps; compute the SVD once (compute_uv=False) instead of twice.
singular_values = np.linalg.svd(joint_bayesian.S_eps, compute_uv=False)
cumsum = np.cumsum(singular_values) / np.sum(singular_values)
plot(cumsum)
Out[18]:
In [19]:
# Covariance matrix of the LDA-projected training data.
# NOTE(review): this calls the private _computeCovarianceMatrices helper;
# index [1] presumably selects the within-class matrix — confirm against
# learning.joint_bayesian before relying on it.
W = joint_bayesian._computeCovarianceMatrices(analysis.supervised_learning.transform(supervised_learning_training_data), y)[1]
In [20]:
# WCCN-style normalization: multiply each LDA-projected descriptor by the
# inverse of the covariance W. Invert W once, outside the comprehension —
# the original recomputed np.linalg.inv(W) for every single descriptor.
W_inv = np.linalg.inv(W)
wccn_descriptors = np.asarray([np.dot(W_inv, analysis.supervised_learning.transform(desc).ravel()) for desc in analysis.compressed_descriptors])
In [21]:
# Evaluate the WCCN descriptors with the default (cosine) distance.
scores = computeDistanceMatrix(wccn_descriptors, test_set_ground_truth)
positive, _, _ = hist(scores[0], range=(-0.6, 1.0), bins=80, histtype="step", color="g")
negative, _, _ = hist(scores[1], range=(-0.6, 1.0), bins=80, histtype="step", color="r")
# Overlap (in %) of the normalized match/mismatch score histograms — a
# rough estimate of the irreducible verification error on shared bins.
print np.sum(np.min([(positive / np.sum(positive)), (negative / np.sum(negative))], axis=0))*100
roc = computeROC(scores)
acc = computeAccuracy(scores)
labels.append("ULBP + PCA + LDA + WCCN")
rocs.append(roc)
accs.append(acc)
In [22]:
# Final comparison across all evaluated pipeline variants.
analysis.displayStats(labels, accs, rocs)
In [22]: