In [5]:
%pylab inline
%load_ext ipycache
import cv2, random
from dataset import *
from stats import *
from utils.parallelize import parallelize
from learning.gmm import GMM
from fisher_vector import *
from utils.file_manager import *
from PythonWrapper.descriptors import *
from sklearn.decomposition import PCA
In [6]:
training_set, _ = loadDevData("train")
test_set, test_set_ground_truth = loadDevData("test")
data = np.load("../lfw/lfwa.npy")
training_data = data[training_set]
test_data = data[test_set]
print len(training_set), len(test_set)
supervised_learning_indexes, y = loadTrainingDataLabels(training_set, min_nb_samples_per_class=20)
print len(set(y)), len(supervised_learning_indexes)
def reindex(indexes, ground_truth_mapping):
    result_mapping = []
    for mapping in ground_truth_mapping:
        new_mapping = {}
        for k in mapping.keys():
            l = mapping[k]
            new_mapping[indexes.index(k)] = []
            for e in l:
                new_mapping[indexes.index(k)].append(indexes.index(e))
        result_mapping.append(new_mapping)
    return tuple(result_mapping)
test_set_ground_truth = reindex(test_set, test_set_ground_truth)
In [3]:
%timeit computeDenseDescriptor(training_data[0])
computeDenseDescriptor(training_data[0]).shape
Out[3]:
In [4]:
%%cache -d cache/fisher_vectors pca.pkl pca
var_cumsum = np.cumsum(pca.explained_variance_ratio_)
plot(var_cumsum)
xlim(xmax=59)
ylim(ymax=1.0)
print np.sum(var_cumsum < 0.95)
print np.sum(var_cumsum < 0.98)
print var_cumsum[20-1]
In [5]:
pca.n_components_ = 20
pca.components_ = pca.components_[:20]
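Truncating the fitted PCA in place works because transform only uses mean_ and components_ when whiten is False; a quick sanity check on a dummy input, assuming the cached pca object from above:
print pca.transform(np.zeros((1, pca.components_.shape[1]))).shape  # expected (1, 20)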
In [6]:
%timeit computeDenseDescriptor(training_data[0], pca=pca)
computeDenseDescriptor(training_data[0], pca=pca).shape
Out[6]:
In [3]:
"""
%%cache -d cache/fisher_vectors -f gmm.pkl gmm
gmm = GMM(n_components=256, n_threads=16)
gmm.fit(gmm_descs_subset)
"""
Out[3]:
In [3]:
#fisher_vector = pickleLoad('../models/fisher_vector_ulbp_descriptor.pkl')
#fisher_vector = pickleLoad('../fisher_vector.pkl')
fisher_vector = pickleLoad('../fisher_vector_learning.pkl')
#fisher_vector.gmm.initYaelGmm()
In [8]:
%timeit fisher_vector.gmm.computeResponsabilities(computeDenseDescriptor(training_data[0], pca=fisher_vector.pca, embed_spatial_information=False))
In [10]:
gmm_test_image = np.copy(training_data[1])
def PEP(image, pca, gmm):
    patches = computeDenseDescriptor(image, pca=pca, embed_spatial_information=False)
    responsabilities = gmm.computeResponsabilities(patches)
    argmaxes = np.argmax(responsabilities, axis=0)
    return argmaxes
    #return zip(np.argsort(np.diag(responsabilities[argmaxes]))[-20:][::-1], argmaxes[np.argsort(np.diag(responsabilities[argmaxes]))[-20:][::-1]])
centers = np.argsort(fisher_vector.gmm.weights_)[-20:][::-1]
argmaxes = PEP(gmm_test_image, fisher_vector.pca, fisher_vector.gmm)
print argmaxes
gmm_test_image = np.copy(gmm_test_image)
for k, index in enumerate(argmaxes[centers]):
#for k, index in argmaxes:
    cell_size, step = (24, 2)
    img = gmm_test_image[45:205,63:187]
    height, width = img.shape
    scale = 1
    n_cells_x = (width-cell_size)/step+1
    pt1 = np.array([(index%n_cells_x)*step, (index/n_cells_x)*step])
    pt2 = pt1 + (np.array([cell_size, cell_size]) * (1.41**(scale-1))).astype(np.int)
    if pt1[1] > height or pt2[1] > height:
        continue
    cv2.rectangle(img, tuple(pt1), tuple(pt2), (255, 255, 255))
imshow(img)
gray()
In [13]:
def computePEP(image, pca, gmm):
    patches = computeDenseDescriptor(image, pca=pca, embed_spatial_information=False)
    responsabilities = gmm.computeResponsabilities(patches)
    return patches[np.argmax(responsabilities, axis=0)].ravel()

def computeEigenPEP(image, pca, gmm, pep_pca):
    descriptor = computePEP(image, pca, gmm)
    return pep_pca.transform(descriptor).ravel()
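computePEP keeps, for each GMM component, the patch with the highest responsibility, so the raw PEP descriptor has n_components * patch_dimension entries before the Eigen-PEP projection. A quick shape check, assuming the fisher_vector object loaded above:
print computePEP(training_data[0], fisher_vector.pca, fisher_vector.gmm).shape  # expected gmm.n_components * patch dimension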
In [14]:
%timeit computeEigenPEP(training_data[0], fisher_vector.pca, fisher_vector.gmm, pep_pca)
In [4]:
#%%cache -d cache/fisher_vectors pep_pca.pkl pep_pca
random_indexes = random.sample(range(len(training_data)), 1000)
descriptors = map(lambda image: computePEP(image, fisher_vector.pca, fisher_vector.gmm), training_data[random_indexes])
pep_pca = PCA(copy=False)
pep_pca.fit(descriptors)
In [7]:
plot(np.cumsum(pep_pca.explained_variance_ratio_))
_ = ylim(ymax=1.0)
In [8]:
pep_pca.n_components_ = 400
pep_pca.components_ = pep_pca.components_[:400]
pep_pca.whiten = True  # PCA.transform checks the `whiten` attribute; the trailing-underscore name is not used by scikit-learn
In [9]:
%timeit computeEigenPEP(training_data[0], fisher_vector.pca, fisher_vector.gmm, pep_pca)
In [10]:
eigenpep_descriptors = map(lambda image: computeEigenPEP(image, fisher_vector.pca, fisher_vector.gmm, pep_pca), test_data)
scores = computeDistanceMatrix(eigenpep_descriptors, test_set_ground_truth, cosineDistance)
acc = computeAccuracy(scores)
roc = computeROC(scores)
plotROC([roc], ["Eigen PEP"])
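cosineDistance is imported from the stats module; in case that module is not at hand, a minimal stand-in under the assumption that it is one minus the cosine similarity (the actual implementation may differ):
def cosineDistanceSketch(x, y):
    # Hypothetical stand-in for stats.cosineDistance: 1 - cosine similarity.
    x, y = np.asarray(x), np.asarray(y)
    return 1.0 - np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))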
In [4]:
def computeFisherVector(patches, gmm, improved=True):
    K = gmm.n_components
    d = patches[0].shape[0]
    N = len(patches)
    vector = np.empty((2*K, d), dtype=np.float32)
    soft_assignments = gmm.computeResponsabilities(patches)
    squared_patches = patches ** 2
    for k in range(K):
        # Zeroth-, first- and second-order statistics of the patches under component k
        S_0 = soft_assignments[:,k].mean()
        S_1 = (soft_assignments[:,k,np.newaxis] * patches).mean(axis=0)
        S_2 = (soft_assignments[:,k,np.newaxis] * squared_patches).mean(axis=0)
        # Gradients with respect to the mean and the variance of component k
        vector[2*k] = (S_1 - gmm.means_[k]*S_0) / (np.sqrt(gmm.weights_[k] * gmm.covars_[k]))
        vector[2*k+1] = (S_2 - 2*gmm.means_[k]*S_1 + (gmm.means_[k]**2-gmm.covars_[k]**2)*S_0) / (np.sqrt(2*gmm.weights_[k]) * gmm.covars_[k])
    vector = vector.ravel()
    if improved:
        # Signed square-rooting
        vector = np.sign(vector) * np.sqrt(np.abs(vector))
        # L2 normalization
        vector /= np.linalg.norm(vector)
    return vector
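The resulting Fisher vector has 2 * K * d entries (two d-dimensional blocks per Gaussian), which is where the n_features = gmm.n_components * 2 * 20 pre-allocation used further down comes from. A quick check, assuming the 20-dimensional PCA and 256-component GMM set up above:
patches = computeDenseDescriptor(training_data[0], pca=fisher_vector.pca, embed_spatial_information=False)
print computeFisherVector(patches, fisher_vector.gmm).shape  # expected 2 * 256 * 20 = 10240 with the settings above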
In [5]:
fisher_vector.computeFisherVector(computeDenseDescriptor(training_data[0], pca=fisher_vector.pca, embed_spatial_information=False), fisher_vector.gmm)
Out[5]:
In [6]:
patches = computeDenseDescriptor(training_data[0], pca=fisher_vector.pca, embed_spatial_information=False)
%timeit fisher_vector.computeFisherVector(patches, fisher_vector.gmm)
In [7]:
patches = computeDenseDescriptor(training_data[0], pca=fisher_vector.pca)
fv = fisher_vector.yaelFV(patches, improved=True)
np.linalg.norm(fv)
Out[7]:
In [8]:
patches = computeDenseDescriptor(training_data[0], pca=fisher_vector.pca)
%timeit fisher_vector.yaelFV(patches)
In [9]:
supervised_learning_data = training_data[supervised_learning_indexes]
In [10]:
def computeCompleteFisherVector(image, fisher_vector, embed_spatial_information=False):
    patches = computeDenseDescriptor(image, pca=fisher_vector.pca, embed_spatial_information=embed_spatial_information)
    #return fisher_vector.computeFisherVector(patches)
    return fisher_vector.yaelFV(patches)
In [5]:
%%cache -d cache/fisher_vectors training_fisher_vectors.pkl training_fisher_vectors
def parallelizedCompleteFisherVector(data, output, i, fisher_vector):
    output[i] = computeCompleteFisherVector(data[i], fisher_vector)

n_samples = supervised_learning_data.shape[0]
n_features = fisher_vector.gmm.n_components * 2 * 20
training_fisher_vectors_filename = "cache/fisher_vectors/training_fisher_vectors.mmap"
#training_fisher_vectors = parallelize(parallelizedCompleteFisherVector, supervised_learning_data, (n_samples, n_features), np.float32, args=[fisher_vector], n_jobs=8, output_file=training_fisher_vectors_filename)
training_fisher_vectors = np.empty((n_samples, n_features), dtype=np.float32)
for i, image in enumerate(supervised_learning_data):
    training_fisher_vectors[i] = computeCompleteFisherVector(image, fisher_vector)
In [5]:
print training_fisher_vectors.shape
In [6]:
labels = list(set(y))
sampled_labels = random.sample(labels, 20)
sampled_fv = np.empty((0, training_fisher_vectors.shape[1]), dtype=np.float32)
sampled_y = np.empty((0), dtype=np.int8)
for label in sampled_labels:
    subset = random.sample(np.where(y==label)[0], 10)
    sampled_fv = np.append(sampled_fv, training_fisher_vectors[subset], axis=0)
    sampled_y = np.append(sampled_y, y[subset], axis=0)
print sampled_fv.shape, sampled_y.shape
In [7]:
del training_fisher_vectors
In [8]:
%%cache -d cache/fisher_vectors -f diagonal_metric.pkl diagonal_metric
from learning.mahalanobis_metric import DiagonalMahalanobisMetric
diagonal_metric = DiagonalMahalanobisMetric()
#diagonal_metric.fit(training_fisher_vectors, y, n_samples=int(1e4))
"""
random_sampling = random.sample(range(len(training_fisher_vectors)), 200)
diagonal_metric.fit(training_fisher_vectors[random_sampling], y[random_sampling])
"""
diagonal_metric.fit(sampled_fv, sampled_y)
In [8]:
print diagonal_metric.W_, diagonal_metric.b_
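The learned W_ and b_ suggest a per-dimension weighted squared difference plus a bias; a hypothetical sketch of how the pair distance is assumed to be computed (the actual DiagonalMahalanobisMetric.mesureDistance may differ):
def diagonalMetricDistanceSketch(W, b, x1, x2):
    # Hypothetical: weighted squared difference per dimension, plus a learned bias.
    diff = x1 - x2
    return np.dot(W, diff * diff) + b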
In [11]:
%%cache -d cache/fisher_vectors -f test_fisher_vectors.pkl test_fisher_vectors
def parallelizedCompleteFisherVector(data, output, i, fisher_vector):
    output[i] = computeCompleteFisherVector(data[i], fisher_vector)

n_samples = test_data.shape[0]
n_features = fisher_vector.gmm.n_components * 2 * 20
test_fisher_vectors_filename = "cache/fisher_vectors/test_data.mmap"
test_fisher_vectors = np.empty((n_samples, n_features), dtype=np.float32)
for i, image in enumerate(test_data):
    test_fisher_vectors[i] = computeCompleteFisherVector(image, fisher_vector)
#test_fisher_vectors = parallelize(parallelizedCompleteFisherVector, test_data, (n_samples, n_features), np.float32, args=[fisher_vector], n_jobs=8, output_file=test_fisher_vectors_filename)
In [14]:
scores = computeDistanceMatrix(test_fisher_vectors, test_set_ground_truth, diagonal_metric.mesureDistance)
acc = computeAccuracy(scores, thresholds=np.linspace(0.00003, 0.00009, num=100))
roc = computeROC(scores, thresholds=np.linspace(0.00003, 0.00009, num=100))
plotROC([roc], ["Fisher vectors (diag metric)"])
print acc
In [11]:
_ = hist(scores[0], bins=80, histtype="step", color="g")
_ = hist(scores[1], bins=80, histtype="step", color="r")
In [6]:
#%%cache -d cache/fisher_vectors large_margin_dimension_reduction.pkl large_margin_dimension_reduction
from learning.large_margin_dimension_reduction import LargeMarginDimensionReduction
large_margin_dimension_reduction = LargeMarginDimensionReduction(n_components=128, n_iter=int(1e3))
large_margin_dimension_reduction.fit(training_fisher_vectors, y)
In [14]:
compressed_data = large_margin_dimension_reduction.transform(test_fisher_vectors)
scores = computeDistanceMatrix(compressed_data, test_set_ground_truth, lambda x,y: -np.inner(x-y, x-y))
acc = computeAccuracy(scores, thresholds=np.linspace(-40000, -10000, num=100))
roc = computeROC(scores, thresholds=np.linspace(-40000, -10000, num=100))
print acc
plotROC([roc], ["Large-margin dimension reduction"])
In [13]:
_ = hist(scores[0], bins=80, histtype="step", color="g")
_ = hist(scores[1], bins=80, histtype="step", color="r")
In [115]:
print large_margin_dimension_reduction.W_.shape, np.asarray(test_descriptors).shape
In [ ]: