In [5]:
%pylab inline
%load_ext ipycache
import cv2, random
from dataset import *
from stats import *
from utils.parallelize import parallelize
from learning.gmm import GMM
from fisher_vector import *
from utils.file_manager import *
from PythonWrapper.descriptors import *
from sklearn.decomposition import PCA


Populating the interactive namespace from numpy and matplotlib
The ipycache extension is already loaded. To reload it, use:
  %reload_ext ipycache
WARNING: pylab import has clobbered these variables: ['pylab', 'random']
`%matplotlib` prevents importing * from pylab and numpy

In [6]:
training_set, _ = loadDevData("train")
test_set, test_set_ground_truth = loadDevData("test")

data = np.load("../lfw/lfwa.npy")
training_data = data[training_set]
test_data = data[test_set]
print len(training_set), len(test_set)

supervised_learning_indexes, y = loadTrainingDataLabels(training_set, min_nb_samples_per_class=20)
print len(set(y)), len(supervised_learning_indexes)


def reindex(indexes, ground_truth_mapping):
    # Remap absolute dataset indexes to their positions within `indexes`
    position = {v: i for i, v in enumerate(indexes)}
    result_mapping = []
    for mapping in ground_truth_mapping:
        new_mapping = {}
        for k, l in mapping.items():
            new_mapping[position[k]] = [position[e] for e in l]
        result_mapping.append(new_mapping)
    return tuple(result_mapping)

test_set_ground_truth = reindex(test_set, test_set_ground_truth)


9525 3708
46 2370

In [3]:
%timeit computeDenseDescriptor(training_data[0])
computeDenseDescriptor(training_data[0]).shape


1 loops, best of 3: 11.3 ms per loop
Out[3]:
(5712, 59)

Compute LBP-PCA

PCA is used to reduce the dimensionality of the dense LBP descriptors and to decorrelate their variables before GMM estimation.
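
The cached cell below only loads the fitted PCA from disk. As a rough sketch (an assumption, not the original code), it could have been estimated along these lines; the subset size is a placeholder:

In [ ]:
# Sketch (assumption): fit a full PCA on dense ULBP descriptors pooled
# from a random subset of the training images
subset = random.sample(range(len(training_data)), 500)
lbp_descs = np.vstack([computeDenseDescriptor(img) for img in training_data[subset]])
pca = PCA()
pca.fit(lbp_descs)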


In [4]:
%%cache -d cache/fisher_vectors pca.pkl pca
var_cumsum = np.cumsum(pca.explained_variance_ratio_)
plot(var_cumsum)
xlim(xmax=59)
ylim(ymax=1.0)

print np.sum(var_cumsum < 0.95)  # number of components below 95% cumulative variance
print np.sum(var_cumsum < 0.98)  # number of components below 98% cumulative variance
print var_cumsum[20-1]           # cumulative variance retained by the first 20 components


[Skipped the cell's code and loaded variables pca from file '/home/tlorieul/Dev/Snoop/src/lib/Python/notebooks/cache/fisher_vectors/pca.pkl'.]
15
26
0.968107253558

In [5]:
# Truncate the PCA to its first 20 principal components
pca.n_components_ = 20
pca.components_ = pca.components_[:20]

In [6]:
%timeit computeDenseDescriptor(training_data[0], pca=pca)
computeDenseDescriptor(training_data[0], pca=pca).shape


100 loops, best of 3: 8.35 ms per loop
Out[6]:
(5712, 20)

Compute GMM


In [3]:
"""
%%cache -d cache/fisher_vectors -f gmm.pkl gmm
gmm = GMM(n_components=256, n_threads=16)
gmm.fit(gmm_descs_subset)
"""


Out[3]:
'\n%%cache -d cache/fisher_vectors -f gmm.pkl gmm\ngmm = GMM(n_components=256, n_threads=16)\ngmm.fit(gmm_descs_subset)\n'
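
The cell above keeps the GMM estimation disabled; gmm_descs_subset is never built in this notebook. A plausible construction, sketched under the assumption that the GMM is fitted on PCA-projected dense descriptors pooled from a random image subset:

In [ ]:
# Sketch (assumption): pool PCA-projected patches from a random subset
# of the training images to form the GMM training set
subset = random.sample(range(len(training_data)), 500)
gmm_descs_subset = np.vstack(
    [computeDenseDescriptor(img, pca=pca) for img in training_data[subset]])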

In [3]:
#fisher_vector = pickleLoad('../models/fisher_vector_ulbp_descriptor.pkl')
#fisher_vector = pickleLoad('../fisher_vector.pkl')
fisher_vector = pickleLoad('../fisher_vector_learning.pkl')
#fisher_vector.gmm.initYaelGmm()

In [8]:
%timeit fisher_vector.gmm.computeResponsabilities(computeDenseDescriptor(training_data[0], pca=fisher_vector.pca, embed_spatial_information=False))


100 loops, best of 3: 18.4 ms per loop

In [10]:
gmm_test_image = np.copy(training_data[1])

def PEP(image, pca, gmm):
    # For each GMM component, return the index of the densely sampled
    # patch with the highest responsibility (the part that component selects)
    patches = computeDenseDescriptor(image, pca=pca, embed_spatial_information=False)
    responsabilities = gmm.computeResponsabilities(patches)
    argmaxes = np.argmax(responsabilities, axis=0)
    return argmaxes
    #return zip(np.argsort(np.diag(responsabilities[argmaxes]))[-20:][::-1], argmaxes[np.argsort(np.diag(responsabilities[argmaxes]))[-20:][::-1]])
    
centers = np.argsort(fisher_vector.gmm.weights_)[-20:][::-1]
argmaxes = PEP(gmm_test_image, fisher_vector.pca, fisher_vector.gmm)
print argmaxes

gmm_test_image = np.copy(gmm_test_image)
for k, index in enumerate(argmaxes[centers]):
#for k, index in argmaxes:
    # Recover the patch's top-left corner from its linear index on the
    # dense sampling grid (cell_size x cell_size patches with stride `step`)
    cell_size, step = (24, 2)
    img = gmm_test_image[45:205,63:187]
    height, width = img.shape
    scale = 1
    n_cells_x = (width-cell_size)/step+1
    pt1 = np.array([(index%n_cells_x)*step, (index/n_cells_x)*step])
    pt2 = pt1 + (np.array([cell_size, cell_size]) * (1.41**(scale-1))).astype(np.int)
    if pt1[1] > height or pt2[1] > height:
        continue
    cv2.rectangle(img, tuple(pt1), tuple(pt2), (255, 255, 255))
    
imshow(img)
gray()


[3281 4270  459  649 5570 3281 3438 3529  697 4958  935 2778 2061  638  867
 1471 2365  561 3464 4669 5499 3280  803 3286 3491 3549 1093 2125 4900 2255
 3220 5088 5639 5698 2982 1241  918 5042 2252 3482  640  839 1599 4776 1365
 1682 3255 1043  905  851 2294 5520 1436  101    0 1133    0 3281  234 2142
 1395 5475    0    0 3508 1950 5510 3469 3440 3328  889 3116 4788 2909  863
 2935 1632 1081  496 5707 4402    0  602 3775  570 1290 1341 3910 3966   85
 2025 1846 1346 1703    0 2984 3582 4783 3479 1282 1300 3517    0 1137 5670
  642  970 3481 1479 2939 3196 1849 2024 5511 1620  935 2650  639   77 5247
 1587  256  561    0 2585  792 3491 1199 5510 5521  736 1880 4938 4016  983
 1416 1477 5490  194  918   51 3159 3507 2377 5055    0 4512 1291 3073 1395
 5125  769  692 5582  313 4936 1280 2152 3514   94 1040 3385 1529  978 5077
 1106 2266 2024 4849 3479 1460 5452 2723 5497 5609 5235 1411   27 1023 5512
  408 1080 1393  492 3512 4975 3304  565    0 2696 1614 2519 4105  205 1315
 2200 3476 3484 1099 5670 5133 1032  498 2447 3493 1820  901 5711 2914  935
  809 2810 1296   44  138 1546 3077 2034 3075 5682 5002 2766 3512 2995  483
 1281 1329 1864 3479 1784   84 5679  127 1396  715  885  348 1820 4959 2216
 1030 5175 2223 5173 3281 1223   23 3122    7 4651 5671 5167    0  628 4842
 2397 5013    0 3263  889 1304 2859 1097 1075 1095 1291 3674  937 5708 1642
 3928 1977 1552 1724 3886 4606 3456  789 3337 5660 1743 1094 5660 4229  188
 3924 2063 5561    0 3463  758 5511   30 3378 2355 3415 3187 3455 4014 5141
 3073 3679 3502 5054 1427 5564 1609 5676 3282 2510 3970 3462 4201  356 5709
 3479 3301  120 1921 3540 3259 5436   20 1391 5490  260    0 3459 3491 5135
    0  945 4964 5552 5615  920 5154 3482 3380  554  925 2858 1173 4863 5453
 1091  226 3702 1695 1214 2014   74 3315 2514 2582 1275  102 1284 4822 2003
 5006 5518 3760  285  890 4936  132 4008 3412 3911 3374 1958 3175 5507 4983
 1308  153 2704 5700 1149 1099 3281 1862 3505    0 4735 1262 3881 3518 5213
 2262 3341 2568 3283 4146 1498 3975 4610  777 3213 5507    0 4819    0 4984
 1031 1664 5365  696 4971    0 3521 2430 1809 2265  353   59 4215 5072  883
 3491 3176 3182 1617 1446 3518 5176 2770 1594 3947 1128 5016 2762 5640 2195
 2399 1209 1593  922 1230 1183 2607 2707 4735 5710 2271 2855 4160 2979 4767
 4558 1532 3175  357    0 3281 2617 4190 2728 3486 3809  930 4000 5680   35
 2023 5495 2220 1137 3488 4958 1290 5375    0 5654 1415 5575    0 3281  683
  933   78 3854 3282 3292  206  941 5459  571 1947 3498 3317  978 3944 1817
  142   35 3218 2957 1087 4105 4812 5325 5573  349 1074 1426 2827 1278 4768
 2550 1279]

PEP (probabilistic elastic part) descriptor


In [13]:
def computePEP(image, pca, gmm):
    # Concatenate, for each GMM component, the patch it most strongly
    # selects: a K x d (here 512 x 20 = 10240) dimensional PEP descriptor
    patches = computeDenseDescriptor(image, pca=pca, embed_spatial_information=False)
    responsabilities = gmm.computeResponsabilities(patches)
    return patches[np.argmax(responsabilities, axis=0)].ravel()

def computeEigenPEP(image, pca, gmm, pep_pca):
    # Project the PEP descriptor with a second PCA (Eigen-PEP)
    descriptor = computePEP(image, pca, gmm)
    return pep_pca.transform(descriptor).ravel()
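
A quick sanity check of the descriptor size; the expected value follows from the 512-component GMM and 20-dimensional PCA patches used in this notebook:

In [ ]:
# The PEP descriptor concatenates one 20-dim patch per Gaussian,
# so its length should be K * d = 512 * 20 = 10240
pep = computePEP(training_data[0], fisher_vector.pca, fisher_vector.gmm)
print pep.shape  # expected: (10240,)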

In [14]:
%timeit computeEigenPEP(training_data[0], fisher_vector.pca, fisher_vector.gmm, pep_pca)


10 loops, best of 3: 45.3 ms per loop

In [4]:
#%%cache -d cache/fisher_vectors pep_pca.pkl pep_pca
random_indexes = random.sample(range(len(training_data)), 1000)
descriptors = map(lambda image: computePEP(image, fisher_vector.pca, fisher_vector.gmm), training_data[random_indexes])

pep_pca = PCA(copy=False)
pep_pca.fit(descriptors)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-6e63cc6000bb> in <module>()
      1 #%%cache -d cache/fisher_vectors pep_pca.pkl pep_pca
      2 random_indexes = random.sample(range(len(training_data)), 1000)
----> 3 descriptors = map(lambda image: computePEP(image, fisher_vector.pca, fisher_vector.gmm), training_data[random_indexes])
      4 
      5 pep_pca = PCA(copy=False)

<ipython-input-4-6e63cc6000bb> in <lambda>(image)
      1 #%%cache -d cache/fisher_vectors pep_pca.pkl pep_pca
      2 random_indexes = random.sample(range(len(training_data)), 1000)
----> 3 descriptors = map(lambda image: computePEP(image, fisher_vector.pca, fisher_vector.gmm), training_data[random_indexes])
      4 
      5 pep_pca = PCA(copy=False)

NameError: global name 'computePEP' is not defined

In [7]:
plot(np.cumsum(pep_pca.explained_variance_ratio_))
_ = ylim(ymax=1.0)



In [8]:
# Truncate to the first 400 components and enable whitening
pep_pca.n_components_ = 400
pep_pca.components_ = pep_pca.components_[:400]
pep_pca.whiten_ = True

In [9]:
%timeit computeEigenPEP(training_data[0], fisher_vector.pca, fisher_vector.gmm, pep_pca)


10 loops, best of 3: 43.1 ms per loop

In [10]:
eigenpep_descriptors = map(lambda image: computeEigenPEP(image, fisher_vector.pca, fisher_vector.gmm, pep_pca), test_data)
scores = computeDistanceMatrix(eigenpep_descriptors, test_set_ground_truth, cosineDistance)
acc = computeAccuracy(scores)
roc = computeROC(scores)
plotROC([roc], ["Eigen PEP"])


Fisher vector
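
The implementation below follows the standard improved Fisher vector of Perronnin et al.: for a GMM with weights $w_k$, means $\mu_k$ and diagonal variances $\sigma_k^2$, and soft assignments $\gamma_t(k)$ over the $N$ patches $x_t$,

$$
\mathcal{G}_{\mu_k} = \frac{1}{N\sqrt{w_k}} \sum_{t=1}^{N} \gamma_t(k)\,\frac{x_t - \mu_k}{\sigma_k},
\qquad
\mathcal{G}_{\sigma_k} = \frac{1}{N\sqrt{2w_k}} \sum_{t=1}^{N} \gamma_t(k)\left[\frac{(x_t - \mu_k)^2}{\sigma_k^2} - 1\right],
$$

followed by signed square-rooting and L2 normalization. The code computes these gradients from the per-component statistics $S_0 = \frac{1}{N}\sum_t \gamma_t(k)$, $S_1 = \frac{1}{N}\sum_t \gamma_t(k)\,x_t$ and $S_2 = \frac{1}{N}\sum_t \gamma_t(k)\,x_t^2$.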


In [4]:
def computeFisherVector(patches, gmm, improved=True):
    K = gmm.n_components
    d = patches[0].shape[0]
    N = len(patches)

    vector = np.empty((2*K, d), dtype=np.float32)
    
    soft_assignments = gmm.computeResponsabilities(patches)
    squared_patches = patches ** 2
    
    for k in range(K):
        # Zeroth-, first- and second-order statistics of the patches,
        # weighted by their soft assignments to component k
        S_0 = soft_assignments[:,k].mean()
        S_1 = (soft_assignments[:,k,np.newaxis] * patches).mean(axis=0)
        S_2 = (soft_assignments[:,k,np.newaxis] * squared_patches).mean(axis=0)
        
        # Gradients w.r.t. the means and the diagonal variances;
        # gmm.covars_ is assumed to hold the variances sigma_k^2
        vector[2*k] = (S_1 - gmm.means_[k]*S_0) / (np.sqrt(gmm.weights_[k] * gmm.covars_[k]))
        vector[2*k+1] = (S_2 - 2*gmm.means_[k]*S_1 + (gmm.means_[k]**2-gmm.covars_[k])*S_0) / (np.sqrt(2*gmm.weights_[k]) * gmm.covars_[k])
    
    vector = vector.ravel()
    
    if improved:
        # Signed square-rooting
        vector = np.sign(vector) * np.sqrt(np.abs(vector))
        
        # L2 normalization
        vector /= np.linalg.norm(vector)
        
    return vector

In [5]:
fisher_vector.computeFisherVector(computeDenseDescriptor(training_data[0], pca=fisher_vector.pca, embed_spatial_information=False), fisher_vector.gmm)


Out[5]:
array([ -9.96797535e-05,   2.87454779e-04,   2.18010944e-04, ...,
        -1.08816735e-02,  -7.70612899e-03,  -1.11131035e-02], dtype=float32)

In [6]:
patches = computeDenseDescriptor(training_data[0], pca=fisher_vector.pca, embed_spatial_information=False)
%timeit fisher_vector.computeFisherVector(patches, fisher_vector.gmm)


1 loops, best of 3: 338 ms per loop

In [7]:
patches = computeDenseDescriptor(training_data[0], pca=fisher_vector.pca)
fv = fisher_vector.yaelFV(patches, improved=True)
# The improved FV is L2-normalized, so its norm should be ~1
np.linalg.norm(fv)


Out[7]:
1.0000004

In [8]:
patches = computeDenseDescriptor(training_data[0], pca=fisher_vector.pca)
%timeit fisher_vector.yaelFV(patches)


10 loops, best of 3: 65.1 ms per loop

In [9]:
supervised_learning_data = training_data[supervised_learning_indexes]

In [10]:
def computeCompleteFisherVector(image, fisher_vector, embed_spatial_information=False):
    # Dense descriptors -> PCA projection -> Fisher vector (yael backend)
    patches = computeDenseDescriptor(image, pca=fisher_vector.pca, embed_spatial_information=embed_spatial_information)
    #return fisher_vector.computeFisherVector(patches)
    return fisher_vector.yaelFV(patches)

In [5]:
%%cache -d cache/fisher_vectors training_fisher_vectors.pkl training_fisher_vectors

def parallelizedCompleteFisherVector(data, output, i, fisher_vector):
    output[i] = computeCompleteFisherVector(data[i], fisher_vector)

n_samples = supervised_learning_data.shape[0]
n_features = fisher_vector.gmm.n_components * 2 * 20  # 2 * K * d, with d = 20 PCA dims
training_fisher_vectors_filename = "cache/fisher_vectors/training_fisher_vectors.mmap"
#training_fisher_vectors = parallelize(parallelizedCompleteFisherVector, supervised_learning_data, (n_samples, n_features),  np.float32, args=[fisher_vector], n_jobs=8, output_file=training_fisher_vectors_filename)
training_fisher_vectors = np.empty((n_samples, n_features), dtype=np.float32)
for i, image in enumerate(supervised_learning_data):
    training_fisher_vectors[i] = computeCompleteFisherVector(image, fisher_vector)


[Skipped the cell's code and loaded variables training_fisher_vectors from file '/home/tlorieul/Dev/Snoop/src/lib/Python/notebooks/cache/fisher_vectors/training_fisher_vectors.pkl'.]

In [5]:
print training_fisher_vectors.shape


(3264, 20480)

In [6]:
# Balanced subsample for metric learning: 20 random identities,
# 10 images each
labels = list(set(y))
sampled_labels = random.sample(labels, 20)
sampled_fv = np.empty((0, training_fisher_vectors.shape[1]), dtype=np.float32)
sampled_y = np.empty((0), dtype=np.int8)
for label in sampled_labels:
    subset = random.sample(np.where(y==label)[0], 10)
    sampled_fv = np.append(sampled_fv, training_fisher_vectors[subset], axis=0)
    sampled_y = np.append(sampled_y, y[subset], axis=0)

print sampled_fv.shape, sampled_y.shape


(200, 20480) (200,)

In [7]:
del training_fisher_vectors

In [8]:
%%cache -d cache/fisher_vectors -f diagonal_metric.pkl diagonal_metric

from learning.mahalanobis_metric import DiagonalMahalanobisMetric
diagonal_metric = DiagonalMahalanobisMetric()
#diagonal_metric.fit(training_fisher_vectors, y, n_samples=int(1e4))
"""
random_sampling = random.sample(range(len(training_fisher_vectors)), 200)
diagonal_metric.fit(training_fisher_vectors[random_sampling], y[random_sampling])
"""
diagonal_metric.fit(sampled_fv, sampled_y)


[Saved variables diagonal_metric to file '/home/tlorieul/Dev/Snoop/src/lib/Python/notebooks/cache/fisher_vectors/diagonal_metric.pkl'.]
Creating X and y vectors...
Performing SGD...
Finished with score: 0.045226

In [8]:
print diagonal_metric.W_, diagonal_metric.b_


[  1.74886290e-04   1.76144907e-05   5.77682711e-05 ...,  -3.11440590e-05
  -3.22341805e-05   3.24000081e-05] -0.42892272671
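
For reference, a hypothetical sketch of the distance this metric presumably computes, assuming W_ holds per-dimension weights applied to squared differences (the actual mesureDistance implementation may differ):

In [ ]:
def diagonalMahalanobisDistanceSketch(x, y, W):
    # Hypothetical stand-in for diagonal_metric.mesureDistance:
    # per-dimension weighted squared Euclidean distance
    delta = x - y
    return np.inner(W * delta, delta)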

In [11]:
%%cache -d cache/fisher_vectors -f test_fisher_vectors.pkl test_fisher_vectors

def parallelizedCompleteFisherVector(data, output, i, fisher_vector):
    output[i] = computeCompleteFisherVector(data[i], fisher_vector)

n_samples = test_data.shape[0]
n_features = fisher_vector.gmm.n_components * 2 * 20  # 2 * K * d, with d = 20 PCA dims
test_fisher_vectors_filename = "cache/fisher_vectors/test_data.mmap"

test_fisher_vectors = np.empty((n_samples, n_features), dtype=np.float32)
for i, image in enumerate(test_data):
    test_fisher_vectors[i] = computeCompleteFisherVector(image, fisher_vector)

#test_fisher_vectors = parallelize(parallelizedCompleteFisherVector, test_data, (n_samples, n_features), np.float32, args=[fisher_vector], n_jobs=8, output_file=test_fisher_vectors_filename)


[Saved variables test_fisher_vectors to file '/home/tlorieul/Dev/Snoop/src/lib/Python/notebooks/cache/fisher_vectors/test_fisher_vectors.pkl'.]

In [14]:
scores = computeDistanceMatrix(test_fisher_vectors, test_set_ground_truth, diagonal_metric.mesureDistance)
acc = computeAccuracy(scores, thresholds=np.linspace(0.00003, 0.00009, num=100))
roc = computeROC(scores, thresholds=np.linspace(0.00003, 0.00009, num=100))
plotROC([roc], ["Fisher vectors (diag metric)"])
print acc


0.5

In [11]:
_ = hist(scores[0], bins=80, histtype="step", color="g")
_ = hist(scores[1], bins=80, histtype="step", color="r")


Large-margin discriminative dimensionality reduction
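
From the initializeB_ code visible in the traceback below, the bias search looks for a $b$ under which pairs satisfy a unit-margin constraint in the projected space, i.e. (assuming the usual convention $y_{ij} = +1$ for matched and $-1$ for mismatched pairs)

$$
y_{ij}\left(b - \lVert W x_i - W x_j \rVert^2\right) > 1.
$$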


In [6]:
#%%cache -d cache/fisher_vectors large_margin_dimension_reduction.pkl large_margin_dimension_reduction
from learning.large_margin_dimension_reduction import LargeMarginDimensionReduction
large_margin_dimension_reduction = LargeMarginDimensionReduction(n_components=128, n_iter=int(1e3))
large_margin_dimension_reduction.fit(training_fisher_vectors, y)


Initializing with PCA...
Initializing b...
1.0
12.0
23.0
34.0
45.0
56.0
67.0
78.0
89.0
100.0
Current best b value: 1.000000
-10.0
-7.55555555556
-5.11111111111
-2.66666666667
-0.222222222222
2.22222222222
4.66666666667
7.11111111111
9.55555555556
12.0
Current best b value: -10.000000
-12.4444444444
-11.9012345679
-11.3580246914
-10.8148148148
-10.2716049383
-9.72839506173
-9.18518518519
-8.64197530864
-8.0987654321
-7.55555555556
Current best b value: -12.444444
-12.987654321
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-6-3f53165a2f3f> in <module>()
      2 from learning.large_margin_dimension_reduction import LargeMarginDimensionReduction
      3 large_margin_dimension_reduction = LargeMarginDimensionReduction(n_components=128, n_iter=int(1e3))
----> 4 large_margin_dimension_reduction.fit(training_fisher_vectors, y)

/home/tlorieul/Dev/Snoop/src/lib/Python/learning/large_margin_dimension_reduction.pyc in fit(self, X, y)
     23         print 'Initializing b...'
     24         sys.stdout.flush()
---> 25         b = self.initializeB_(X, y, W)
     26 
     27         labels = list(set(y))

/home/tlorieul/Dev/Snoop/src/lib/Python/learning/large_margin_dimension_reduction.pyc in initializeB_(self, X, y, W, n_rec, n_num)
    115                         delta = X2[i]-X2[j]
    116                         if y_*(b-np.inner(delta, delta)) > 1:
--> 117                             acc += 1
    118 
    119                 acc_values.append(acc)

KeyboardInterrupt: 

In [14]:
compressed_data = large_margin_dimension_reduction.transform(test_fisher_vectors)
# Score = negative squared Euclidean distance in the reduced space
scores = computeDistanceMatrix(compressed_data, test_set_ground_truth, lambda x,y: -np.inner(x-y, x-y))
acc = computeAccuracy(scores, thresholds=np.linspace(-40000, -10000, num=100))
roc = computeROC(scores, thresholds=np.linspace(-40000, -10000, num=100))
print acc
plotROC([roc], ["Large-margin dimension reduction"])


0.638

In [13]:
_ = hist(scores[0], bins=80, histtype="step", color="g")
_ = hist(scores[1], bins=80, histtype="step", color="r")



In [115]:
print large_margin_dimension_reduction.W_.shape, np.asarray(test_descriptors).shape


 (200, 10240) (3708, 10240)

In [ ]: