Normalization Testing

Why do we need normalization ?

Unless we use a descriptor that is invariant to affine transformations (e.g. rotation, scale, ...), we need to normalize the face before computing the descriptor.

We use LBP in its basic form because the rotation-invariant version loses relative orientation: that version of LBP is lossy, unlike the Scattering Transform [1]. We only really need a rotation normalization, because scale-invariant descriptors are easier to create.

  1. Fast High Dimensional Vector Multiplication Face Recognition, Wolf

Limitations

Normalization gets rid of the distances between fiducial points, which can characterize a face.

We could try using a multimodal approach in order to keep this information.


In [1]:
%pylab inline
from tools import *
from alignment import *

import cv2

# Load the sample images and their names. `readImagesInDir` and `sample_directory` are not
# defined in this notebook, so they presumably come from the star imports above — TODO confirm.
imgs_name, imgs = readImagesInDir(sample_directory)


Populating the interactive namespace from numpy and matplotlib

Computation done using CSIRO alignment system.


In [2]:
# Detector instance read as a global by the cells below.
landmark_detector = CSIROLandmarkDetector()

Affine transform

The affine transform is computed from several landmarks, either as an average / median of the different possible transforms or using a least-squares estimate (LSE).

Landmarks used :

  • right / left eyes
  • tip of the nose
  • middle of forehead
  • middle of the mouth

In [3]:
import skimage.transform as tfm

# Target (x, y) position of each alignment point in the normalized image, in the same
# row order as the array returned by normalizationLandmarks().
normalized_landmarks = np.array([[125.0, 140.0], [100.0, 110.0], [150.0, 110.0], [125.0, 90.0], [125.0, 175.0]])

def normalizationLandmarks(landmarks):
    """Reduce a full set of detected landmarks to the five points used for alignment.

    landmarks: indexable sequence of 2D points (at least 65 entries are indexed).

    Returns a 5x2 array whose rows are, in order:
      0. landmarks[30]
      1. mean of landmarks[36:41]
      2. mean of landmarks[42:47]
      3. mean of landmarks[19] and landmarks[24]
      4. landmarks[64]

    An earlier variant used the mean of landmarks[48:65] for the last row
    instead of the single point landmarks[64].

    NOTE(review): the slices 36:41 and 42:47 cover five points each (the end
    index is exclusive). If the detector's eye contours span indices 36-41 and
    42-47 inclusive, the last point of each eye is left out - confirm against
    the detector's landmark layout before changing.
    """
    # np.mean is called explicitly so this function does not depend on the bare
    # `mean` that %pylab injects into the namespace, which any cell can rebind.
    return np.array([
        landmarks[30],
        np.mean(landmarks[36:41], axis=0),
        np.mean(landmarks[42:47], axis=0),
        np.mean([landmarks[19], landmarks[24]], axis=0),
        landmarks[64],
    ])

# Align the first sample image with an affine fit on the five normalization points.
landmarks = normalizationLandmarks(landmark_detector.detectLandmarks(imgs[0]))
# The fit goes from normalized (output) coordinates to image coordinates, i.e. the
# output -> input mapping that tfm.warp takes as its second argument.
transform = tfm.estimate_transform("affine", normalized_landmarks, landmarks)

output = np.array(imgs[0], copy=True)

# Detected points, drawn on the copy before it is warped.
# astype(int) replaces the np.int alias, which newer NumPy releases no longer provide.
for landmark in landmarks:
    cv2.circle(output, tuple(landmark.astype(int)), 2, (255, 0, 0))

output = tfm.warp(output, transform)

# After warping: the detected points mapped through the inverse transform, next to
# their target positions, so the residual of the fit is visible.
for landmark, normalized_landmark in zip(transform.inverse(landmarks), normalized_landmarks):
    cv2.circle(output, tuple(landmark.astype(int)), 2, (0, 255, 0))
    cv2.circle(output, tuple(normalized_landmark.astype(int)), 2, (0, 0, 255))

imshow(output)


Out[3]:
<matplotlib.image.AxesImage at 0x56a6a10>

Test of the choice of landmarks used for normalization process


In [4]:
def normalizationPointsChoiceTests(imgs, normalized_landmarks, normalizationFunction):
    """Warp every image with an affine fit on the points picked by normalizationFunction.

    imgs: iterable of images.
    normalized_landmarks: target positions, one row per point returned by
        normalizationFunction.
    normalizationFunction: callable mapping the detector's landmarks to the
        points used for the fit.

    Reads the global `landmark_detector`. Returns a list with one warped image
    per input image; the selected points are drawn on each image before warping.
    """
    outputs = []

    for img in imgs:
        landmarks = normalizationFunction(landmark_detector.detectLandmarks(img))

        # Draw on a copy so the caller's image is left untouched.
        output = np.array(img, copy=True)
        for landmark in landmarks:
            # astype(int) replaces the np.int alias, which newer NumPy releases no longer provide.
            cv2.circle(output, tuple(landmark.astype(int)), 2, (255, 0, 0))

        transform = tfm.estimate_transform("affine", normalized_landmarks, landmarks)
        outputs.append(tfm.warp(output, transform))

    return outputs

# Images collected for the comparison mosaic; starts with copies of the input images,
# and later cells append their own results to it.
landmarks_choices_output = [np.array(img, copy=True) for img in imgs]

Using median of affine transformation matrices

$$A^* = \operatorname{median}_{k = 1,\dots,\binom{n}{3}}(A_k)$$

(here $n=5$, therefore $\binom{n}{3} = 10$, previous case with $n=4$ does not work properly)

Note: using the mean does not work because of outliers


In [5]:
from itertools import *

def computeMedianAffineTransform(landmarks, normalized_landmarks):
    """Combine the affine fits of every 3-point subset by an element-wise median.

    landmarks, normalized_landmarks: arrays with one point per row, in matching
    order. For each combination of three row indices an affine transform from
    the normalized points to the detected points is estimated; the parameter
    matrices are then reduced with an element-wise median and wrapped in a
    tfm.AffineTransform.
    """
    triplet_matrices = []
    point_count = len(landmarks)

    for triplet in combinations(range(point_count), 3):
        picked = np.array(triplet)
        fit = tfm.estimate_transform("affine", normalized_landmarks[picked], landmarks[picked])
        triplet_matrices.append(fit.params.copy())

    return tfm.AffineTransform(matrix=np.median(triplet_matrices, axis=0))



# Target (x, y) position of each alignment point in the normalized image, in the same
# row order as the array returned by normalizationLandmarks().
normalized_landmarks = np.array([[125.0, 140.0], [100.0, 110.0], [150.0, 110.0], [125.0, 90.0], [125.0, 175.0]])

def normalizationLandmarks(landmarks):
    """Reduce a full set of detected landmarks to the five points used for alignment.

    Returns a 5x2 array whose rows are, in order: landmarks[30], the mean of
    landmarks[36:41], the mean of landmarks[42:47], the mean of landmarks[19]
    and landmarks[24], and landmarks[64].

    NOTE(review): the slices 36:41 and 42:47 cover five points each (the end
    index is exclusive) - confirm this matches the detector's eye contours.
    """
    # np.mean is called explicitly so this function does not depend on the bare
    # `mean` that %pylab injects into the namespace, which any cell can rebind.
    return np.array([
        landmarks[30],
        np.mean(landmarks[36:41], axis=0),
        np.mean(landmarks[42:47], axis=0),
        np.mean([landmarks[19], landmarks[24]], axis=0),
        landmarks[64],
    ])


# Warp every sample image with the median-of-triplets affine transform.
outputs = []

for img in imgs:
    landmarks = normalizationLandmarks(landmark_detector.detectLandmarks(img))
    transform = computeMedianAffineTransform(landmarks, normalized_landmarks)
    # The image is warped as-is here: no landmark circles are drawn on it.
    outputs.append(tfm.warp(img, transform))

# Add this method's results to the images collected for the comparison mosaic.
landmarks_choices_output += outputs

Using similarity transform conserving ratio

Find transformation: $$\min_{s,R,t} \|\bar{S} - (sRS+t)\|_2^2$$


In [6]:
from itertools import *

def computeSimilarityTransform(landmarks, normalized_landmarks):
    """Least-squares similarity transform from normalized_landmarks to landmarks.

    Solves, for the four unknowns (a, b, tx, ty), the stacked equations

        x' = a*x - b*y + tx
        y' = b*x + a*y + ty

    where (x, y) are the rows of normalized_landmarks and (x', y') the matching
    rows of landmarks. Both arguments are two-column arrays with the same number
    of rows. The solution is packed into a 3x3 matrix and returned as a
    tfm.AffineTransform.
    """
    point_count = landmarks.shape[0]
    flat_size = 2 * point_count

    # Every point contributes two consecutive rows: its x equation, then its y equation.
    design = np.zeros((flat_size, 4))
    design[:, 0] = normalized_landmarks.reshape(flat_size)  # x, y interleaved: coefficient of a
    design[0::2, 1] = -normalized_landmarks[:, 1]           # -y multiplies b in the x rows
    design[1::2, 1] = normalized_landmarks[:, 0]            # +x multiplies b in the y rows
    design[0::2, 2] = 1                                     # tx appears only in the x rows
    design[1::2, 3] = 1                                     # ty appears only in the y rows

    targets = landmarks.reshape(flat_size)

    a, b, tx, ty = np.linalg.lstsq(design, targets)[0]
    return tfm.AffineTransform(matrix=np.array([[a, -b, tx], [b, a, ty], [0, 0, 1]]))



# Target (x, y) position of each alignment point in the normalized image, in the same
# row order as the array returned by normalizationLandmarks().
normalized_landmarks = np.array([[125.0, 140.0], [100.0, 110.0], [150.0, 110.0], [125.0, 90.0], [125.0, 175.0]])

def normalizationLandmarks(landmarks):
    """Reduce a full set of detected landmarks to the five points used for alignment.

    Returns a 5x2 array whose rows are, in order: landmarks[30], the mean of
    landmarks[36:41], the mean of landmarks[42:47], the mean of landmarks[19]
    and landmarks[24], and landmarks[64].

    NOTE(review): the slices 36:41 and 42:47 cover five points each (the end
    index is exclusive) - confirm this matches the detector's eye contours.
    """
    # np.mean is called explicitly so this function does not depend on the bare
    # `mean` that %pylab injects into the namespace, which any cell can rebind.
    return np.array([
        landmarks[30],
        np.mean(landmarks[36:41], axis=0),
        np.mean(landmarks[42:47], axis=0),
        np.mean([landmarks[19], landmarks[24]], axis=0),
        landmarks[64],
    ])


# Warp every sample image with the least-squares similarity transform.
outputs = []

for img in imgs:
    landmarks = normalizationLandmarks(landmark_detector.detectLandmarks(img))
    transform = computeSimilarityTransform(landmarks, normalized_landmarks)
    outputs.append(tfm.warp(img, transform))

# Overlay on a copy of the last warped image; `landmarks` and `transform` still hold
# the values from the last loop iteration, which belong to that image.
output = np.copy(outputs[-1])

# Detected points mapped through the inverse transform, next to their target positions.
# astype(int) replaces the np.int alias, which newer NumPy releases no longer provide.
for landmark, normalized_landmark in zip(transform.inverse(landmarks), normalized_landmarks):
    cv2.circle(output, tuple(landmark.astype(int)), 2, (0, 255, 0), -1)
    cv2.circle(output, tuple(normalized_landmark.astype(int)), 2, (0, 0, 255), -1)

imshow(output)

# Add this method's results to the images collected for the comparison mosaic.
landmarks_choices_output += outputs


C++ version


In [7]:
# Same alignment done through the FaceNormalization wrapper, using the detector's
# own reference shape as the target.
face_normalization = FaceNormalization()
face_normalization.setReferenceShape(landmark_detector.getReferenceShape())

outputs = []

for img in imgs:
    landmarks = landmark_detector.extractLandmarksForNormalization(landmark_detector.detectLandmarks(img))
    output = np.array(img, copy=True)
    # The return value is ignored and `output` is appended afterwards, so normalize()
    # presumably rewrites `output` in place — confirm in the wrapper.
    face_normalization.normalize(output, landmarks)
    outputs.append(output)
    
landmarks_choices_output += outputs

Results


In [8]:
# The list holds len(imgs) images per method (originals first), so with one column per
# input image each mosaic row presumably shows one method — confirm showMosaic's fill order.
showMosaic(landmarks_choices_output, ncols=len(imgs))


Issue: if the face is slightly turned, the affine transformation estimation fails...

The estimate used by skimage is a total least-squares estimate. Maybe we should try OpenCV's estimateRigidTransform (which actually estimates an affine transformation)?

The median of the affine transformations turns out to be the best. But is it a real affine transformation ?

=> better precision due to outliers ???

Timings are not really comparable (the Python code for the median affine transformation computation is not efficient).

Normalization results


In [11]:
import config
from itertools import imap
from stats import *
from datasets import lfw
from benchmarks import lfw as lfw_bench


sets_ground_truth = lfw.loadSetsGroundTruth()

# Legend labels and descriptor file names are parallel lists: entry i of one
# describes entry i of the other.
labels = [
    "LFW-a baseline results",
    "LBF 51 landmarks normalized faces",
    "LBF 68 landmarks normalized faces",
    "CSIRO normalized faces",
    "LFW (no normalization)",
]
descs_files = [
    "ulbp_wpca_lfwa",
    "ulbp_wpca_lfw_normalized_lbf_51_landmarks",
    "ulbp_wpca_lfw_normalized_lbf_68_landmarks",
    "ulbp_wpca_lfw_normalized_csiro",
    "ulbp_wpca_lfw",
]

# Load one descriptor set at a time and score it before loading the next.
scores = []
for descs_file in descs_files:
    descs = lfw_bench.loadDescriptors(descs_file)
    scores.append(lfw_bench.computeDistanceMatrix(descs, sets_ground_truth))

rocs = [lfw_bench.computeMeanROC(score) for score in scores]

In [12]:
# `rocs` and `labels` are parallel lists, so each curve is presumably labelled with the
# matching entry of `labels` — confirm in plotROC.
plotROC(rocs, labels, title="ROC curve for different normalization methods")



In [13]:
# Print the mean accuracy and its spread for every normalization method.
# The results are bound to mean_accuracy / std_accuracy rather than mean / std: at
# notebook top level the shorter names would rebind the numpy `mean` and `std`
# functions that %pylab injects, breaking any cell that relies on them.
for label, score in zip(labels, scores):
    mean_accuracy, std_accuracy = lfw_bench.computeMeanAccuracy(score)
    print("%s: %0.4f +/- %0.4f" % (label, mean_accuracy, std_accuracy))


LFW-a baseline results: 0.7805 +/- 0.0024
LBF 51 landmarks normalized faces: 0.7770 +/- 0.0039
LBF 68 landmarks normalized faces: 0.7775 +/- 0.0035
CSIRO normalized faces: 0.7683 +/- 0.0048
LFW (no normalization): 0.7208 +/- 0.0033

Alignment results


In [15]:
# Load the data set stored under the name "lfw_normalized_csiro".
data = lfw.loadData("lfw_normalized_csiro")

In [16]:
# markLandmarks' return value is unused and the same slice is displayed right after,
# so it presumably draws on the first 40 entries in place — confirm.
markLandmarks(data[:40], color=(255,255,255))
showMosaic(data[:40], ncols=5)



In [17]:
# Times landmark detection on whatever image the global `output` currently holds.
%timeit landmark_detector.detectLandmarks(output)


10 loops, best of 3: 92.8 ms per loop

In [18]:
# Load the data set stored under the name "lfw_normalized_lbf_51_landmarks" and show
# its first 40 entries in a 5-column mosaic.
data = lfw.loadData("lfw_normalized_lbf_51_landmarks")
showMosaic(data[:40], ncols=5)



In [21]:
from cpp_wrapper.face_detection import *

# Two face detectors (one built with high_recall=True, one with default settings).
high_recall_face_detector = FaceDetector(high_recall=True)
face_detector = FaceDetector()
# Rebinds the global `landmark_detector`, replacing the CSIRO detector created earlier.
landmark_detector = LBFLandmarkDetector(detector="opencv", landmarks=68)

In [22]:
# Detect, annotate and normalize the face in every sample image.
face_normalization = FaceNormalization()
face_normalization.setReferenceShape(landmark_detector.getReferenceShape())

outputs = []

for img in imgs:
    output = np.copy(img)
    faces = high_recall_face_detector.detectFaces(output)

    # Images where even the high-recall detector finds nothing are appended unchanged.
    if len(faces) > 0:
        face = faces[0]
        # Prefer the default detector's first box whenever it also finds a face.
        faces = face_detector.detectFaces(output)
        if len(faces) > 0:
            face = faces[0]

        landmarks = landmark_detector.detectLandmarks(output, face)

        # The box and landmark dots are drawn before normalization, so they appear
        # in the image handed to normalize().
        cv2.rectangle(output, face[:2], face[2:], (0, 0, 0), 2)
        for landmark in landmarks:
            # astype(int) replaces the np.int alias, which newer NumPy releases no longer provide.
            cv2.circle(output, tuple(landmark.astype(int)), 2, (0, 255, 0), -1)

        normalization_landmarks = landmark_detector.extractLandmarksForNormalization(landmarks)
        face_normalization.normalize(output, normalization_landmarks)

    outputs.append(output)

In [23]:
# Display `outputs` (one entry per input image) in a 5-column mosaic.
showMosaic(outputs, ncols=5)



In [24]:
# Normalize the third sample image, then re-detect the face and its landmarks on the
# result and fit an ellipse to the first 17 landmarks.
img = np.copy(imgs[2])
# detectFaces(...)[0] raises IndexError when no face is found.
face = face_detector.detectFaces(img)[0]
landmarks = landmark_detector.detectLandmarks(img, face)
normalization_landmarks = landmark_detector.extractLandmarksForNormalization(landmarks)
face_normalization.normalize(img, normalization_landmarks)

# Second pass, on the image after normalize().
face = face_detector.detectFaces(img)[0]
landmarks = landmark_detector.detectLandmarks(img, face)
# NOTE(review): landmarks[:17] is presumably the jaw contour of the 68-point layout — confirm.
ellipse = cv2.fitEllipse(landmarks[:17].astype(np.float32))
print(ellipse)
cv2.ellipse(img, ellipse, (255, 0, 0), 2)
cv2.rectangle(img, face[:2], face[2:], (0, 0, 0), 2)
for landmark in landmarks:
    # astype(int) replaces the np.int alias, which newer NumPy releases no longer provide.
    cv2.circle(img, tuple(landmark.astype(int)), 2, (0, 255, 0), -1)
imshow(img)


((138.21432495117188, 124.53761291503906), (92.60397338867188, 140.00662231445312), 3.7089250087738037)
Out[24]:
<matplotlib.image.AxesImage at 0x8746a10>

In [25]:
# Crop every image in `outputs` by fixed margins (49 off each end of the first axis,
# 84 off each end of the second) and print the first image's shape before and after.
# Re-running this cell crops the already-cropped images again.
print(outputs[0].shape)
for index, image in enumerate(outputs):
    outputs[index] = image[49:-49, 84:-84]
print(outputs[0].shape)


(250, 250, 3)
(152, 82, 3)

In [26]:
# Display the current contents of `outputs` in a 5-column mosaic.
showMosaic(outputs, ncols=5)



In [30]:
# Time landmark detection and normalization separately. `face` and
# `normalization_landmarks` are globals left over from earlier cells, not values
# computed for data[0].
%timeit landmark_detector.detectLandmarks(data[0], face)
%timeit face_normalization.normalize(data[0], normalization_landmarks)


1000 loops, best of 3: 643 µs per loop
1000 loops, best of 3: 299 µs per loop

In [24]: