In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt

In [ ]:
import sys, os, re, time
import urllib

import numpy as np

from IPython import parallel

First, initialize OpenCV for simple facial detection


In [ ]:
# Path to the Haar cascade XML file for frontal-face detection. It is a
# relative path, so it is resolved against the current working directory.
HAAR_CASCADE_PATH = "haarcascade_frontalface_default.xml"
# if you have opencv installed via homebrew, this would be in
# /usr/local/share/OpenCV/haarcascades/

# NOTE(review): `cv` looks like the legacy OpenCV Python binding -- confirm
# that your OpenCV install still provides it.
import cv
# Module-level state read by detect_faces(): the storage object handed to the
# detector and the cascade loaded from HAAR_CASCADE_PATH.
storage = cv.CreateMemStorage()
cascade = cv.Load(HAAR_CASCADE_PATH)

Then define a few functions for extracting faces from images


In [ ]:
def extract_faces(image, faces):
    """Crop each face rectangle out of an image and return the crops.

    `image` must provide `tostring()`, `height`, `width` and `nChannels`.
    `faces` is an iterable of indexable `(x, y, w, h)` rectangles.

    Returns a list holding one numpy uint8 array per rectangle, with the
    channel axis reversed relative to the raw image bytes.
    """
    # Imported locally so the function carries its own dependency with it.
    import numpy as np
    raw = image.tostring()
    shape = (image.height, image.width, image.nChannels)
    pixels = np.frombuffer(raw, dtype=np.uint8).reshape(shape)
    # Reverse the last axis, i.e. the channel order.
    flipped = pixels[:, :, ::-1]
    # Rows are selected by y/height, columns by x/width.
    return [
        flipped[rect[1]:rect[1] + rect[3], rect[0]:rect[0] + rect[2]]
        for rect in faces
    ]


def detect_faces(filename):
    """Load an image file with OpenCV and run the Haar face detector on it.

    Uses the module-level `cascade` and `storage` objects.

    Returns `(filename, [numpy arrays])` with one array per detected face,
    or None when no faces are detected.
    """
    image = cv.LoadImage(filename)
    # NOTE(review): 1.2, 2 and (100,100) are presumably the scale factor,
    # minimum neighbour count and minimum window size -- confirm against
    # the OpenCV documentation.
    hits = cv.HaarDetectObjects(image, cascade, storage, 1.2, 2, cv.CV_HAAR_DO_CANNY_PRUNING, (100,100))
    # Each hit is ((x, y, w, h), n); only the rectangle is kept.
    faces = [(x, y, w, h) for (x, y, w, h), n in (hits or ())]
    if not faces:
        return None
    return filename, extract_faces(image, faces)

Since we don't trust the network, we can just build a list of images from anywhere on our filesystem. Any list of images will do. For instance, you can use the path to the 'Thumbnails' directory in your iPhoto library, which contains images ranging from ~320x240 to 1024x768.


In [ ]:
# Root directory that is walked recursively to collect pictures; a relative
# path, so it is resolved against the current working directory.
pictures_dir = 'images'

This will search pictures_dir for any jpegs.

See the "Downloading images from Flickr" notebook for a quick way to populate a folder with Flickr images that carry a certain tag.


In [ ]:
import glob
# Recursively collect every JPEG below pictures_dir. The extension test is
# case-insensitive and accepts both common spellings, so files such as
# IMG_0001.JPG or photo.jpeg are no longer silently skipped.
pictures = []
for directory, subdirs, files in os.walk(pictures_dir):
    for fname in files:
        if fname.lower().endswith(('.jpg', '.jpeg')):
            pictures.append(os.path.join(directory, fname))

Let's test our output


In [ ]:
# Scan the pictures until the first one that yields at least one face.
found = None  # stays None when `pictures` is empty or nothing is detected
for p in pictures:
    found = detect_faces(p)
    if found:
        break

if found:
    filename, faces = found
    # One figure per detected face.
    for face in faces:
        plt.figure()
        plt.imshow(face)
else:
    # Without this guard, unpacking `found` fails when no picture has a face.
    print("no faces found in any of the %i pictures" % len(pictures))

Hey, that looks like a face!

Now in parallel

First, we connect our parallel Client


In [ ]:
# Connect the parallel client.
rc = parallel.Client()
# View over all engines; used below to push definitions to every engine.
all_engines = rc[:]
# Load-balanced view; used below to distribute the detect_faces calls.
view = rc.load_balanced_view()

Then we initialize OpenCV on all of the engines (identical to what we did above)


In [ ]:
%%px
# Executed on the engines: change the working directory. Presumably this is
# where the relative paths used in this notebook resolve -- adjust to your layout.
%cd notebooks/parallel

In [ ]:
%%px
# Same initialisation as in the local kernel, repeated on the engines so that
# detect_faces finds `cv`, `storage` and `cascade` there.
HAAR_CASCADE_PATH = "haarcascade_frontalface_default.xml"

import cv
storage = cv.CreateMemStorage()
cascade = cv.Load(HAAR_CASCADE_PATH)

and make sure extract_faces is defined everywhere


In [ ]:
# Publish the locally defined extract_faces under the same name on the engines.
all_engines.push({'extract_faces': extract_faces})

Now we can iterate through all of our pictures, and detect and display any faces we find


In [ ]:
tic = time.time()

# Submit at most the first 1000 pictures. With ordered=False the results are
# consumed as they arrive rather than in submission order.
amr = view.map_async(detect_faces, pictures[:1000], ordered=False)
nfound = 0
for r in amr:
    # detect_faces returns None for pictures without faces.
    if not r:
        continue
    filename, faces = r
    nfound += len(faces)
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print("%i faces found in %s" % (len(faces), filename))
    for face in faces:
        plt.imshow(face)
        plt.show()

toc = time.time()

print("found %i faces in %i images in %f s" % (nfound, len(amr), toc-tic))

In [ ]: