The goal of this notebook is to detect faces in all the images associated with the set of ads provided for CP1 during the MEMEX Winter QPR 2017.
Requirements: jq and GNU parallel. The latter can be installed with:
sudo apt-get install parallel
facenet repository: https://github.com/svebk/facenet
In [1]:
import os
# Name of the face-search directory located three levels up
# (alternative name used on some setups: "ColumbiaFaceSearch").
facesearch_dir = "facesearch"

# Base locations: notebook data directory and the face-search sub-directories.
input_dir = "../data/"
FACENET_DIR = "../../../{}/facenet/".format(facesearch_dir)
DATA_DIR = "../../../{}/data".format(facesearch_dir)

# All per-run file names are derived from this prefix.
prefix = "train"
input_file = os.path.join(input_dir, "{}_image_url_sha1.csv".format(prefix))
IMAGE_DIR = os.path.join(input_dir, "{}_images".format(prefix))
In [2]:
# get just sha1 list
IMAGE_SHA1S = os.path.join(input_dir,prefix+"_image_sha1_list.csv")
!cat $input_file | cut -d ',' -f2 | sort | uniq > $IMAGE_SHA1S
DETECTED_FACES = os.path.join(input_dir,prefix+"_faces.jl")
FACES_JOBLOG = prefix+"_faces.joblog"
In [3]:
# Find faces in the images of the ads.
# GNU parallel reads its arguments from $IMAGE_SHA1S and runs
# ../scripts/detect_face.py with at most 200 of them per invocation and
# 2 invocations at a time. No replacement string is given, so each batch of
# arguments is appended after "$FACENET_DIR $DATA_DIR $IMAGE_DIR".
# A per-job log is written to $FACES_JOBLOG and the standard output of the
# whole run is redirected to $DETECTED_FACES.
# NOTE(review): --retries 0 presumably means failed batches are not re-run,
# so check the job log for non-zero exit codes -- confirm.
# NOTE(review): the ".jl" suffix suggests detect_face.py prints one JSON
# record per line; the script is not visible here -- confirm.
!parallel --joblog $FACES_JOBLOG \
--retries 0 \
--arg-file $IMAGE_SHA1S \
--max-args 200 \
--jobs 2 \
python ../scripts/detect_face.py $FACENET_DIR $DATA_DIR $IMAGE_DIR > $DETECTED_FACES
In [ ]: