Capture Faces from Scraped Pictures

We used OpenCV's frontal-face Haar cascade (haarcascade_frontalface_default.xml) to detect frontal faces in the pictures scraped from My Ladyboy Date and Date in Asia, then cropped each face to 224 by 224 pixels for input into the model. The Girl and Ladyboy sets contain only the first profile picture from the respective dating sites, whereas Ladyboy Big contains the pictures from each profile's detail section.


In [ ]:
import cv2
from PIL import Image
import math
import copy

#the usual data science stuff
import os,sys
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

ladyboy_big_input = '../data/ladyboy_big/'
ladyboy_big_output = '../data/processed/ladyboy_big/'
ladyboy_input = '../data/ladyboy/'
ladyboy_output = '../data/processed/ladyboy/'
girl_input = '../data/girl/'
girl_output = '../data/processed/girl/'

In [2]:
cascade_file_src = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascade_file_src)
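
If the XML file is not in the working directory, cv2.CascadeClassifier silently returns an empty classifier and the error only surfaces later inside detectMultiScale, so it can help to fail fast here. A minimal check (the message string is ours):

In [ ]:
# fail fast if the cascade XML was not found next to the notebook
assert not faceCascade.empty(), "could not load " + cascade_file_src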

Ladyboy


In [12]:
for root, dirs, files in os.walk(ladyboy_input):
    for name in files:
        imagePath = os.path.join(root, name)

        # load image and convert to grayscale for detection
        image = cv2.imread(imagePath)
        if image is None:
            # skip files OpenCV cannot read (corrupt or non-image files)
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # detect faces in the image
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)

        # skip if no face detected
        if len(faces) == 0:
            continue

        # reopen with PIL for cropping; convert to RGB so the crop can be saved as JPEG
        im = Image.open(imagePath).convert('RGB')

        # build a square crop box around the first detected face,
        # padded by 20% and capped at the image dimensions
        (x, y, w, h) = faces[0]
        center_x = x + w / 2
        center_y = y + h / 2
        b_dim = min(max(w, h) * 1.2, im.width, im.height)
        box = (int(center_x - b_dim / 2), int(center_y - b_dim / 2),
               int(center_x + b_dim / 2), int(center_y + b_dim / 2))

        # crop, resize to 224x224, and save
        crpim = im.crop(box).resize((224, 224))
        #plt.imshow(np.asarray(crpim))  # uncomment to inspect a crop
        crpim.save(ladyboy_output + name, format='JPEG')
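
Before processing the other two sets, it can be worth spot-checking a handful of crops. A quick sketch, assuming the output folder above already contains saved faces:

In [ ]:
# display the first few processed crops as a sanity check
sample = sorted(glob.glob(ladyboy_output + '*'))[:5]
fig, axes = plt.subplots(1, len(sample), figsize=(3 * len(sample), 3))
for ax, path in zip(np.atleast_1d(axes), sample):
    ax.imshow(np.asarray(Image.open(path)))
    ax.axis('off')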

Ladyboy Big


In [8]:
for root, dirs, files in os.walk(ladyboy_big_input):
    for name in files:
        imagePath = os.path.join(root, name)

        # load image and convert to grayscale for detection
        image = cv2.imread(imagePath)
        if image is None:
            # skip files OpenCV cannot read (corrupt or non-image files)
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # detect faces in the image
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)

        # skip if no face detected
        if len(faces) == 0:
            continue

        # reopen with PIL for cropping; convert to RGB so the crop can be saved as JPEG
        im = Image.open(imagePath).convert('RGB')

        # build a square crop box around the first detected face,
        # padded by 20% and capped at the image dimensions
        (x, y, w, h) = faces[0]
        center_x = x + w / 2
        center_y = y + h / 2
        b_dim = min(max(w, h) * 1.2, im.width, im.height)
        box = (int(center_x - b_dim / 2), int(center_y - b_dim / 2),
               int(center_x + b_dim / 2), int(center_y + b_dim / 2))

        # crop, resize to 224x224, and save
        crpim = im.crop(box).resize((224, 224))
        #plt.imshow(np.asarray(crpim))  # uncomment to inspect a crop
        crpim.save(ladyboy_big_output + name, format='JPEG')

Girl


In [10]:
for root, dirs, files in os.walk(girl_input):
    for name in files:
        imagePath = os.path.join(root, name)

        # load image and convert to grayscale for detection
        image = cv2.imread(imagePath)
        if image is None:
            # skip files OpenCV cannot read (corrupt or non-image files)
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # detect faces in the image
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)

        # skip if no face detected
        if len(faces) == 0:
            continue

        # reopen with PIL for cropping; convert to RGB so the crop can be saved as JPEG
        im = Image.open(imagePath).convert('RGB')

        # build a square crop box around the first detected face,
        # padded by 20% and capped at the image dimensions
        (x, y, w, h) = faces[0]
        center_x = x + w / 2
        center_y = y + h / 2
        b_dim = min(max(w, h) * 1.2, im.width, im.height)
        box = (int(center_x - b_dim / 2), int(center_y - b_dim / 2),
               int(center_x + b_dim / 2), int(center_y + b_dim / 2))

        # crop, resize to 224x224, and save
        crpim = im.crop(box).resize((224, 224))
        #plt.imshow(np.asarray(crpim))  # uncomment to inspect a crop
        crpim.save(girl_output + name, format='JPEG')
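
The three cells above differ only in their input and output directories, so the loop could be pulled into a single helper and called once per set. A sketch along those lines; crop_faces is our name, not part of the original notebook:

In [ ]:
def crop_faces(input_dir, output_dir, cascade, size=224, pad=1.2):
    """Detect the first face in every image under input_dir and save a square crop."""
    for root, dirs, files in os.walk(input_dir):
        for name in files:
            image_path = os.path.join(root, name)
            image = cv2.imread(image_path)
            if image is None:
                continue  # unreadable or non-image file
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
            if len(faces) == 0:
                continue  # no face detected
            im = Image.open(image_path).convert('RGB')
            x, y, w, h = faces[0]
            cx, cy = x + w / 2, y + h / 2
            b_dim = min(max(w, h) * pad, im.width, im.height)
            box = (int(cx - b_dim / 2), int(cy - b_dim / 2),
                   int(cx + b_dim / 2), int(cy + b_dim / 2))
            im.crop(box).resize((size, size)).save(os.path.join(output_dir, name), format='JPEG')

# e.g. crop_faces(girl_input, girl_output, faceCascade)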