We used OpenCV's frontal face Haar cascade (haarcascade_frontalface_default) to detect frontal faces in the pictures scraped from My Ladyboy Date and Date in Asia, and cropped each detected face to 224 by 224 pixels as input for the model. This yielded 4,501 girl faces from 6,645 girl profile pictures and 3,157 ladyboy faces from 8,153 ladyboy profile pictures.
The Girl and Ladyboy sets contain only the first profile picture on the respective dating sites, whereas the Ladyboy Big set contains the pictures from the detail section of each profile.
In [ ]:
import cv2
from PIL import Image
import math
import copy
#the usual data science stuff
import os,sys
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
ladyboy_big_input = '../data/ladyboy_big/'
ladyboy_big_output = '../data/processed/ladyboy_big/'
ladyboy_input = '../data/ladyboy/'
ladyboy_output = '../data/processed/ladyboy/'
girl_input = '../data/girl/'
girl_output = '../data/processed/girl/'
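The save calls in the loops below assume the output directories already exist; if they might not, creating them up front avoids a FileNotFoundError mid-loop (a small guard, not part of the original run):
In [ ]:
# make sure the output directories exist before the crop loops write into them
for d in (ladyboy_big_output, ladyboy_output, girl_output):
    os.makedirs(d, exist_ok=True)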
In [2]:
cascade_file_src = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascade_file_src)
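One gotcha: cv2.CascadeClassifier does not raise on a missing or bad path; it silently returns an empty classifier that never detects anything, so every image would be skipped. A quick sanity check is worth a cell (the cv2.data.haarcascades fallback assumes the cascade bundled with the opencv-python wheel):
In [ ]:
# CascadeClassifier fails silently on a bad path, so verify it actually loaded
if faceCascade.empty():
    # fall back to the copy bundled with opencv-python (assumes cv2.data is available)
    cascade_file_src = os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
    faceCascade = cv2.CascadeClassifier(cascade_file_src)
assert not faceCascade.empty(), 'could not load haarcascade_frontalface_default.xml'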
In [12]:
for root, dirs, files in os.walk(ladyboy_input):
    for name in files:
        imagePath = os.path.join(root, name)
        # load image and convert to grayscale for detection
        image = cv2.imread(imagePath)
        if image is None:
            # skip files OpenCV cannot read
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # detect faces in the image
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
        # skip if no face detected
        if len(faces) == 0:
            continue
        # open image with PIL for cropping
        im = Image.open(imagePath)
        # center a square box on the first detected face,
        # 1.2x the face size but no larger than the image itself
        (x, y, w, h) = faces[0]
        center_x = x + w/2
        center_y = y + h/2
        b_dim = min(max(w, h)*1.2, im.width, im.height)
        box = (int(center_x - b_dim/2), int(center_y - b_dim/2),
               int(center_x + b_dim/2), int(center_y + b_dim/2))
        # crop and resize to the 224x224 model input size;
        # convert to RGB so images with an alpha channel can be saved as JPEG
        crpim = im.crop(box).resize((224, 224)).convert('RGB')
        #plt.imshow(np.asarray(crpim))
        # save cropped face
        crpim.save(ladyboy_output + name, format='JPEG')
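One detail worth noting: near image borders the box above can extend past the frame, and PIL's crop silently pads the out-of-frame region with black. If black borders are undesirable, the window can be shifted to stay inside the image first. A minimal sketch using the names from the loop above (left and top are new names introduced here):
In [ ]:
# shift the square window so it stays fully inside the image (sketch)
# b_dim never exceeds the image dimensions, so these bounds are valid
left = int(min(max(center_x - b_dim/2, 0), im.width - b_dim))
top = int(min(max(center_y - b_dim/2, 0), im.height - b_dim))
box = (left, top, left + int(b_dim), top + int(b_dim))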
In [8]:
for root, dirs, files in os.walk(ladyboy_big_input):
    for name in files:
        imagePath = os.path.join(root, name)
        # load image and convert to grayscale for detection
        image = cv2.imread(imagePath)
        if image is None:
            # skip files OpenCV cannot read
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # detect faces in the image
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
        # skip if no face detected
        if len(faces) == 0:
            continue
        # open image with PIL for cropping
        im = Image.open(imagePath)
        # center a square box on the first detected face,
        # 1.2x the face size but no larger than the image itself
        (x, y, w, h) = faces[0]
        center_x = x + w/2
        center_y = y + h/2
        b_dim = min(max(w, h)*1.2, im.width, im.height)
        box = (int(center_x - b_dim/2), int(center_y - b_dim/2),
               int(center_x + b_dim/2), int(center_y + b_dim/2))
        # crop and resize to the 224x224 model input size;
        # convert to RGB so images with an alpha channel can be saved as JPEG
        crpim = im.crop(box).resize((224, 224)).convert('RGB')
        #plt.imshow(np.asarray(crpim))
        # save cropped face
        crpim.save(ladyboy_big_output + name, format='JPEG')
In [10]:
for root, dirs, files in os.walk(girl_input):
    for name in files:
        imagePath = os.path.join(root, name)
        # load image and convert to grayscale for detection
        image = cv2.imread(imagePath)
        if image is None:
            # skip files OpenCV cannot read
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # detect faces in the image
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
        # skip if no face detected
        if len(faces) == 0:
            continue
        # open image with PIL for cropping
        im = Image.open(imagePath)
        # center a square box on the first detected face,
        # 1.2x the face size but no larger than the image itself
        (x, y, w, h) = faces[0]
        center_x = x + w/2
        center_y = y + h/2
        b_dim = min(max(w, h)*1.2, im.width, im.height)
        box = (int(center_x - b_dim/2), int(center_y - b_dim/2),
               int(center_x + b_dim/2), int(center_y + b_dim/2))
        # crop and resize to the 224x224 model input size;
        # convert to RGB so images with an alpha channel can be saved as JPEG
        crpim = im.crop(box).resize((224, 224)).convert('RGB')
        #plt.imshow(np.asarray(crpim))
        # save cropped face
        crpim.save(girl_output + name, format='JPEG')
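To reproduce the capture rates quoted at the top, the inputs and outputs can be counted once the loops finish (a sketch; it assumes the processed directories contain only the crops written by these cells):
In [ ]:
# compare number of crops written against number of source pictures
for label, src, dst in [('girl', girl_input, girl_output),
                        ('ladyboy', ladyboy_input, ladyboy_output),
                        ('ladyboy_big', ladyboy_big_input, ladyboy_big_output)]:
    n_src = sum(len(files) for _, _, files in os.walk(src))
    n_dst = len(os.listdir(dst))
    print('{}: {} faces captured out of {} pictures'.format(label, n_dst, n_src))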