In [1]:
import face_recognition
import cv2
In [2]:
import os
from os.path import basename
import glob
import sys
import types
import subprocess
from random import randint
import json
import gc
In [3]:
import skvideo.io
import numpy as np
import scipy.misc
from skimage.transform import rescale, resize, downscale_local_mean
In [4]:
import PIL
In [5]:
import keras
from keras.preprocessing import image
from keras.models import model_from_json
from keras.optimizers import SGD, RMSprop, Adagrad
from keras.applications.inception_v3 import preprocess_input
In [6]:
(major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.')
if int(major_ver) < 3 :
print ("Update OpenCV ...")
sys.exit(1)
In [7]:
source = '../video/One_Direction-Drag_Me_Down.mp4'
In [8]:
video_capture = cv2.VideoCapture(source)
if video_capture.isOpened():
    print ("Imported video using OpenCV ...")
else:
    # cv2.VideoCapture does not raise on failure, so check isOpened() before falling back
    video_capture = skvideo.io.vread(source)
    print ("Imported video using scikit-video ...")
In [9]:
sgd = SGD(lr=1e-7, decay=0.5, momentum=1, nesterov=True)
rms = RMSprop(lr=1e-7, rho=0.9, epsilon=1e-08, decay=0.0)
ada = Adagrad(lr=1e-7, epsilon=1e-08, decay=0.0)
optimizer = sgd
IMG_HEIGHT = 299
IMG_WIDTH = 299
In [10]:
length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
In [11]:
save_path = "../proc_vid.mp4"
save_audio = "../audio.wav"
save_path_w_audio = "../proc_vid_audio.mp4"
output_dir = '../output/'
In [12]:
face_locations = []
face_encodings = []
face_names = []
frame_number = 0
face_count = 0
In [13]:
w, h = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
print ("Source image width: "+ str(w))
print ("Source image height: "+ str(h))
fps = video_capture.get(cv2.CAP_PROP_FPS)
print ("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps))
In [14]:
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_writer = cv2.VideoWriter(save_path, fourcc, fps, (w,h), True)
In [15]:
reference_image_path = "../ref_img/"
file_list = glob.glob(reference_image_path + '/*.jpg')
In [16]:
n_proc_frames = length
resize_img = False
verbose = True
gen_train_img = True
interleaved = False
use_deep_learning = True
annotate = True
process_this_frame = True
inverse_scale_factor = 1
In [17]:
def compile_model(model):
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
In [18]:
def load_prediction_model(args):
try:
with open(args.config_file[0]) as json_file:
model_json = json_file.read()
model = model_from_json(model_json)
except:
print ("Please specify a model configuration file ...")
sys.exit(1)
try:
model.load_weights(args.weights_file[0])
print ("Loaded model weights from: " + str(args.weights_file[0]))
except:
print ("Error loading model weights ...")
sys.exit(1)
try:
with open(args.labels_file[0]) as json_file:
labels = json.load(json_file)
print ("Loaded labels from: " + str(args.labels_file[0]))
except:
print ("No labels loaded ...")
sys.exit(1)
return model, labels
In [19]:
def gen_predict(model):
try:
compile_model(model)
print ("Model successfully compiled ...")
except:
print ("Model failed to compile ...")
print ("Compiling predictor function ...") # to avoid the delay during video capture.
_ = model.predict(np.zeros((1, IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.float32), batch_size=1)  # warm-up prediction on a blank image
print ("Compilation completed ...")
In [20]:
args = types.SimpleNamespace()
args.config_file = ['../model/trained_config.json']
args.weights_file = ['../model/trained_weights.model']
args.labels_file = ['../model/trained_labels.json']
args.output_dir = ['../output/']
In [21]:
model, labels = load_prediction_model(args)
gen_predict(model)  # compile the model and trigger the warm-up prediction before the capture loop
In OpenCV, passing interpolation=cv2.INTER_CUBIC to cv2.resize performs bicubic interpolation over a 4x4 pixel neighborhood.
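As a quick illustration (separate from the pipeline above), the snippet below upscales a dummy crop to the 299x299 network input size with bicubic interpolation; the array and sizes are made up purely for demonstration:

import cv2
import numpy as np
dummy_crop = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)  # stand-in for a face crop taken from a frame
resized = cv2.resize(dummy_crop, dsize=(299, 299), interpolation=cv2.INTER_CUBIC)  # bicubic: cubic fit over a 4x4 neighborhood
print(resized.shape)  # (299, 299, 3)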
In [22]:
while (video_capture.isOpened()):
ret, frame = video_capture.read() # Grab a single frame of video
frame_number += 1
if resize_img ==True:
isf = inverse_scale_factor
small_frame = cv2.resize(frame, (0, 0), fx=(1/isf), fy=(1/isf)) # Resize frame of video to 1/inverse_scale_factor size for faster processing
else:
isf = 1
small_frame = frame
if frame_number <=n_proc_frames:
if ret ==True:
if process_this_frame:
face_locations = face_recognition.face_locations(small_frame) # Find all the faces and face encodings in the current frame of video
face_encodings = face_recognition.face_encodings(small_frame, face_locations)
face_names = []
if annotate == True or gen_train_img == True:
for face_encoding in face_encodings:
for file_path in file_list:
reference_image = face_recognition.load_image_file(file_path)
try:
reference_face_encoding = face_recognition.face_encodings(reference_image)[0]
if verbose == True:
print ("Processed face encodings ...")
else:
pass
except:
if verbose == True:
print("Failed processing face encodings ...")
else:
pass
if annotate == True:
name_ID = (os.path.splitext(basename(file_path))[0])
name_ID = name_ID.replace("_", " ")
match = face_recognition.compare_faces([reference_face_encoding], face_encoding) # See if the face is a match for the known face(s)
name = "Unknown"
if match[0]:
name = name_ID
face_names.append(name)
else:
pass
else:
if verbose == True:
print ("Skipping face recognition mode ...")
else:
pass
else:
if verbose == True:
print ("Skipping frame ...")
else:
pass
if interleaved == True:
process_this_frame = not process_this_frame # Only process every other frame of video to save time
else:
process_this_frame = process_this_frame
# Display the results
for (top, right, bottom, left), name in zip(face_locations, face_names):
# Scale back up face locations since the frame we detected in was scaled to scaling factor size
top *= int(isf)
right *= int(isf)
bottom *= int(isf)
left *= int(isf)
# Draw an ellipse around the face
ex = left
ey = top
ew = int(abs(right - ex))
eh = int(abs(bottom - ey))
p1 = int(ew/2 + ex)
p2 = int(eh/2 + ey)
h1 = int(ew/2)
h2 = int(eh/2)
square = frame[max(ey - eh//2, 0):ey + 3*eh//2, max(ex - ew//2, 0):ex + 3*ew//2]  # crop the face region with a margin around the detected box
if use_deep_learning == True and annotate == True:
preds_square = cv2.resize(square.astype(np.float32), \
dsize=(IMG_WIDTH, IMG_HEIGHT),\
interpolation = cv2.INTER_CUBIC)
try:
_X_ = image.img_to_array(preds_square)
del (preds_square)
_X_ = np.expand_dims(_X_, axis=0)
_X_ = preprocess_input(_X_)
probabilities = model.predict(_X_, batch_size=1).flatten()
del (_X_)
prediction = labels[np.argmax(probabilities)]
name = (str(prediction)).replace("_", " ")
print ("Face recognition using deep-learning ...")
print (prediction + "\t" + "\t".join(map(lambda x: "%.2f" % x, probabilities)))
print (str(prediction))
del (prediction)
gc.collect()
except:
print ("Failed to create a prediction ...")
else:
pass
if gen_train_img == True:
random_number = randint(10000000, 99999999)
random_number = str(random_number)
cv2.imwrite(os.path.join(output_dir,
str(name.replace(" ", "_")) + "_" +
str(random_number) +
"_loc_" + str(p1) + "_" +
str(p2) + "_" +
str(h1) + "_" +
str(h2) +
"_frame_%d.jpg" % face_count), square)
if verbose == True:
print ("Saved frame: "+ str(face_count)+" with face detected ..." )
if name != "Unknown":
print ("Possible match for detected face: " + str(name))
else:
pass
else:
pass
cv2.ellipse(frame, (p1, p2), (h1,h2), 0,0,360, (0,255,0), 2)
del (square)
face_count += 1
else:
pass
if annotate == True:
font = cv2.FONT_HERSHEY_DUPLEX
cv2.rectangle(frame, (p1 - 100, bottom - 2), (p1 + 100, bottom + 33), (0, 0, 255), cv2.FILLED)
cv2.putText(frame, name, (p1 - 94, bottom + 23 ), font, 0.75, (255, 255, 255), 1) # Draw a label with a name below the face
else:
if verbose == True:
print ("No identifiers to annotate. Try setting annotate flag to True ...")
else:
pass
try:
video_writer.write(frame)
if verbose == True:
print("Processed frame {} / {}".format(frame_number, length))
else:
pass
except:
if verbose == True:
print("Failed writing frame {} / {}".format(frame_number, length))
else:
pass
else:
if verbose == True:
print("No frame to process ...")
else:
pass
else:
if verbose == True:
print ("Processed "+ str(n_proc_frames) + " frames")
print ("Detected " + str(face_count) + " faces" )
else:
print ("Detected " + str(face_count) + " faces" )
break
In [23]:
video_capture.release()
video_writer.release()
In [24]:
cmd = 'ffmpeg -i %s -ab 320000 -ac 2 -ar 44100 -vn %s' % (source, save_audio)
print (cmd)
subprocess.call(cmd, shell=True)
Out[24]:
In [25]:
cmd = 'ffmpeg -y -i %s -i %s -shortest -c:v copy -c:a aac -b:a 256k %s' % (save_path, save_audio, save_path_w_audio)
print (cmd)
subprocess.call(cmd, shell=True)
print('Muxing completed ...')
print('Saved output file to: %s' % (save_path_w_audio))
In [26]:
from keras.utils import plot_model
import pydot
import graphviz # apt-get install -y graphviz libgraphviz-dev
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
In [27]:
plot_model(model, to_file=os.path.join(args.output_dir[0], 'model_face_detection.png'))
SVG(model_to_dot(model).create(prog='dot', format='svg'))
Out[27]:
In [ ]: