Previamente a utilizar esta técnica para reconocer los fitolitos en nuestras imágenes, la utilizaremos para reconocer caras en diversas imágenes. Si el reconocimiento de caras es efectivo, más tarde aplicaremos esta técnica para reconocer fitolitos.
Antes que nada deberemos extraer las características de los datos, las cuales trataremos de obtener mediante la técnica HoG (Histogram of Oriented Gradients), que transforma los píxeles en un vector que contiene información mucho más significativa. Finalmente, utilizaremos una SVM para construir nuestro reconocedor de caras.
Este notebook está basado en el notebook "Application: A Face Detection Pipeline" del libro Python Data Science Handbook de Jake VanderPlas, cuyo contenido se encuentra en GitHub.
In [10]:
#Imports
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
Como veníamos contando, HoG es una técnica para la extracción de características, desarrollada en el contexto del procesado de imágenes, que involucra los siguientes pasos:
Utilizando, como ya indicábamos, una SVM junto con nuestro extractor de características HoG construiremos un detector de caras. Para la construcción de este detector se deberán seguir los siguientes pasos:
Una vez realizados dichos pasos, podríamos enviar nuevas imágenes al clasificador para que tratase de reconocer nuevas caras. Para ello seguiría los dos siguientes pasos:
In [11]:
from sklearn.datasets import fetch_lfw_people
# Load the LFW people dataset through scikit-learn; its images are used as
# the positive (face) training patches.
faces = fetch_lfw_people()
positive_patches = faces.images
# Last expression of the cell: shows the shape of the positive set.
positive_patches.shape
Out[11]:
Una vez obtenido nuestro conjunto de positivos, necesitamos obtener un conjunto de imágenes que no tengan caras. Para ello, la técnica que se utiliza en el notebook en el que me estoy basando es tomar diversas imágenes de las cuales se obtienen subimágenes o miniaturas (thumbnails, en inglés) con diversas escalas.
In [12]:
from skimage import feature, color, data, transform
# Names of scikit-image sample images used as the source of negative
# (non-face) patches.
imgs_to_use = ['camera', 'text', 'coins', 'moon',
               'page', 'clock', 'immunohistochemistry',
               'chelsea', 'coffee', 'hubble_deep_field']
# Load every sample as a 2-D grayscale array. Several samples are already
# 2-D: older scikit-image releases returned such input from rgb2gray
# unchanged, while recent releases reject it, so only colour images
# (ndim == 3) are converted.
images = [img if img.ndim == 2 else color.rgb2gray(img)
          for img in (getattr(data, name)() for name in imgs_to_use)]
In [13]:
from sklearn.feature_extraction.image import PatchExtractor
def extract_patches(img, N, scale=1.0, patch_size=positive_patches[0].shape):
    """Sample patches from ``img`` and return them at ``patch_size``.

    Parameters
    ----------
    img : 2-D array
        Image to sample from.
    N : int
        Passed to ``PatchExtractor`` as ``max_patches``.
    scale : float
        Factor applied to ``patch_size`` to get the size of the region cut
        out of ``img``; regions are resized back to ``patch_size`` when
        ``scale`` is not 1.
    patch_size : tuple
        Shape of the returned patches (defaults to the shape of the first
        positive patch).

    Returns
    -------
    Array of patches, each of shape ``patch_size``.
    """
    # Size of the window actually cut out of the image.
    scaled_shape = (scale * np.array(patch_size)).astype(int)
    extracted_patch_size = tuple(scaled_shape)

    # Fixed random_state keeps the sampled patches reproducible.
    extractor = PatchExtractor(patch_size=extracted_patch_size,
                               max_patches=N, random_state=0)
    # PatchExtractor works on a batch of images, hence the leading axis.
    patches = extractor.transform(img[np.newaxis])

    if scale == 1:
        return patches
    # Bring scaled windows back to the common patch size.
    return np.array([transform.resize(window, patch_size)
                     for window in patches])
# Build the negative set: 1000 patches requested per image at each of the
# three scales, all stacked into a single array.
negative_patches = np.vstack([extract_patches(im, 1000, scale)
                              for im in images for scale in [0.5, 1.0, 2.0]])
# Last expression of the cell: shows the shape of the negative set.
negative_patches.shape
Out[13]:
In [14]:
from itertools import chain
# HOG feature vector of every patch, positives first and negatives after.
X_train = np.array([feature.hog(im)
                    for im in chain(positive_patches,
                                    negative_patches)])
# Labels: 0 everywhere, then 1 for the leading block of positive patches.
y_train = np.zeros(X_train.shape[0])
y_train[:positive_patches.shape[0]] = 1
In [15]:
from sklearn.naive_bayes import GaussianNB
# cross_val_score lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed from scikit-learn. Fall back
# to it only on releases that predate model_selection.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
# Baseline: cross-validated scores of a Gaussian naive Bayes classifier on
# the HOG features (last expression of the cell, so it is displayed).
cross_val_score(GaussianNB(), X_train, y_train)
Out[15]:
In [16]:
from sklearn.svm import LinearSVC
# GridSearchCV lives in sklearn.model_selection; the old
# sklearn.grid_search module was removed from scikit-learn. Fall back to it
# only on releases that predate model_selection.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV
# Search the regularisation parameter C of a linear SVM.
grid = GridSearchCV(LinearSVC(), {'C': [1.0, 2.0, 4.0, 8.0]})
grid.fit(X_train, y_train)
grid.best_score_
grid.best_params_
# Keep the best estimator and fit it on the full training set.
model = grid.best_estimator_
model.fit(X_train, y_train)
Out[16]:
Una vez entrenado nuestro clasificador, vamos a probar con una nueva imagen. Como ya explicábamos en la introducción, cuando le enviamos nuevas imágenes a nuestro clasificador, este deberá realizar dos pasos:
La nueva imagen que le vamos a enviar a nuestro clasificador contendrá un único ejemplo.
In [17]:
def sliding_window(img, patch_size=positive_patches[0].shape,
                   istep=2, jstep=2, scale=1.0):
    """Yield ``((i, j), patch)`` for a window slid across ``img``.

    Parameters
    ----------
    img : 2-D array
        Image to scan.
    patch_size : tuple
        ``(rows, cols)`` shape of the yielded patches (defaults to the shape
        of the first positive patch).
    istep, jstep : int
        Step of the window along rows and columns.
    scale : float
        Factor applied to ``patch_size`` to get the window cut out of
        ``img``; windows are resized back to ``patch_size`` when ``scale``
        is not 1.

    Yields
    ------
    ``(i, j)`` top-left corner (row, column) of the window and the patch.
    """
    Ni, Nj = (int(scale * s) for s in patch_size)
    for i in range(0, img.shape[0] - Ni, istep):
        # The column bound must use the window width Nj. Using the height
        # Ni here either skips the right-hand side of the image or yields
        # truncated windows, depending on which of the two is larger.
        for j in range(0, img.shape[1] - Nj, jstep):
            patch = img[i:i + Ni, j:j + Nj]
            if scale != 1:
                patch = transform.resize(patch, patch_size)
            yield (i, j), patch
In [18]:
from skimage.exposure import rescale_intensity
from skimage import io
from skimage.transform import rescale
# Load the first test image from disk.
img1 = io.imread("../../rsc/img/my_face.jpg")
# Convert the image to grayscale
from skimage.color import rgb2gray
img1 = rgb2gray(img1)
# Rescale by a factor of 0.3 (presumably so the face roughly matches the
# training patch size -- TODO confirm).
img1 = rescale(img1, 0.3)
# Show the resulting image
plt.imshow(img1, cmap='gray')
plt.axis('off');
In [19]:
# Slide the window over img1 and split the results into window origins
# (i, j) and the corresponding patches.
indices, patches = zip(*sliding_window(img1))
# HOG feature vector of every window.
patches_hog = np.array([feature.hog(patch) for patch in patches])
patches_hog.shape
Out[19]:
In [20]:
# Classify every window; the positive class is labelled 1 in training, so
# the sum is the number of windows predicted as that class.
labels = model.predict(patches_hog)
labels.sum()
Out[20]:
In [21]:
# Show img1 and outline every window the classifier labelled 1.
fig, ax = plt.subplots()
ax.imshow(img1, cmap='gray')
ax.axis('off')
Ni, Nj = positive_patches[0].shape
indices = np.array(indices)
boxes1 = list()
for i, j in indices[labels == 1]:
    # Boxes are stored as (x1, y1, x2, y2): j is the column (x) and i the
    # row (y), so the far corner is (j + Nj, i + Ni). The previous order
    # (i + Ni, j + Nj) swapped x2 and y2.
    boxes1.append((j, i, j + Nj, i + Ni))
    ax.add_patch(plt.Rectangle((j, i), Nj, Ni, edgecolor='red',
                               alpha=0.3, lw=2, facecolor='none'))
boxes1 = np.array(boxes1)
Como podemos observar nuestro clasificador reconoce perfectamente una cara. Ahora vamos a probar con una imagen en la que aparezcan varias caras.
In [22]:
# Load the second test image from disk.
img2 = io.imread("../../rsc/img/faces_test.jpg")
# Convert the image to grayscale
from skimage.color import rgb2gray
img2 = rgb2gray(img2)
# Rescale by a factor of 0.65 before scanning.
img2 = rescale(img2, 0.65)
# Show the resulting image
plt.imshow(img2, cmap='gray')
plt.axis('off');
In [23]:
# Slide the window over img2 and split the results into window origins
# (i, j) and the corresponding patches.
indices, patches = zip(*sliding_window(img2))
# HOG feature vector of every window.
patches_hog = np.array([feature.hog(patch) for patch in patches])
patches_hog.shape
Out[23]:
In [24]:
# Classify every window of img2; the sum counts the windows predicted as
# class 1.
labels = model.predict(patches_hog)
labels.sum()
Out[24]:
In [25]:
# Show img2 and outline every window the classifier labelled 1, collecting
# the windows as (x1, y1, x2, y2) boxes.
fig, ax = plt.subplots()
ax.imshow(img2, cmap='gray')
ax.axis('off')

Ni, Nj = positive_patches[0].shape
indices = np.array(indices)

boxes2 = []
for i, j in indices[labels == 1]:
    # j is the column (x) and i the row (y) of the window's top-left corner.
    boxes2.append((j, i, j + Nj, i + Ni))
    outline = plt.Rectangle((j, i), Nj, Ni, edgecolor='red',
                            alpha=0.3, lw=2, facecolor='none')
    ax.add_patch(outline)
boxes2 = np.array(boxes2)
A primera vista parece que el clasificador es bastante efectivo. En el caso de esta última imagen ha sido capaz de reconocer la mayoría de las caras, aun siendo una imagen compleja de procesar por el distinto tamaño de las caras y la incompletitud de algunas.
Como hemos podido observar, nuestro clasificador detecta muchas más caras de las que realmente hay. Esto se debe a que normalmente los clasificadores detectan múltiples ventanas en torno al objeto a detectar, en este caso caras. Esta problemática se soluciona mediante Non-Maximum Suppression.
In [26]:
# import the necessary packages
import numpy as np
# Malisiewicz et al.
def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Parameters
    ----------
    boxes : array-like of shape (n, 4)
        Boxes as ``(x1, y1, x2, y2)`` rows. Lists and integer arrays
        (signed or unsigned) are accepted.
    overlapThresh : float
        A remaining box is discarded when the part of its area covered by
        the currently picked box exceeds this ratio.

    Returns
    -------
    ``[]`` when ``boxes`` is empty, otherwise an integer array with the
    picked boxes, in the order they were picked (largest bottom edge
    ``y2`` first).
    """
    # Accept plain lists/tuples as well as arrays.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Integer boxes are converted to floats: the overlap ratio needs true
    # division, and unsigned types would wrap around on the subtractions
    # below.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        remaining = idxs[:last]

        # corners of the intersection between the picked box and every
        # remaining box
        xx1 = np.maximum(x1[i], x1[remaining])
        yy1 = np.maximum(y1[i], y1[remaining])
        xx2 = np.minimum(x2[i], x2[remaining])
        yy2 = np.minimum(y2[i], y2[remaining])

        # width and height of the intersection (0 when disjoint)
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # ratio of each remaining box's area covered by the picked box
        overlap = (w * h) / area[remaining]

        # delete the picked index and every index whose overlap exceeds
        # the threshold
        idxs = np.delete(idxs, np.concatenate(([last],
                                               np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked using the
    # integer data type
    return boxes[pick].astype("int")
In [27]:
import cv2
# Work on copies: drawing directly on img1 would permanently paint the
# rectangles into the source image used by the other cells.
image = img1.copy()
orig = img1.copy()
boundingBoxes = boxes1
# draw every detected box on the "before" image
for (startX, startY, endX, endY) in boundingBoxes:
    # cast to built-in int in case the installed OpenCV is strict about
    # NumPy scalar coordinates
    cv2.rectangle(orig, (int(startX), int(startY)),
                  (int(endX), int(endY)), (0, 0, 255), 2)
# perform non-maximum suppression on the bounding boxes
pick = non_max_suppression_fast(boundingBoxes, 0.2)
# draw only the boxes that survived suppression on the "after" image
for (startX, startY, endX, endY) in pick:
    cv2.rectangle(image, (int(startX), int(startY)),
                  (int(endX), int(endY)), (0, 255, 0), 2)
# display the images and wait for a key press
cv2.imshow("Original", orig)
cv2.imshow("After NMS", image)
cv2.waitKey(0)
Out[27]:
In [28]:
# Work on copies: drawing directly on img2 would permanently paint the
# rectangles into the source image used by the other cells.
image = img2.copy()
orig = img2.copy()
boundingBoxes = boxes2
# draw every detected box on the "before" image
for (startX, startY, endX, endY) in boundingBoxes:
    # cast to built-in int in case the installed OpenCV is strict about
    # NumPy scalar coordinates
    cv2.rectangle(orig, (int(startX), int(startY)),
                  (int(endX), int(endY)), (0, 0, 255), 2)
# perform non-maximum suppression on the bounding boxes
pick = non_max_suppression_fast(boundingBoxes, 0.2)
# draw only the boxes that survived suppression on the "after" image
for (startX, startY, endX, endY) in pick:
    cv2.rectangle(image, (int(startX), int(startY)),
                  (int(endX), int(endY)), (0, 255, 0), 2)
# display the images and wait for a key press
cv2.imshow("Original", orig)
cv2.imshow("After NMS", image)
cv2.waitKey(0)
Out[28]:
In [29]:
from skimage.exposure import rescale_intensity
from skimage import io
from skimage.transform import rescale
# Load the third test image from disk.
img3 = io.imread("../../rsc/img/family.jpg")
# Convert the image to grayscale
from skimage.color import rgb2gray
img3 = rgb2gray(img3)
# Rescale by a factor of 0.5 before scanning.
img3 = rescale(img3, 0.5)
# Show the resulting image
plt.imshow(img3, cmap='gray')
plt.axis('off');
In [30]:
# Slide the window over img3 and split the results into window origins
# (i, j) and the corresponding patches.
indices, patches = zip(*sliding_window(img3))
# HOG feature vector of every window.
patches_hog = np.array([feature.hog(patch) for patch in patches])
patches_hog.shape
Out[30]:
In [31]:
# Classify every window of img3; the sum counts the windows predicted as
# class 1.
labels = model.predict(patches_hog)
labels.sum()
Out[31]:
In [32]:
# Show img3 and outline every window the classifier labelled 1, collecting
# the windows as (x1, y1, x2, y2) boxes.
fig, ax = plt.subplots()
ax.imshow(img3, cmap='gray')
ax.axis('off')

Ni, Nj = positive_patches[0].shape
indices = np.array(indices)

boxes3 = []
for i, j in indices[labels == 1]:
    # j is the column (x) and i the row (y) of the window's top-left corner.
    boxes3.append((j, i, j + Nj, i + Ni))
    outline = plt.Rectangle((j, i), Nj, Ni, edgecolor='red',
                            alpha=0.3, lw=2, facecolor='none')
    ax.add_patch(outline)
boxes3 = np.array(boxes3)
In [33]:
# Work on copies: drawing directly on img3 would permanently paint the
# rectangles into the source image used by the other cells.
image = img3.copy()
orig = img3.copy()
boundingBoxes = boxes3
# draw every detected box on the "before" image
for (startX, startY, endX, endY) in boundingBoxes:
    # cast to built-in int in case the installed OpenCV is strict about
    # NumPy scalar coordinates
    cv2.rectangle(orig, (int(startX), int(startY)),
                  (int(endX), int(endY)), (0, 0, 255), 2)
# perform non-maximum suppression on the bounding boxes
pick = non_max_suppression_fast(boundingBoxes, 0.1)
# draw only the boxes that survived suppression on the "after" image
for (startX, startY, endX, endY) in pick:
    cv2.rectangle(image, (int(startX), int(startY)),
                  (int(endX), int(endY)), (0, 255, 0), 2)
# display the images and wait for a key press
cv2.imshow("Original", orig)
cv2.imshow("After NMS", image)
cv2.waitKey(0)
Out[33]: