Kevin J. Walchko, created 5 Dec 2016
Let's see if we can build a color model and use it to find an object in an image. We are going to choose a simple example that is not real, but looks pretty good. Artists typically limit the number of colors used in their artwork for a variety of reasons (e.g., print costs, image compression performance online, etc.). Thus, our target will be color-limited, and our model will be smaller than a model of a real-world object would be.
In [1]:
%matplotlib inline
In [2]:
from __future__ import print_function
from __future__ import division
import numpy as np
from matplotlib import pyplot as plt
import cv2
import time
Let's take a look at trying to find Mario in some images. First we need to switch between the RGB and HSV color spaces.
A good resource for understanding RGB and HSV is colorizer.org where you can play with some sliders and see how it changes the color in different color spaces.
In OpenCV, the HSV hue range is [0,179], the saturation range is [0,255] and the value range is [0,255]. Different software packages use different scales, so if you are comparing OpenCV values with theirs, you need to normalize these ranges. ref
The big reason to use HSV is that it separates color information (chroma) from intensity or lighting (luma). Because value is separated, you can construct a histogram or thresholding rules using only saturation and hue. In theory this will work regardless of lighting changes in the value channel; in practice it is just a nice improvement. Even by singling out only the hue, you still have a very meaningful representation of the base color that will likely work much better than RGB. The end result is more robust color thresholding with simpler parameters.
Hue is a continuous, circular representation of color, so 0 and 360 degrees are the same hue, which gives you more flexibility with the buckets you use in a histogram. Geometrically, you can picture the HSV color space as a cone or cylinder with hue being the angle, saturation being the radius, and value being the height.
In [3]:
# Training images of the target; the six files are mario-1.png .. mario-6.png.
images = ['hist_pics/mario-{}.png'.format(n) for n in range(1, 7)]

rgb = []
for image in images:
    bgr = cv2.imread(image, cv2.IMREAD_COLOR)
    if bgr is None:
        # cv2.imread() does not raise on a missing/unreadable file, it returns
        # None -- fail here with the offending path instead of inside cvtColor
        raise IOError('Could not read image: {}'.format(image))
    rgb.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))  # pretty images

# preview the first three training images side by side
for col, picture in enumerate(rgb[:3], start=1):
    plt.subplot(1, 3, col)
    plt.imshow(picture)
In [4]:
# convert every RGB training image into HSV, keeping the same order as rgb
hsv = [cv2.cvtColor(picture, cv2.COLOR_RGB2HSV) for picture in rgb]

# hand the first three raw HSV arrays to imshow, one per column
for col in range(3):
    plt.subplot(1, 3, col + 1)
    plt.imshow(hsv[col])
In [5]:
# Build one mask per training image to block out what we don't want.
# The background is white, i.e. hsv = [x, 0, 255] where x is a don't care
# (white is s=0, v=255), so a threshold on the saturation channel removes it.
masks = []
for image in hsv:
    saturation = cv2.split(image)[1]
    # THRESH_BINARY: saturation above 50 -> 255, everything else -> 0
    binary = cv2.threshold(saturation, 50, 255, cv2.THRESH_BINARY)[1]
    masks.append(binary)

# show the first three masks in a 1x4 grid (the fourth slot stays empty)
for col in range(3):
    plt.subplot(1, 4, col + 1)
    plt.imshow(masks[col], cmap='gray', interpolation='bicubic')
    plt.xticks([])  # hide tick values on the X axis
    plt.yticks([])  # hide tick values on the Y axis
plt.colorbar();
In [6]:
# Apply every mask to its RGB image to see what shows through.
tmp = []
for idx, mask in enumerate(masks):
    # replicate the single-channel mask so it matches the 3-channel image
    mask_3ch = cv2.merge((mask, mask, mask))
    tmp.append(cv2.bitwise_and(rgb[idx], mask_3ch))

# display the first four masked images in one row
for col in range(4):
    plt.subplot(1, 4, col + 1)
    plt.imshow(tmp[col])
    plt.xticks([])  # hide tick values on the X axis
    plt.yticks([])  # hide tick values on the Y axis
Let's use OpenCV's histogram function to create the histograms.
cv2.calcHist(images, channels, mask, histSize, ranges)
In [7]:
class hsvHistogram(object):
    """
    This class creates and holds the histogram information of HSV images. It
    is also used to find our target in a given image. The histogram is 2D,
    looking at hue and saturation, but not value.
    """
    hist = None  # 2D hue/saturation histogram, set by calcHist()/calcHistMask()
    bins = None  # number of bins used for each of the two histogram axes

    def __init__(self, bins):
        """
        bins - number of histogram bins, used for both hue and saturation
        """
        self.bins = bins
        # 5x5 structuring element for the morphological opening in find()
        self.kernel = np.ones((5, 5), np.uint8)

    def calcHist(self, im_array, masks=None):
        """
        Create a hue/saturation histogram and store it in self.hist.

        im_array - list of HSV images, handed to cv2.calcHist as-is
        masks - optional single mask handed to cv2.calcHist as its mask
                argument (None means no mask)
        return - the histogram
        """
        hist = cv2.calcHist(
            im_array,
            [0, 1],                    # channels: hue and saturation
            masks,
            [self.bins, self.bins],
            [0, 180, 0, 256]           # hue range, then saturation range
        )
        self.hist = hist
        return hist

    def calcHistMask(self, im_array, masks):
        """
        Create a histogram from a list of images and a matching list of masks.
        One histogram is computed per (image, mask) pair and the results are
        summed; the sum is stored in self.hist.

        im_array - list of HSV images
        masks - list of masks, masks[i] is applied to im_array[i]
        return - the summed histogram
        raises - ValueError if there are no images or fewer masks than images
        """
        if len(im_array) == 0:
            raise ValueError('calcHistMask needs at least one image')
        if len(masks) < len(im_array):
            raise ValueError('calcHistMask needs one mask per image')

        total = None
        for image, mask in zip(im_array, masks):
            partial = self.calcHist([image], mask)
            if total is None:
                total = partial
            else:
                total += partial

        # calcHist() left the last partial histogram in self.hist; replace it
        # with the accumulated one
        self.hist = total
        return total

    def normalize(self):
        """
        Rescale self.hist in place with min/max normalization so that its
        values span 0 to self.bins.
        """
        cv2.normalize(self.hist, self.hist, 0, self.bins, cv2.NORM_MINMAX)

    def find(self, test, threshold=3):
        """
        Back-project the stored histogram onto an image and return a binary
        mask of the pixels that match.

        test - target image (HSV; its hue and saturation channels are used)
        threshold - a tuning parameter, back-projection values above it are
                    kept
        return - single-channel mask with 255 where the target was found, or
                 1 if no histogram has been computed yet
        """
        if self.hist is None:
            print('Need to init histogram first!')
            return 1

        dst = cv2.calcBackProject(
            [test],
            [0, 1],
            self.hist,
            [0, 180, 0, 256],
            1
        )

        # convolve the back projection with a 5x5 elliptical kernel, writing
        # the result back into dst
        disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
        cv2.filter2D(dst, -1, disc, dst)

        # values above threshold become 255, everything else 0
        ret, thresh = cv2.threshold(dst, threshold, 255, cv2.THRESH_BINARY)

        # morphological opening (erode, then dilate) with self.kernel
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel)

        return thresh

    def plot(self):
        """
        Show the histogram as an image: hue on the Y axis, saturation on the
        X axis.
        """
        # Rows of self.hist are hue bins and columns are saturation bins. The
        # extent maps the bin indices back onto the value ranges given to
        # cv2.calcHist so the ticks agree with the axis labels.
        plt.imshow(
            self.hist,
            interpolation='nearest',
            origin='upper',
            extent=(0, 256, 180, 0),
            aspect='auto'
        )
        plt.ylabel('Hue [0-179]')
        plt.xlabel('Saturation [0-255]')
        plt.title('Histogram')
        plt.colorbar()
In [8]:
# Build the target model: a histogram with 32 bins per axis, accumulated over
# all training images with their masks, then normalized and plotted.
roiHist = hsvHistogram(bins=32)
roiHist.calcHistMask(im_array=hsv, masks=masks)
roiHist.normalize()
roiHist.plot()
In [9]:
# Load the test scene (swap in rgb[0] to test against a training image).
test_path = 'hist_pics/mario-test2.jpg'
test_bgr = cv2.imread(test_path)
if test_bgr is None:
    # cv2.imread() returns None instead of raising when the file is unreadable
    raise IOError('Could not read image: {}'.format(test_path))
test_rgb = cv2.cvtColor(test_bgr, cv2.COLOR_BGR2RGB)  # make pretty for ipython
test_hsv = cv2.cvtColor(test_rgb, cv2.COLOR_RGB2HSV)  # now convert to HSV

# RGB on the left, raw HSV array on the right
for col, picture in enumerate((test_rgb, test_hsv), start=1):
    plt.subplot(1, 2, col)
    plt.imshow(picture)
    plt.xticks([])
    plt.yticks([])
Out[9]:
In [10]:
# look for the target in the test image; 50 is the tuning threshold
thresh = roiHist.find(test_hsv, threshold=50)
thresh = cv2.merge((thresh,) * 3)  # make 3 channels

# keep only the detected pixels of the test image (shown in the next cell)
res = cv2.bitwise_and(test_rgb, thresh)

# plot ----------
# test image and detection mask side by side
pics = np.hstack((test_rgb, thresh))
plt.imshow(pics)
plt.xticks([])
plt.yticks([]);
In [11]:
# show the masked test image computed in the previous cell, without ticks
plt.imshow(res)
plt.xticks([])
plt.yticks([]);
In [1]:
# Second test scene.
test_path = 'hist_pics/mario-test4.jpg'
test_bgr = cv2.imread(test_path)
if test_bgr is None:
    # cv2.imread() returns None instead of raising when the file is unreadable
    raise IOError('Could not read image: {}'.format(test_path))
test_rgb = cv2.cvtColor(test_bgr, cv2.COLOR_BGR2RGB)  # make pretty for ipython
test_hsv = cv2.cvtColor(test_rgb, cv2.COLOR_RGB2HSV)  # now convert to HSV

# detect with a threshold of 15 and keep only the matching pixels
thresh = roiHist.find(test_hsv, 15)
thresh = cv2.merge((thresh, thresh, thresh))  # make 3 channels
res = cv2.bitwise_and(test_rgb, thresh)

# input on the left, detection result on the right
pics = np.hstack((test_rgb, res))
plt.imshow(pics)
plt.xticks([])
plt.yticks([]);
In [13]:
# Third test scene.
test_path = 'hist_pics/mario-test.png'
test_bgr = cv2.imread(test_path)
if test_bgr is None:
    # cv2.imread() returns None instead of raising when the file is unreadable
    raise IOError('Could not read image: {}'.format(test_path))
test_rgb = cv2.cvtColor(test_bgr, cv2.COLOR_BGR2RGB)  # make pretty for ipython
test_hsv = cv2.cvtColor(test_rgb, cv2.COLOR_RGB2HSV)  # now convert to HSV

# detect with a threshold of 50 and keep only the matching pixels
thresh = roiHist.find(test_hsv, 50)
thresh = cv2.merge((thresh, thresh, thresh))  # make 3 channels
res = cv2.bitwise_and(test_rgb, thresh)

# input on the left, detection result on the right
pics = np.hstack((test_rgb, res))
plt.imshow(pics)
plt.xticks([])
plt.yticks([]);
In [14]:
# h,s,v = cv2.split(test_hsv)
# plt.subplot(1,2,1)
# plt.grid(True)
# plt.hist(h.ravel(), 32,[0,180], label='hue');
# plt.subplot(1,2,2)
# plt.hist(s.ravel(), 32,[0,255], label='saturation');
# plt.hist(v.ravel(), 32,[0,255], label='value');
# plt.grid(True)
# plt.legend(loc='upper right');
Some conclusions:
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.