In [1]:
%matplotlib inline
import glob
import os

import cv2
import numpy as np
from matplotlib import pyplot as plt, cm
from scipy.spatial import distance as dist

In [2]:
# cv2.cv.CV_COMP_CORREL: Computes the correlation between the two histograms.
# cv2.cv.CV_COMP_CHISQR: Applies the Chi-Squared distance to the histograms.
# cv2.cv.CV_COMP_INTERSECT: Calculates the intersection between two histograms.
# cv2.cv.CV_COMP_BHATTACHARYYA: Bhattacharyya distance, used to measure the “overlap” between the two histograms.
# cv2.cv.CV_COMP_HELLINGER: A synonym for cv2.cv.CV_COMP_BHATTACHARYYA. I tend to use this synonym over Bhattacharyya, 
# simply because I find it so hard to consistently spell Bhattacharyya.

In [9]:
# initialize the index dictionary to store the image name
# and corresponding histograms and the images dictionary
# to store the images themselves
index = {}
images = {}

# loop over the image paths
for imagePath in glob.glob("./data/jpg/*.jpg"):
	# extract the image filename (assumed to be unique);
	# os.path.basename also copes with native path separators,
	# which a plain search for "/" does not
	filename = os.path.basename(imagePath)

	# load the image; cv2.imread returns None rather than raising
	# when a file cannot be decoded, so skip such files explicitly
	image = cv2.imread(imagePath)
	if image is None:
		print("[WARN] could not read image, skipping: %s" % imagePath)
		continue

	# keep an RGB copy for display with matplotlib
	images[filename] = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

	# extract a 3D color histogram from the image using 8 bins per
	# channel; it is computed on the BGR image as loaded, which is
	# fine for comparisons because every image is treated the same
	hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])

	# normalize and flatten; the destination array is passed
	# explicitly because OpenCV 3+ requires it (OpenCV 2.4 accepts
	# it as well), then update the index
	hist = cv2.normalize(hist, hist).flatten()
	index[filename] = hist

In [16]:
# METHOD #1: UTILIZING OPENCV
# image that every indexed image is compared against
queryName = "IMG_3608_1024.jpg"
queryHist = index[queryName]

# initialize OpenCV methods for histogram comparison; OpenCV 3+
# exposes the constants as cv2.HISTCMP_*, while OpenCV 2.4 only
# provides them in the legacy cv2.cv namespace
try:
	OPENCV_METHODS = (
		("Correlation", cv2.HISTCMP_CORREL),
		("Chi-Squared", cv2.HISTCMP_CHISQR),
		("Intersection", cv2.HISTCMP_INTERSECT),
		("Hellinger", cv2.HISTCMP_BHATTACHARYYA))
except AttributeError:
	OPENCV_METHODS = (
		("Correlation", cv2.cv.CV_COMP_CORREL),
		("Chi-Squared", cv2.cv.CV_COMP_CHISQR),
		("Intersection", cv2.cv.CV_COMP_INTERSECT),
		("Hellinger", cv2.cv.CV_COMP_BHATTACHARYYA))

# show the query image once; it is identical for every method, so
# there is no need to redraw it inside the loop
fig = plt.figure("Query")
ax = fig.add_subplot(1, 1, 1)
ax.imshow(images[queryName])
ax.axis("off")

# loop over the comparison methods
for (methodName, method) in OPENCV_METHODS:
	# correlation and intersection are similarity scores, so their
	# results are sorted in reverse (descending) order; the other
	# methods are sorted ascending
	reverse = methodName in ("Correlation", "Intersection")

	# compare the query histogram with every indexed histogram and
	# sort the (score, filename) pairs
	results = sorted(
		[(cv2.compareHist(queryHist, hist, method), k)
			for (k, hist) in index.items()],
		reverse = reverse)

	# initialize the results figure
	fig = plt.figure("Results: %s" % (methodName))
	fig.suptitle(methodName, fontsize = 10)

	# loop over the results, one subplot per image titled with its score
	for (i, (v, k)) in enumerate(results):
		ax = fig.add_subplot(1, len(results), i + 1)
		ax.set_title("%.2f" % (v))
		ax.imshow(images[k])
		ax.axis("off")



In [17]:
# METHOD #2: UTILIZING SCIPY
# image that every indexed image is compared against
queryName = "IMG_3608_1024.jpg"
queryHist = index[queryName]

# initialize the scipy methods to compute distances
SCIPY_METHODS = (
	("Euclidean", dist.euclidean),
	("Manhattan", dist.cityblock),
	("Chebyshev", dist.chebyshev))

# show the query image once; it is identical for every method, so
# there is no need to redraw it inside the loop
fig = plt.figure("Query")
ax = fig.add_subplot(1, 1, 1)
ax.imshow(images[queryName])
ax.axis("off")

# loop over the comparison methods
for (methodName, method) in SCIPY_METHODS:
	# compute the distance between the query histogram and every
	# indexed histogram, then sort the (distance, filename) pairs
	# in ascending order
	results = sorted(
		[(method(queryHist, hist), k) for (k, hist) in index.items()])

	# initialize the results figure
	fig = plt.figure("Results: %s" % (methodName))
	fig.suptitle(methodName, fontsize = 20)

	# loop over the results, one subplot per image titled with its distance
	for (i, (v, k)) in enumerate(results):
		ax = fig.add_subplot(1, len(results), i + 1)
		ax.set_title("%.2f" % (v))
		ax.imshow(images[k])
		ax.axis("off")


In [18]:
# METHOD #3: ROLL YOUR OWN
def chi2_distance(histA, histB, eps = 1e-10):
	"""Return the chi-squared distance between two histograms.

	The distance is 0.5 * sum((a - b) ** 2 / (a + b + eps)) taken over
	corresponding bins of the two histograms.

	histA, histB: array-likes with the same number of bins; they are
		flattened before comparison.
	eps: small constant added to every denominator so that bins which
		are zero in both histograms do not cause a division by zero.

	Returns the distance as a float; identical non-negative histograms
	give 0.0.
	Raises ValueError if the histograms do not have the same number of
	bins (previously the longer one was silently truncated).
	"""
	# work in float64 on flat arrays so the whole computation is a
	# single vectorized expression instead of a per-bin Python loop
	a = np.asarray(histA, dtype = np.float64).ravel()
	b = np.asarray(histB, dtype = np.float64).ravel()

	if a.size != b.size:
		raise ValueError("histograms must have the same number of bins "
			"(got %d and %d)" % (a.size, b.size))

	# compute and return the chi-squared distance
	return 0.5 * np.sum((a - b) ** 2 / (a + b + eps))

# score every indexed histogram against the query image with the
# custom chi-squared distance, keyed by filename
results = dict(
	(name, chi2_distance(index["IMG_3608_1024.jpg"], feats))
	for (name, feats) in index.items())

# turn the scores into (distance, filename) pairs and sort them in
# ascending order
results = [(score, name) for (name, score) in results.items()]
results.sort()

# draw the query image in a figure of its own
fig = plt.figure("Query")
ax = fig.add_subplot(1, 1, 1)
ax.imshow(images["IMG_3608_1024.jpg"])
plt.axis("off")

# set up the figure that holds the ranked results
fig = plt.figure("Results: Custom Chi-Squared")
fig.suptitle("Custom Chi-Squared", fontsize = 20)

# one subplot per result, laid out in a single row and titled with
# the distance to the query
columns = len(images)
for (position, (score, name)) in enumerate(results, 1):
	ax = fig.add_subplot(1, columns, position)
	ax.set_title("%.2f" % (score))
	plt.imshow(images[name])
	plt.axis("off")

# show the custom method
plt.show()