In [1]:
import numpy as np
import pickle
from skimage.feature import peak_local_max as plm
from skimage.filter import vsobel, hsobel
from os import listdir
from pylab import imread
from time import time
# Root directory of the image data set. The driver code in this file lists it
# expecting one sub-directory per category; entries whose name starts with '.'
# are skipped, and each sub-directory name is used as the class label.
MYDIRECTORY = '/Users/sean/Desktop/50_categories' # Change this line if you wish to re-extract training feature sets
def extract_features(image_path_list):
    """ Run feature extraction over a list of training image paths.

    image_path_list -- sequence of '/'-separated image paths.  The directory
                       that directly contains each file (second-to-last path
                       component) is taken as that image's category.

    Returns a two-element list [features, targets]: `features` is a NumPy
    array with one row per image and one column per feature (feature_1 ..
    feature_23, in that order); `targets` is a NumPy array holding the
    category name of each image.

    Progress percentages are written to stdout while the images are processed.
    """
    import sys  # local import: only used to write the progress line in place

    # Ordered extractors; their position defines the feature-matrix columns.
    extractors = (
        feature_1,   # image size
        feature_2,   # mean red-channel
        feature_3,   # mean green-channel
        feature_4,   # mean blue-channel
        feature_5,   # mean luminosity
        feature_6,   # median luminosity
        feature_7,   # standard deviation luminosity
        feature_8,   # median red-channel
        feature_9,   # median green-channel
        feature_10,  # median blue-channel
        feature_11,  # standard deviation red-channel
        feature_12,  # standard deviation green-channel
        feature_13,  # standard deviation blue-channel
        feature_14,  # mean luminosity of vertical edge map
        feature_15,  # median luminosity of vertical edge map
        feature_16,  # standard deviation luminosity of vertical edge map
        feature_17,  # mean luminosity of horizontal edge map
        feature_18,  # median luminosity of horizontal edge map
        feature_19,  # standard deviation luminosity of horizontal edge map
        feature_20,  # pixels above threshold lum for horizontal edge map
        feature_21,  # pixels above threshold lum for vertical edge map
        feature_22,  # aspect ratio of image
        feature_23,  # number of image peaks
    )

    feature_list = []
    target_list = []
    total = len(image_path_list)
    k = 100  # max number of evenly spaced percentage announcements (should be <= 100)
    print("\tFeature extraction completion:")
    # Image counts at which progress is announced.  Floor division keeps the
    # thresholds integral on every Python version; a set gives O(1) lookups.
    announcements = set((step + 1) * total // k for step in range(k))

    for i, image_path in enumerate(image_path_list):
        image_array = imread(image_path)
        target_list.append(image_path.split('/')[-2])  # target = name of category
        feature_list.append([extract(image_array) for extract in extractors])

        # Give the user progress updates regarding how far along feature
        # extraction is (only works if not parallelized)
        done = i + 1
        if done in announcements:
            # Report the number of images finished so far, so the final
            # announcement reads 100%.
            sys.stdout.write("{0:.0f}%... ".format(100.0 * done / total))
            sys.stdout.flush()
    sys.stdout.write("\n")
    return [np.array(feature_list), np.array(target_list)]  # easier indexing
#----------------------- Features list -----------------------
def feature_1(image_array):
    """ Return the size of the image array: its total number of elements.

    For a multi-channel image this counts every channel value
    (height * width * channels), not just the number of pixels.
    """
    return image_array.size
def feature_2(image_array):
    """ Mean of the red channel (channel index 0).

    An array that is not 3-D is averaged as a whole.  The value is in the
    scale of the array itself (0-255 for 8-bit data).
    """
    has_channels = image_array.ndim == 3
    red = image_array[:, :, 0] if has_channels else image_array
    return red.mean()
def feature_3(image_array):
    """ Return the average green-channel value (channel index 1) for the picture.

    An array that is not 3-D is averaged as a whole.  The value is in the
    scale of the array itself (0-255 for 8-bit data).
    """
    if image_array.ndim == 3:
        return image_array[:, :, 1].mean()
    return image_array.mean()
def feature_4(image_array):
    """ Return the average blue-channel value (channel index 2) for the picture.

    An array that is not 3-D is averaged as a whole.  The value is in the
    scale of the array itself (0-255 for 8-bit data).
    """
    if image_array.ndim == 3:
        return image_array[:, :, 2].mean()
    return image_array.mean()
def feature_5(image_array):
    """ Mean luminosity of the picture.

    For a 3-D array the luminosity of a pixel is the unweighted mean over the
    channel axis; any other array is averaged directly.
    """
    if image_array.ndim != 3:
        return image_array.mean()
    luminosity = image_array.mean(axis=2)
    return luminosity.mean()
def feature_6(image_array):
    """ Median luminosity over all pixels.

    A 3-D array is first collapsed to the unweighted mean over its channel
    axis; any other array is used as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    return np.median(luminosity)
def feature_7(image_array):
    """ Standard deviation of the luminosity over all pixels.

    A 3-D array is first collapsed to the unweighted mean over its channel
    axis; any other array is used as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    return np.std(luminosity)
def feature_8(image_array):
    """ Median of the red channel (channel index 0).

    An array that is not 3-D contributes all of its values.  The value is in
    the scale of the array itself (0-255 for 8-bit data).
    """
    has_channels = image_array.ndim == 3
    red = image_array[:, :, 0] if has_channels else image_array
    return np.median(red)
def feature_9(image_array):
    """ Return the median green-channel value (channel index 1) for the picture.

    An array that is not 3-D contributes all of its values.  The value is in
    the scale of the array itself (0-255 for 8-bit data).
    """
    if image_array.ndim == 3:
        return np.median(image_array[:, :, 1])
    return np.median(image_array)
def feature_10(image_array):
    """ Return the median blue-channel value (channel index 2) for the picture.

    An array that is not 3-D contributes all of its values.  The value is in
    the scale of the array itself (0-255 for 8-bit data).
    """
    if image_array.ndim == 3:
        return np.median(image_array[:, :, 2])
    return np.median(image_array)
def feature_11(image_array):
    """ Standard deviation of the red channel (channel index 0).

    An array that is not 3-D contributes all of its values.  The value is in
    the scale of the array itself (0-255 for 8-bit data).
    """
    has_channels = image_array.ndim == 3
    red = image_array[:, :, 0] if has_channels else image_array
    return np.std(red)
def feature_12(image_array):
    """ Return the standard deviation of the green-channel value (channel index 1).

    An array that is not 3-D contributes all of its values.  The value is in
    the scale of the array itself (0-255 for 8-bit data).
    """
    if image_array.ndim == 3:
        return np.std(image_array[:, :, 1])
    return np.std(image_array)
def feature_13(image_array):
    """ Return the standard deviation of the blue-channel value (channel index 2).

    An array that is not 3-D contributes all of its values.  The value is in
    the scale of the array itself (0-255 for 8-bit data).
    """
    if image_array.ndim == 3:
        return np.std(image_array[:, :, 2])
    return np.std(image_array)
def feature_14(image_array):
    """ Mean value of the vertical edge map (output of `vsobel`).

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before filtering; any other array is filtered as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    vertical_edges = vsobel(luminosity)
    return np.mean(vertical_edges)
def feature_15(image_array):
    """ Median value of the vertical edge map (output of `vsobel`).

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before filtering; any other array is filtered as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    vertical_edges = vsobel(luminosity)
    return np.median(vertical_edges)
def feature_16(image_array):
    """ Standard deviation of the vertical edge map (output of `vsobel`).

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before filtering; any other array is filtered as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    vertical_edges = vsobel(luminosity)
    return np.std(vertical_edges)
def feature_17(image_array):
    """ Mean value of the horizontal edge map (output of `hsobel`).

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before filtering; any other array is filtered as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    horizontal_edges = hsobel(luminosity)
    return np.mean(horizontal_edges)
def feature_18(image_array):
    """ Median value of the horizontal edge map (output of `hsobel`).

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before filtering; any other array is filtered as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    horizontal_edges = hsobel(luminosity)
    return np.median(horizontal_edges)
def feature_19(image_array):
    """ Standard deviation of the horizontal edge map (output of `hsobel`).

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before filtering; any other array is filtered as given.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    horizontal_edges = hsobel(luminosity)
    return np.std(horizontal_edges)
def feature_20(image_array):
    """ Returns the fraction of horizontal-edge-map values that are at or
    above a fixed threshold.

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before the `hsobel` filter is applied.
    """
    thresh = 20  # Based on looking at histograms of edge maps of pictures
    if image_array.ndim == 3:
        image_array = image_array.mean(axis=2)
    h_edge = hsobel(image_array)
    # NOTE(review): hsobel may rescale integer (e.g. 8-bit grey-scale) input
    # to [0, 1], in which case this threshold could never be met -- confirm.
    # count_nonzero tallies the mask inside NumPy rather than looping over
    # every pixel with the builtin sum.
    return float(np.count_nonzero(h_edge >= thresh)) / h_edge.size
def feature_21(image_array):
    """ Returns the fraction of vertical-edge-map values that are at or
    above a fixed threshold.

    A 3-D array is reduced to luminosity (unweighted mean over the channel
    axis) before the `vsobel` filter is applied.
    """
    thresh = 20  # Based on looking at histograms of edge maps of pictures
    if image_array.ndim == 3:
        image_array = image_array.mean(axis=2)
    v_edge = vsobel(image_array)
    # NOTE(review): vsobel may rescale integer (e.g. 8-bit grey-scale) input
    # to [0, 1], in which case this threshold could never be met -- confirm.
    # count_nonzero tallies the mask inside NumPy rather than looping over
    # every pixel with the builtin sum.
    return float(np.count_nonzero(v_edge >= thresh)) / v_edge.size
def feature_22(image_array):
    """ Returns the aspect ratio of the image, computed as height / width
    (first array dimension divided by the second).

    Raises ValueError if the array is neither 2-D nor 3-D.
    """
    if image_array.ndim == 3:
        # Only the shape is needed, so read the two leading dimensions
        # directly instead of averaging the whole image over its channels.
        height, width = image_array.shape[:2]
    else:
        (height, width) = image_array.shape
    return float(height) / width
def feature_23(image_array):
    """ Number of image peaks: the length of the result of `peak_local_max`
    (imported as `plm`) called on the array with min_distance=50.
    """
    peaks = plm(image_array, min_distance=50)
    return len(peaks)
In [3]:
# Driver cell: collect every image path under MYDIRECTORY (one sub-directory
# per category), extract the features, pickle them, and print a summary.
num_categs = 4  # actual number of categories is one less than this b/c of ".DS_Store"
num_pics = 5
image_paths = []
image_names = []
categories = listdir(MYDIRECTORY)
for category in categories:  # [:num_categs]:
    if category[0] != '.':
        image_names = listdir(MYDIRECTORY + "/" + category)
        for name in image_names:  # [:num_pics]:
            # Hidden entries (e.g. ".DS_Store") are not images; skip them the
            # same way hidden category entries are skipped.
            if name.startswith('.'):
                continue
            image_paths.append(MYDIRECTORY + "/" + category + "/" + name)
    else:
        # Single-argument call form prints identically on Python 2 and 3.
        print("bad category '" + category + "' was skipped!")
np.random.shuffle(image_paths)  # randomize the data set

print("\t Now beginning Feature extraction...")
before_extraction = time()
features = extract_features(image_paths)
after_extraction = time()

# Use a context manager so the pickle file is flushed and closed promptly.
with open("extracted_features.p", "wb") as pickle_file:
    pickle.dump(features, pickle_file)

elapsed = after_extraction - before_extraction
print("Feature extraction complete after {0:.2f} seconds, or {1:.4f} seconds per image, for {2:.0f} total images."
      .format(elapsed, elapsed / float(len(image_paths)), len(image_paths)))
print("Feature set contains {0:.0f} instances, each with {1:.0f} features.".format(features[0].shape[0], features[0].shape[1]))
print("Target set contains {0:.0f} unique classes.".format(len(np.unique(features[1]))))
In [ ]: