# In [3]:  (notebook cell marker)
# imports
import numpy as np
import pickle
from skimage.feature import peak_local_max as plm
from skimage.filter import vsobel, hsobel
from os import listdir
from pylab import imread
from time import time
def extract_features(image_path_list):
    """Extract the feature vector of every image in a list of paths.

    Each path is loaded with ``imread`` and handed to ``feature_1`` through
    ``feature_23`` in order, so column ``j`` of the result holds the value of
    ``feature_<j + 1>``. Progress is written to stdout on a single line.

    Args:
        image_path_list: Sequence of paths to image files.

    Returns:
        A numpy array with one row per image and one column per feature
        (``np.array([])`` when ``image_path_list`` is empty).
    """
    import sys  # local import: only needed for the same-line progress output

    feature_list = []
    total = len(image_path_list)
    k = 100  # number of evenly spaced percentage announcements (should be <= 100)
    print(" Feature extraction completion:")
    # Image counts at which progress is announced. Floor division keeps the
    # thresholds integral on every Python version, and a set makes the
    # per-image membership test O(1).
    announcements = set((step + 1) * total // k for step in range(k))
    for i, image_path in enumerate(image_path_list):
        image_array = imread(image_path)
        feature_list.append([
            feature_1(image_array),   # image size
            feature_2(image_array),   # mean red-channel
            feature_3(image_array),   # mean green-channel
            feature_4(image_array),   # mean blue-channel
            feature_5(image_array),   # mean luminosity
            feature_6(image_array),   # median luminosity
            feature_7(image_array),   # standard deviation luminosity
            feature_8(image_array),   # median red-channel
            feature_9(image_array),   # median green-channel
            feature_10(image_array),  # median blue-channel
            feature_11(image_array),  # standard deviation red-channel
            feature_12(image_array),  # standard deviation green-channel
            feature_13(image_array),  # standard deviation blue-channel
            feature_14(image_array),  # mean luminosity of vertical edge map
            feature_15(image_array),  # median luminosity of vertical edge map
            feature_16(image_array),  # standard deviation luminosity of vertical edge map
            feature_17(image_array),  # mean luminosity of horizontal edge map
            feature_18(image_array),  # median luminosity of horizontal edge map
            feature_19(image_array),  # standard deviation luminosity of horizontal edge map
            feature_20(image_array),  # pixels above threshold lum for horizontal edge map
            feature_21(image_array),  # pixels above threshold lum for vertical edge map
            feature_22(image_array),  # aspect ratio of image
            feature_23(image_array),  # number of image peaks
        ])
        # Progress updates (only meaningful when images are processed serially).
        # The percentage is based on the number of images finished (i + 1), so
        # the last announcement reads 100% rather than stopping one image short.
        completed = i + 1
        if completed in announcements:
            sys.stdout.write("{0:.0f}%... ".format(100.0 * completed / total))
            sys.stdout.flush()
    sys.stdout.write("\n")
    return np.array(feature_list)  # easier indexing
#--------------------------------- Start Features list ---------------------------------
def feature_1(image_array):
    """Return the total number of elements stored in the image array.

    This is ``image_array.size``, the product of all array dimensions, so a
    multi-channel image counts every channel value (height * width *
    channels) rather than height * width alone.
    """
    element_count = image_array.size
    return element_count
def feature_2(image_array):
    """Return the mean of the red channel (channel index 0).

    A 2-D array has no separate colour planes, so the mean of the whole
    array is returned for it. No rescaling is applied: the result is in
    whatever value range the input array uses.
    """
    if image_array.ndim != 3:
        return image_array.mean()
    red_plane = image_array[:, :, 0]
    return red_plane.mean()
def feature_3(image_array):
    """Return the mean of the green channel (channel index 1).

    A 2-D array has no separate colour planes, so the mean of the whole
    array is returned for it. No rescaling is applied: the result is in
    whatever value range the input array uses.
    """
    is_multichannel = image_array.ndim == 3
    green_plane = image_array[:, :, 1] if is_multichannel else image_array
    return green_plane.mean()
def feature_4(image_array):
    """Return the mean of the blue channel (channel index 2).

    A 2-D array has no separate colour planes, so the mean of the whole
    array is returned for it. No rescaling is applied: the result is in
    whatever value range the input array uses.
    """
    if image_array.ndim != 3:
        return image_array.mean()
    blue_plane = image_array[:, :, 2]
    return blue_plane.mean()
def feature_5(image_array):
    """Return the mean luminosity of the image.

    For a 3-D array the luminosity of a pixel is the plain average over the
    last axis (all channels weighted equally); the mean of those per-pixel
    values is returned. A 2-D array is treated as luminosity already.
    """
    if image_array.ndim == 3:
        per_pixel_luminosity = image_array.mean(axis=2)
        return per_pixel_luminosity.mean()
    return image_array.mean()
def feature_6(image_array):
    """Return the median luminosity of the image.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; a 2-D array is used as-is.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    return np.median(luminosity)
def feature_7(image_array):
    """Return the standard deviation of the image's luminosity.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; a 2-D array is used as-is.
    """
    if image_array.ndim == 3:
        luminosity = image_array.mean(axis=2)
    else:
        luminosity = image_array
    return np.std(luminosity)
def feature_8(image_array):
    """Return the median of the red channel (channel index 0).

    For a 2-D array the median of the whole array is returned. No rescaling
    is applied to the values.
    """
    if image_array.ndim != 3:
        return np.median(image_array)
    red_plane = image_array[:, :, 0]
    return np.median(red_plane)
def feature_9(image_array):
    """Return the median of the green channel (channel index 1).

    For a 2-D array the median of the whole array is returned. No rescaling
    is applied to the values.
    """
    green_plane = image_array[:, :, 1] if image_array.ndim == 3 else image_array
    return np.median(green_plane)
def feature_10(image_array):
    """Return the median of the blue channel (channel index 2).

    For a 2-D array the median of the whole array is returned. No rescaling
    is applied to the values.
    """
    if image_array.ndim != 3:
        return np.median(image_array)
    blue_plane = image_array[:, :, 2]
    return np.median(blue_plane)
def feature_11(image_array):
    """Return the standard deviation of the red channel (channel index 0).

    For a 2-D array the standard deviation of the whole array is returned.
    No rescaling is applied to the values.
    """
    red_plane = image_array[:, :, 0] if image_array.ndim == 3 else image_array
    return np.std(red_plane)
def feature_12(image_array):
    """Return the standard deviation of the green channel (channel index 1).

    For a 2-D array the standard deviation of the whole array is returned.
    No rescaling is applied to the values.
    """
    if image_array.ndim != 3:
        return np.std(image_array)
    green_plane = image_array[:, :, 1]
    return np.std(green_plane)
def feature_13(image_array):
    """Return the standard deviation of the blue channel (channel index 2).

    For a 2-D array the standard deviation of the whole array is returned.
    No rescaling is applied to the values.
    """
    blue_plane = image_array[:, :, 2] if image_array.ndim == 3 else image_array
    return np.std(blue_plane)
def feature_14(image_array):
    """Return the mean value of the vertical edge map of the image.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``vsobel``.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    vertical_edges = vsobel(luminosity)
    return np.mean(vertical_edges)
def feature_15(image_array):
    """Return the median value of the vertical edge map of the image.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``vsobel``.
    """
    if image_array.ndim == 3:
        luminosity = image_array.mean(axis=2)
    else:
        luminosity = image_array
    vertical_edges = vsobel(luminosity)
    return np.median(vertical_edges)
def feature_16(image_array):
    """Return the standard deviation of the vertical edge map of the image.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``vsobel``.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    vertical_edges = vsobel(luminosity)
    return np.std(vertical_edges)
def feature_17(image_array):
    """Return the mean value of the horizontal edge map of the image.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``hsobel``.
    """
    if image_array.ndim == 3:
        luminosity = image_array.mean(axis=2)
    else:
        luminosity = image_array
    horizontal_edges = hsobel(luminosity)
    return np.mean(horizontal_edges)
def feature_18(image_array):
    """Return the median value of the horizontal edge map of the image.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``hsobel``.
    """
    luminosity = image_array.mean(axis=2) if image_array.ndim == 3 else image_array
    horizontal_edges = hsobel(luminosity)
    return np.median(horizontal_edges)
def feature_19(image_array):
    """Return the standard deviation of the horizontal edge map of the image.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``hsobel``.
    """
    if image_array.ndim == 3:
        luminosity = image_array.mean(axis=2)
    else:
        luminosity = image_array
    horizontal_edges = hsobel(luminosity)
    return np.std(horizontal_edges)
def feature_20(image_array, thresh=20):
    """Return the fraction of horizontal-edge-map pixels at or above a threshold.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``hsobel``.

    Args:
        image_array: Image as a 2-D or 3-D numpy array.
        thresh: Edge-map value a pixel must reach (``>=``) to be counted.
            The default of 20 was chosen by looking at histograms of edge
            maps of pictures.

    Returns:
        Number of edge-map pixels ``>= thresh`` divided by the number of
        edge-map pixels, as a float.
    """
    if image_array.ndim == 3:
        image_array = image_array.mean(axis=2)
    h_edge = hsobel(image_array)
    # Count in numpy rather than with the builtin sum(), which would walk
    # every pixel of the boolean mask one at a time in Python.
    strong_pixels = np.count_nonzero(h_edge >= thresh)
    return 1.0 * strong_pixels / h_edge.size
def feature_21(image_array, thresh=20):
    """Return the fraction of vertical-edge-map pixels at or above a threshold.

    A 3-D array is first reduced to one luminosity plane by averaging over
    its last axis; the plane is then filtered with ``vsobel``.

    Args:
        image_array: Image as a 2-D or 3-D numpy array.
        thresh: Edge-map value a pixel must reach (``>=``) to be counted.
            The default of 20 was chosen by looking at histograms of edge
            maps of pictures.

    Returns:
        Number of edge-map pixels ``>= thresh`` divided by the number of
        edge-map pixels, as a float.
    """
    if image_array.ndim == 3:
        image_array = image_array.mean(axis=2)
    v_edge = vsobel(image_array)
    # Count in numpy rather than with the builtin sum(), which would walk
    # every pixel of the boolean mask one at a time in Python.
    strong_pixels = np.count_nonzero(v_edge >= thresh)
    return 1.0 * strong_pixels / v_edge.size
def feature_22(image_array):
    """Return the aspect ratio of the image as height divided by width.

    Height and width are the first two dimensions of the array, so the
    result is the same for a 2-D array and for a 3-D array with a trailing
    channel axis. The shape is read directly; no pixel data is touched.

    Args:
        image_array: Image as a numpy array with at least two dimensions.

    Returns:
        ``height / width`` as a float.
    """
    height, width = image_array.shape[:2]
    return 1.0 * height / width
def feature_23(image_array):
    """Return the number of local peaks found in the image.

    The array is passed unchanged to ``peak_local_max`` (imported as
    ``plm``) with ``min_distance=50``; the length of its result is returned.
    Unlike the edge-map features, no reduction to a luminosity plane is done
    here first.
    """
    peak_coordinates = plm(image_array, min_distance=50)
    return len(peak_coordinates)
#--------------------------------- End Features list ---------------------------------
def collect_paths(imDirectory):
    """List the image files of a directory.

    Every directory entry is assumed to be an image file (no sub-folders).
    Entries whose name starts with ``.`` are skipped and reported on stdout.
    Entries are processed in sorted order so that the result does not depend
    on the arbitrary order in which ``listdir`` returns them.

    Args:
        imDirectory: Path of the directory holding the images.

    Returns:
        A tuple ``(image_paths, image_names)`` of two parallel lists: the
        path of each kept file (``imDirectory + "/" + name``) and its bare
        file name.
    """
    image_paths = []
    image_names_filtered = []
    for name in sorted(listdir(imDirectory)):
        if name.startswith('.'):
            # Parenthesised single-argument print: valid on Python 2 and 3.
            print("bad image '" + name + "' was skipped!")
            continue
        image_paths.append(imDirectory + "/" + name)
        image_names_filtered.append(name)
    return (image_paths, image_names_filtered)
def generate_feature_set(images_directory):
    """Build the feature matrix for every image in a directory.

    The image files are listed with ``collect_paths`` and their features
    computed with ``extract_features``; timing and shape information is
    printed to stdout.

    Args:
        images_directory: Path of the directory holding the images.

    Returns:
        A tuple ``(features, image_names)``: the feature array, one row per
        image, and the list of file names in the same order as the rows.

    Raises:
        ValueError: If the directory contains no usable image file. (The
            per-image timing and the feature-count report below are
            undefined for an empty set.)
    """
    image_paths, image_names = collect_paths(images_directory)
    image_count = len(image_paths)
    if image_count == 0:
        raise ValueError(
            "no usable image files found in '{0}'".format(images_directory))
    print("\t Now beginning rectangularization of validation images...")
    before_extraction = time()
    features = extract_features(image_paths)
    elapsed = time() - before_extraction
    print("\nFeature extraction complete after {0:.2f} seconds, or {1:.4f} seconds per image, for {2:.0f} total images."
          .format(elapsed, elapsed / float(image_count), image_count))
    print("Feature set contains {0:.0f} instances, each with {1:.0f} extracted features."
          .format(features.shape[0], features.shape[1]))
    return (features, image_names)
#--------------------------------- Main Function ---------------------------------
def run_final_classifier(path, forest="./trained_classifier.p"):
    """Classify every image in a directory and write the results to a file.

    Main entry point. Loads the pickled classifier, builds the feature set
    for the images with ``generate_feature_set``, predicts a class for each
    image and writes a two-column table (file name, predicted class) to
    ``Output.txt`` in the current working directory.

    Args:
        path: Path of the directory holding the validation images.
        forest: Path of the pickled, trained classifier (generated from the
            hw4_classifier_dev ipython notebook).

    Returns:
        A tuple ``(Y_pred, val_images)``: the predictions returned by the
        classifier and the image file names, in matching order.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load a
    # classifier file that comes from a trusted source.
    with open(forest, "rb") as classifier_file:  # closed even if loading fails
        clf = pickle.load(classifier_file)
    (X, val_images) = generate_feature_set(path)  # generate feature set from the images
    Y_pred = clf.predict(X)

    # Create output file
    lines = ["filename" + " " * 22 + "predicted_class\n",
             "---------------------------------------------\n"]
    for name, prediction in zip(val_images, Y_pred):
        # Pad names to a 30-character column, but always keep at least one
        # space so that long file names do not run into the prediction.
        padding = " " * max(30 - len(name), 1)
        # str() lets non-string class labels (e.g. integers) be written too.
        lines.append(name + padding + str(prediction) + "\n")
    with open("Output.txt", "w") as text_file:
        text_file.write("".join(lines))
    print("\nSee the file 'Output.txt' for the classifier's predictions.")
    return (Y_pred, val_images)
#--------------------------------- Main Program & Instructions ---------------------------------
if __name__ == '__main__':
    # Running the file directly performs no classification; it only prints a
    # reminder of how to call the main function.
    usage_hint = "Use the function: (predictions, file_names) = run_final_classifier(path, forest) to evaluate classifier on validation set"
    print(usage_hint)
# In [7]:  (notebook cell marker)
# Example invocation: the path below is a placeholder and must be replaced
# with the directory that holds the validation images.
predictions, file_names = run_final_classifier('/path_to/validation_image_set/goes_here')