In [ ]:
def run_final_classifier(path):
    # import
    import matplotlib.pyplot as plt
    from glob import glob
    import numpy as np
    import scipy.ndimage as ndimg
    import skimage.filter as filter
    import skimage.transform as transform
    import re
    from os import listdir
    from multiprocessing import Pool, cpu_count
    from pylab import imread
    from time import time
    import pickle
    from sklearn.ensemble import RandomForestClassifier
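    # NOTE: this cell targets older library APIs -- in newer releases the module
    # is skimage.filters (hsobel/vsobel became sobel_h/sobel_v, canny moved to
    # skimage.feature) and scipy.ndimage.imread was removed (imageio.imread is
    # the usual replacement).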
    
    def row(img2d):
        return np.shape(img2d)[0]
        
    def col(img2d):
        return np.shape(img2d)[1]
    
    def layer_mean(img2d):
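        # Mean intensity of one channel, ignoring pure-white (255) pixels
        # (presumably so blank/background pixels do not dominate).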
        img1d = img2d[img2d<255]
        return img1d.mean()
    
    def hist_max(img2d):
        # Most common intensity (histogram mode), ignoring pure-white (255) pixels.
        # np.histogram avoids creating matplotlib figures inside Pool workers.
        img1d = img2d[img2d<255]
        n, bins = np.histogram(img1d, np.arange(255))
        return bins[n.argmax()]
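
    # The next five helpers are edge/texture features: each applies a 5x5 median
    # filter to the grayscale image and returns an edge response normalised by
    # the image (or Hough accumulator) size.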
    
    def edge_length(img2d):
        "gray input"
        imgsize = float((img2d.shape[0]*img2d.shape[1]))
        med_filter = ndimg.median_filter(img2d, size = (5,5))
        edges = filter.canny(med_filter,3)
        return edges.sum()/imgsize
    
    def edge_sobel_h(img2d):
        "gray input"
        imgsize = float((img2d.shape[0]*img2d.shape[1]))
        med_filter = ndimg.median_filter(img2d, size = (5,5))
        edges_h = filter.hsobel(med_filter/255.)
        return edges_h.sum()/imgsize
    
    def edge_sobel_v(img2d):
        "gray input"
        imgsize = float((img2d.shape[0]*img2d.shape[1]))
        med_filter = ndimg.median_filter(img2d, size = (5,5))
        edges_v = filter.vsobel(med_filter/255.)
        return edges_v.sum()/imgsize
    
    def edge_sobel(img2d):
        "gray input"
        imgsize = float((img2d.shape[0]*img2d.shape[1]))
        med_filter = ndimg.median_filter(img2d, size = (5,5))
        edges = filter.sobel(med_filter/255.)
        return edges.sum()/imgsize
    
    def houghLine(img2d):
        "gray input"
        med_filter = ndimg.median_filter(img2d, size = (5,5))
        edges = filter.sobel(med_filter/255.)
        H, theta, distances = transform.hough_line(edges)
        imgsize = float(len(theta)*len(distances))
        return H.sum()/imgsize
    
    def cate_extract(image_path):
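        # Maps a training-style path (MYDIRECTORY/<category>/<file>) to a numeric
        # label via a name -> index cate_map. The test images handled below sit in
        # a flat directory and are unlabeled, so this helper is not used here.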
        cate_temp = image_path.replace(MYDIRECTORY,'')
        cate = re.search(r'/.+?/', cate_temp)
        return cate_map[cate.group()[1:-1]]
    
    def features(image_path):
        # Read the image in colour and in grayscale.
        img2d = ndimg.imread(image_path)
        img2d_gray = ndimg.imread(image_path, flatten= True)
        
        row_n = row(img2d_gray)
        col_n = col(img2d_gray)
        
        red_mean = layer_mean(img2d[...,0])
        green_mean = layer_mean(img2d[...,1])
        blue_mean = layer_mean(img2d[...,2])
        gray_mean = layer_mean(img2d_gray)
        
        red_most = hist_max(img2d[...,0])
        green_most = hist_max(img2d[...,1])
        blue_most = hist_max(img2d[...,2])
        gray_most = hist_max(img2d_gray)
        
        length = edge_length(img2d_gray)
        sobel_h = edge_sobel_h(img2d_gray)
        sobel_v = edge_sobel_v(img2d_gray)
        sobel = edge_sobel(img2d_gray)
        hough = houghLine(img2d_gray)
        
        # Test images are unlabeled, so no category column is appended here
        # (unlike the training pipeline, which presumably appends the label).
        
        return [row_n, col_n, red_mean, green_mean, blue_mean, gray_mean,
                red_most, green_most, blue_most, gray_most,
                length, sobel_h, sobel_v, sobel, hough]
        
    """
    AY 250 - Scientific Research Computing with Python
    Homework Assignment 4 - Parallel Feature Extraction Example
    Author: Christopher Klein, Joshua Bloom
    """
    # The test-image directory is passed in as the `path` argument.
    MYDIRECTORY = path
    
    # FUNCTION DEFINITIONS
    # Quick function to divide up a large list into multiple small lists, 
    # attempting to keep them all the same size. 
    def split_seq(seq, size):
        newseq = []
        splitsize = 1.0/size*len(seq)
        for i in range(size):
            newseq.append(seq[int(round(i*splitsize)):
                int(round((i+1)*splitsize))])
        return newseq
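    # For example, split_seq(list(range(6)), 3) -> [[0, 1], [2, 3], [4, 5]]:
    # roughly equal chunks, one per worker process.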
    # Feature-extraction driver: takes a list of image paths and returns the
    # corresponding list of per-image feature vectors from features().
    def extract_features(image_path_list):
        feature_list = []
        for image_path in image_path_list:
            feature_list.append(features(image_path))
        return feature_list
    ### Main program starts here ###################################################
    # We first collect all the local paths to all the images in one list
    image_paths = []
    imgsList = listdir(MYDIRECTORY)
    
    # Build the full path to every file in the test directory
    # (assumes the directory contains only image files).
    for name in imgsList:
        image_paths.append(MYDIRECTORY+'/'+name)
    
    # Then we run the feature extraction using multiprocessing.Pool so that the
    # work is parallelized across CPU cores. (To see the effect of parallelizing,
    # set numprocessors to less than cpu_count().)
    numprocessors = cpu_count()
    
    # Cut the image_paths list into one chunk per worker process.
    split_image_paths = split_seq(image_paths, numprocessors)
    
    # Ok, this block is where the parallel code runs. We time it so we can get a 
    # feel for the speed up.
    start_time = time()
    p = Pool(numprocessors)
    result = p.map_async(extract_features, split_image_paths)
    poolresult = result.get()
    end_time = time()
    p.close()
    p.join()
    # All done, print timing results.
    print ("Finished extracting features. Total time: " + 
        str(round(end_time-start_time, 3)) + " s, or " + 
        str( round( (end_time-start_time)/len(image_paths), 5 ) ) + " s/image.")
    # This took about 10-11 seconds on my 2.2 GHz, Core i7 MacBook Pro. It may also
    # be affected by hard disk read speeds.
    
    # To tidy up, we flatten poolresult into a single list holding the feature
    # vector of every image.
    combined_result = []
    for single_proc_result in poolresult:
        for single_image_result in single_proc_result:
            combined_result.append(single_image_result)
    # X_test holds the feature vectors of the test images (one row per image).
    X_test = np.array(combined_result)
    
    # Load the classifier trained earlier (pickled as 'trained_classifier.p').
    with open('trained_classifier.p', 'rb') as f:
        clf = pickle.load(f)
    
    # Build the index -> category-name table used to print the predictions.
    # NOTE: `categories` must be the ordered list of class names used when the
    # classifier was trained; it has to be defined or loaded before this point.
    cate_map = dict(zip(range(len(categories)), categories))
    # Predict a category index for every test image.
    Y_pred_num = clf.predict(X_test)
    
    # Print one row per test image: filename followed by the predicted class.
    print 'filename' + 15*' ' + 'predicted_class'
    print '_ '*20
    for i in range(len(Y_pred_num)):
        filename = image_paths[i].replace(path, '')
        print filename[1:] + ' '*6 + cate_map[Y_pred_num[i]]
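
# Example usage (a sketch; the path below is a placeholder): point the function
# at a directory of unlabeled test images. It assumes 'trained_classifier.p'
# exists in the working directory and that `categories` is available inside the
# function (see the note above).
# run_final_classifier('/path/to/test_images')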