Vehicle Detection and Tracking

Author: Sergey Morozov


Some of the functions below were provided by Udacity. The author modified some of them and added original code.

Project Steps

  • Perform a Histogram of Oriented Gradients (HOG) feature extraction on a labeled training set of images and train a Linear SVM classifier.
  • Apply a color transform and append binned color features, as well as histograms of color, to your HOG feature vector.
  • Implement a sliding-window technique and use your trained classifier to search for vehicles in images.
  • Run pipeline on a video stream and create a heat map of recurring detections frame by frame to reject outliers and follow detected vehicles.
  • Estimate a bounding box for vehicles detected.

Training Data Extraction

The archives with vehicle and non-vehicle images must be downloaded manually and placed in the root of this repository.

Extract Data

import os
import zipfile
import shutil

# Extract vehicle images (skipped when the target directory already exists).
# NOTE(review): the archive names are assumed from the directory names the
# code expects after extraction ("vehicles" / "non-vehicles") -- confirm.
if not os.path.isdir("vehicle_images"):
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile("vehicles.zip", 'r') as zip_ref:
        zip_ref.extractall(".")
    shutil.move("vehicles", "vehicle_images")
# Extract non-vehicle images
if not os.path.isdir("non-vehicle_images"):
    with zipfile.ZipFile("non-vehicles.zip", 'r') as zip_ref:
        zip_ref.extractall(".")
    shutil.move("non-vehicles", "non-vehicle_images")

Explore Data

import os
import cv2
import glob
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

def data_look(car_list, notcar_list):
    """Extract useful information about provided data.

    Args:
        car_list: sequence of paths to vehicle images.
        notcar_list: sequence of paths to non-vehicle images.

    Returns:
        dict with the keys "n_cars", "n_notcars", "image_shape" (shape of
        one sample image) and "data_type" (dtype of that sample image).

    Raises:
        ValueError: if both lists are empty, so there is no image to inspect.
        IOError: if the sample image cannot be read.
    """
    n_cars = len(car_list)
    n_notcars = len(notcar_list)
    if n_cars == 0 and n_notcars == 0:
        raise ValueError("data_look() needs at least one image path")
    # Read in a test image, either car or notcar
    sample_path = car_list[0] if n_cars else notcar_list[0]
    example_img = cv2.imread(sample_path)
    if example_img is None:
        # cv2.imread() signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(sample_path))
    return {
        "n_cars": n_cars,
        "n_notcars": n_notcars,
        "image_shape": example_img.shape,
        "data_type": example_img.dtype,
    }

# List containing paths to training images
vehicle_paths = glob.glob("vehicle_images/*/*.png")
nonvehicle_paths = glob.glob("non-vehicle_images/*/*.png")

# Extract useful information from data
data_info = data_look(vehicle_paths, nonvehicle_paths)

# Define useful constants
cars_cnt = data_info['n_cars']
notcars_cnt = data_info['n_notcars']
image_shape = data_info['image_shape']
data_type = data_info['data_type']

# Print extracted information
print("Vehicle image count:", cars_cnt)
print("Non-vehicle image count:", notcars_cnt)
print("Image shape:", image_shape)
print("Image type:", data_type)

# Show example images from each category
fig, axs = plt.subplots(1, 8, figsize=(16, 2))

# NOTE(review): the drawing statements were missing from the source; the
# imshow/title/axis calls below are a reconstruction -- confirm.
for i in range(4):
    img = mpimg.imread(vehicle_paths[i * i * 3])
    axs[i].imshow(img)
    axs[i].set_title("Vehicle")
    axs[i].axis("off")
for i in range(4, 8):
    img = mpimg.imread(nonvehicle_paths[i * i * 3])
    axs[i].imshow(img)
    axs[i].set_title("Non-Vehicle")
    axs[i].axis("off")

# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)

plt.savefig('output_images/data_examples.png', bbox_inches="tight")

Feature Extraction

Histogram of Color

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def color_hist(img, nbins=32, bins_range=(0, 256), features_only=True):
    """Compute color histogram features.

    Args:
        img: array indexed as img[:, :, channel]; the first three channels
            are used.
        nbins: number of histogram bins per channel.
        bins_range: (min, max) value range covered by the bins.
        features_only: when True return only the concatenated feature vector.

    Returns:
        The concatenated per-channel bin counts when ``features_only`` is
        True; otherwise the tuple ``(channel1_hist, channel2_hist,
        channel3_hist, bin_centers, hist_features)`` where every
        ``channelN_hist`` is the ``(counts, bin_edges)`` pair returned by
        ``np.histogram``.
    """
    # Compute the histogram of the color channels separately
    channel1_hist, channel2_hist, channel3_hist = (
        np.histogram(img[:, :, channel], bins=nbins, range=bins_range)
        for channel in range(3)
    )
    # Concatenate the histograms into a single feature vector
    hist_features = np.concatenate(
        (channel1_hist[0], channel2_hist[0], channel3_hist[0]))
    if features_only:
        return hist_features
    # Bin centers are the midpoints of consecutive bin edges
    bin_edges = channel1_hist[1]
    bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
    return channel1_hist, channel2_hist, channel3_hist, bin_centers, hist_features

# Draw histogram for each color channel
# As an example consider BGR color space
fig, axs = plt.subplots(2, 4, figsize=(16, 6))

# One row per category: the image itself followed by its B, G and R histograms
vehicle_img = cv2.imread(vehicle_paths[0])
nonvehicle_img = cv2.imread(nonvehicle_paths[0])
imgs_for_hist = [(vehicle_paths[0], "Vehicle"), (nonvehicle_paths[0], "Non-Vehicle")]

for i, (path, title) in enumerate(imgs_for_hist):
    image = cv2.imread(path)
    ch1_hist, ch2_hist, ch3_hist, bincen, feature_vec = \
        color_hist(image, nbins=32, bins_range=(0, 256), features_only=False)
    # NOTE(review): the first column was empty in the source; showing the
    # image under its category title is a reconstruction -- confirm.
    axs[i][0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axs[i][0].set_title(title)
    axs[i][0].axis('off')
    channel_hists = (ch1_hist, ch2_hist, ch3_hist)
    for col, (hist, channel_name) in enumerate(zip(channel_hists, "BGR"), start=1):
        axs[i][col].bar(bincen, hist[0])
        axs[i][col].set_xlim([0, 256])
        axs[i][col].set_title(channel_name + ' Histogram')

# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)

plt.savefig('output_images/histogram_of_color.png', bbox_inches="tight")

Color Distribution

import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

def plot3d(pixels, colors_rgb, fig, pos,
           axis_limits=((0, 255), (0, 255), (0, 255)),
           axis_labels=("R", "G", "B")):
    """Plot pixels in 3D.

    Args:
        pixels: array indexed as pixels[:, :, channel]; the three channels
            provide the x, y and z coordinates of every point.
        colors_rgb: array reshaped to (-1, 3) and used as the point colors.
        fig: figure the 3D axes are added to.
        pos: subplot position passed to ``fig.add_subplot``.
        axis_limits: (min, max) pairs for the x, y and z axes.
        axis_labels: labels for the x, y and z axes.

    Returns:
        The created 3D axes object, for further manipulation.
    """
    # 3D axes
    ax = fig.add_subplot(pos, projection='3d')

    # Set axis limits
    ax.set_xlim(*axis_limits[0])
    ax.set_ylim(*axis_limits[1])
    ax.set_zlim(*axis_limits[2])

    # Set axis labels and sizes
    ax.tick_params(axis='both', which='major', labelsize=7, pad=1)
    ax.set_xlabel(axis_labels[0], fontsize=10, labelpad=1)
    ax.set_ylabel(axis_labels[1], fontsize=10, labelpad=1)
    ax.set_zlabel(axis_labels[2], fontsize=10, labelpad=1)

    # Plot pixel values with colors given in colors_rgb
    ax.scatter(
        pixels[:, :, 0].ravel(),
        pixels[:, :, 1].ravel(),
        pixels[:, :, 2].ravel(),
        c=colors_rgb.reshape((-1, 3)), edgecolors='none')

    return ax  # return Axes3D object for further manipulation

# Read a color image
img = cv2.imread("test_images/test4.jpg")

# Select a small fraction of pixels to plot by subsampling it
scale = max(img.shape[0], img.shape[1], 64) / 64  # at most 64 rows and columns
# cv2.resize expects the target size as (width, height) in whole pixels
img_small = cv2.resize(img, (int(img.shape[1] / scale), int(img.shape[0] / scale)),
                       interpolation=cv2.INTER_NEAREST)

# Convert subsampled image to desired color space(s)
img_small_RGB = cv2.cvtColor(img_small, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR, matplotlib likes RGB
img_small_HSV = cv2.cvtColor(img_small, cv2.COLOR_BGR2HSV)
img_small_HLS = cv2.cvtColor(img_small, cv2.COLOR_BGR2HLS)
img_small_LUV = cv2.cvtColor(img_small, cv2.COLOR_BGR2LUV)
img_small_YUV = cv2.cvtColor(img_small, cv2.COLOR_BGR2YUV)
img_small_YCrCb = cv2.cvtColor(img_small, cv2.COLOR_BGR2YCrCb)
img_small_rgb = img_small_RGB / 255.  # scaled to [0, 1], only for plotting

fig = plt.figure(figsize=(15,10))
# Plot and show
ax = fig.add_subplot(332)
ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plot3d(img_small_RGB, img_small_rgb, fig, 334)
plot3d(img_small_HSV, img_small_rgb, fig, 335, axis_labels=list("HSV"))
plot3d(img_small_HLS, img_small_rgb, fig, 336, axis_labels=list("HLS"))
plot3d(img_small_LUV, img_small_rgb, fig, 337, axis_labels=list("LUV"))
plot3d(img_small_YUV, img_small_rgb, fig, 338, axis_labels=list("YUV"))
plot3d(img_small_YCrCb, img_small_rgb, fig, 339, axis_labels=['Y', 'Cr', 'Cb'])

# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/color_distribution.png', bbox_inches="tight")

Spatial Binning of Color

In [ ]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def bin_spatial(img, size=(32, 32)):
    """Compute binned color features.

    The image is resized to ``size`` with ``cv2.resize`` and the result is
    flattened into a one-dimensional feature vector.
    """
    resized = cv2.resize(img, size)
    return resized.ravel()

# Read test image
img = cv2.imread("test_images/test4.jpg")

# Convert the test image to desired color space(s)
# NOTE(review): the source converted `img_small` (left over from the color
# distribution cell) and never used the image read above -- confirm that
# `img` is the intended input.
img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR, matplotlib likes RGB
img_HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img_HLS = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)
img_LUV = cv2.cvtColor(img, cv2.COLOR_BGR2LUV)
img_YUV = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
img_YCrCb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
fig, axs = plt.subplots(2,3, figsize=(16, 6))

# NOTE(review): the plotting statements of this cell were missing from the
# source; plotting the binned feature vector of every color space is a
# reconstruction -- confirm.
converted_imgs = [(img_RGB, "RGB"), (img_HSV, "HSV"), (img_HLS, "HLS"),
                  (img_LUV, "LUV"), (img_YUV, "YUV"), (img_YCrCb, "YCrCb")]
for ax, (converted, title) in zip(axs.ravel(), converted_imgs):
    ax.plot(bin_spatial(converted, size=(32, 32)))
    ax.set_title(title)

# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/spatial_binning.png', bbox_inches="tight")

Histogram of Oriented Gradients

import cv2
import numpy as np
from skimage.feature import hog
import matplotlib.pyplot as plt
%matplotlib inline

def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    """Return HOG features and, optionally, a visualization.

    Args:
        img: single-channel image passed to ``skimage.feature.hog``.
        orient: number of orientation bins.
        pix_per_cell: cell size in pixels (cells are square).
        cell_per_block: block size in cells (blocks are square).
        vis: when True also return the HOG visualization image.
        feature_vec: forwarded as ``feature_vector`` when ``vis`` is False;
            the visualizing call always uses ``feature_vector=False``.

    Returns:
        ``(features, hog_image)`` when ``vis`` is True, else ``features``.
    """
    # NOTE(review): newer scikit-image releases spell the keyword
    # `visualize`; adjust if the installed version rejects `visualise`.
    cell_shape = (pix_per_cell, pix_per_cell)
    block_shape = (cell_per_block, cell_per_block)
    if vis:
        features, hog_image = hog(img, orientations=orient, pixels_per_cell=cell_shape,
                                  cells_per_block=block_shape, transform_sqrt=False,
                                  visualise=True, feature_vector=False)
        return features, hog_image
    features = hog(img, orientations=orient, pixels_per_cell=cell_shape,
                   cells_per_block=block_shape, transform_sqrt=False,
                   visualise=False, feature_vector=feature_vec)
    return features
# Read in the image
image = cv2.imread(vehicle_paths[123])
img_RGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR, matplotlib likes RGB
img_HSV = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
img_HLS = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
img_LUV = cv2.cvtColor(image, cv2.COLOR_BGR2LUV)
img_YUV = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
img_YCrCb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)

# Every entry pairs a converted image with the names of its channels,
# listed in the order the channels are stored.
imgs = [(img_RGB, list("RGB")),
        (img_HSV, list("HSV")),
        (img_HLS, list("HLS")),
        (img_LUV, list("LUV")),
        (img_YUV, list("YUV")),
        (img_YCrCb, ['Y', 'Cr', 'Cb'])]

# Define HOG parameters
orient = 12
pix_per_cell = 8
cell_per_block = 1

fig = plt.figure(figsize=(32,32))

# NOTE(review): the statements filling this axes and the subplot titles were
# missing from the source; showing the original image and titling each HOG
# plot with its channel name is a reconstruction -- confirm.
ax = fig.add_subplot(732)
ax.imshow(img_RGB)
ax.set_title("Original")

# Rows 2..7 of the 7x3 grid: one row per color space, one column per channel
for i, (converted, channel_names) in enumerate(imgs):
    for channel in range(3):
        pos = (i + 1) * 3 + 1 + channel
        features, hog_image = get_hog_features(converted[:,:,channel], orient,
                                               pix_per_cell, cell_per_block,
                                               vis=True, feature_vec=False)
        ax = fig.add_subplot(7, 3, pos)
        ax.imshow(hog_image, cmap='gray')
        ax.set_title(channel_names[channel])
# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/hog.png', bbox_inches="tight")

Train Linear Support Vector Machine Classifier

Define Parameters

# Parameters to be tweaked

# Color space the features are computed in
color_space = 'HLS'  # Can be BGR, HSV, LUV, HLS, YUV, YCrCb

# Which feature groups are part of the feature vector
spatial_feat, hist_feat, hog_feat = True, True, True

# Spatial binning / color histogram settings
spatial_size = (32, 32)  # Spatial binning dimensions
hist_bins = 64  # Number of histogram bins

# HOG settings: orientations, pixels per cell, cells per block
orient, pix_per_cell, cell_per_block = 12, 8, 2
hog_channel = "ALL"  # Can be 0, 1, 2, or "ALL"

print('Parameters were defined.')

Combine Features and Normalize

# The next step (feature extraction and scaler fitting) takes a long time.
# If the scaler was saved by an earlier run, you can load it from the file
# system here instead.

import pickle

# Load the scaler
# NOTE: unpickling can execute arbitrary code -- only load files created by
# this notebook.
with open('scaler.pkl', 'rb') as file:
    X_scaler = pickle.load(file)
    print('Scaler was loaded from file', 'scaler.pkl')

import cv2
import pickle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.preprocessing import StandardScaler
%matplotlib inline

def extract_features(imgs, color_space='BGR', spatial_size=(32, 32),
                        hist_bins=32, orient=9,
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):
    """Extract features from a list of images.

    This function calls bin_spatial(), color_hist(), and get_hog_features().

    Args:
        imgs: iterable of image file paths, read with ``cv2.imread``.
        color_space: 'BGR', 'RGB', 'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'.
        spatial_size: size passed to bin_spatial().
        hist_bins: number of bins passed to color_hist().
        orient, pix_per_cell, cell_per_block: HOG parameters.
        hog_channel: channel index used for HOG, or 'ALL' for every channel.
        spatial_feat, hist_feat, hog_feat: switch the feature groups on/off.

    Returns:
        List with one concatenated feature vector per image.

    Raises:
        ValueError: if ``color_space`` is not supported.
        IOError: if an image file cannot be read.
    """
    conversions = {
        'RGB': cv2.COLOR_BGR2RGB,
        'HSV': cv2.COLOR_BGR2HSV,
        'LUV': cv2.COLOR_BGR2LUV,
        'HLS': cv2.COLOR_BGR2HLS,
        'YUV': cv2.COLOR_BGR2YUV,
        'YCrCb': cv2.COLOR_BGR2YCrCb,
    }
    if color_space != 'BGR' and color_space not in conversions:
        raise ValueError("Unsupported color space: {}".format(color_space))

    # Create a list to append feature vectors to
    features = []
    # Iterate through the list of images
    for file in imgs:
        file_features = []
        # Read in each one by one
        image = cv2.imread(file)
        if image is None:
            # cv2.imread() returns None instead of raising on failure.
            raise IOError("Could not read image: {}".format(file))
        # apply color conversion if other than 'BGR'
        if color_space == 'BGR':
            feature_image = np.copy(image)
        else:
            feature_image = cv2.cvtColor(image, conversions[color_space])

        if spatial_feat:
            file_features.append(bin_spatial(feature_image, size=spatial_size))
        if hist_feat:
            file_features.append(color_hist(feature_image, nbins=hist_bins))
        if hog_feat:
            # Call get_hog_features() with vis=False, feature_vec=True
            if hog_channel == 'ALL':
                hog_features = []
                for channel in range(feature_image.shape[2]):
                    hog_features.append(get_hog_features(feature_image[:,:,channel],
                                        orient, pix_per_cell, cell_per_block,
                                        vis=False, feature_vec=True))
                hog_features = np.ravel(hog_features)
            else:
                hog_features = get_hog_features(feature_image[:,:,hog_channel], orient,
                            pix_per_cell, cell_per_block, vis=False, feature_vec=True)
            file_features.append(hog_features)
        # Append the new feature vector to the features list
        features.append(np.concatenate(file_features))
    # Return list of feature vectors
    return features

# Extract features
car_features = extract_features(vehicle_paths, color_space=color_space,
                        spatial_size=spatial_size, hist_bins=hist_bins,
                        orient=orient, pix_per_cell=pix_per_cell,
                        cell_per_block=cell_per_block,
                        hog_channel=hog_channel, spatial_feat=spatial_feat,
                        hist_feat=hist_feat, hog_feat=hog_feat)
notcar_features = extract_features(nonvehicle_paths, color_space=color_space,
                        spatial_size=spatial_size, hist_bins=hist_bins,
                        orient=orient, pix_per_cell=pix_per_cell,
                        cell_per_block=cell_per_block,
                        hog_channel=hog_channel, spatial_feat=spatial_feat,
                        hist_feat=hist_feat, hog_feat=hog_feat)

# Normalize data
X = np.vstack((car_features, notcar_features)).astype(np.float64)
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)

# save the scaler
with open('scaler.pkl', 'wb') as file:
    pickle.dump(X_scaler, file)
    print('Scaler was saved to', 'scaler.pkl')

# Visualize normalization
car_ind = 28
# Plot an example of raw and scaled features
# NOTE(review): the subplot/plot statements were missing from the source;
# the three panels below are a reconstruction from the titles -- confirm.
fig = plt.figure(figsize=(12,4))
plt.subplot(131)
plt.imshow(mpimg.imread(vehicle_paths[car_ind]))
plt.title('Original Image')
plt.subplot(132)
plt.plot(X[car_ind])
plt.title('Raw Features')
plt.subplot(133)
plt.plot(scaled_X[car_ind])
plt.title('Normalized Features')
fig.tight_layout()

# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/feature_normalization.png', bbox_inches="tight")

Train Classifier

# The next step (training the classifier) takes a long time.
# If the classifier was saved by an earlier run, you can load it from the
# file system here instead.

import pickle

# Load the classifier
# NOTE: unpickling can execute arbitrary code -- only load files created by
# this notebook.
with open('svc.pkl', 'rb') as file:
    svc = pickle.load(file)
    print('Classifier was loaded from file', 'svc.pkl')

import time
import pickle
import numpy as np
from sklearn.svm import LinearSVC
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import train_test_split

# Define the labels vector
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

# Split up data into randomized training and test sets
rand_state = np.random.randint(0, 100)
X_train, X_test, y_train, y_test = train_test_split(
    scaled_X, y, test_size=0.2, random_state=rand_state)

print('Using:', orient, 'orientations', pix_per_cell,
    'pixels per cell and', cell_per_block,'cells per block')
print('Feature vector length:', len(X_train[0]))
# Use a linear SVC
svc = LinearSVC()
# Check the training time for the SVC
t = time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Check the score of the SVC
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# Check the prediction time for a single sample
# (restart the timer so that training time is not counted again)
t = time.time()
n_predict = 10
print('My SVC predicts: ', svc.predict(X_test[0:n_predict]))
print('For these',n_predict, 'labels: ', y_test[0:n_predict])
t2 = time.time()
print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')

# save the trained classifier
with open('svc.pkl', 'wb') as file:
    pickle.dump(svc, file)
    print('Classifier was saved to', 'svc.pkl')

Detect Vehicles

Sliding Window

import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None),
                    xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """Return a list of bounding boxes for the search windows.

    Args:
        img: image array; only ``img.shape`` is used.
        x_start_stop: (start, stop) of the search region in x; ``None``
            entries default to the image border.
        y_start_stop: (start, stop) of the search region in y; ``None``
            entries default to the image border.
        xy_window: (width, height) of a window in pixels.
        xy_overlap: overlap fraction between neighbouring windows in x and y.

    Returns:
        List of ``((startx, starty), (endx, endy))`` tuples.

    Raises:
        ValueError: if window size and overlap give a step of less than one
            pixel.
    """
    # Work on local copies: neither the defaults nor the caller's sequences
    # may be modified.
    x_start, x_stop = x_start_stop
    y_start, y_stop = y_start_stop
    # If x and/or y start/stop positions not defined, set to image size
    if x_start is None:
        x_start = 0
    if x_stop is None:
        x_stop = img.shape[1]
    if y_start is None:
        y_start = 0
    if y_stop is None:
        y_stop = img.shape[0]
    # Compute the span of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start
    # Compute the number of pixels per step in x/y
    nx_pix_per_step = int(xy_window[0]*(1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1]*(1 - xy_overlap[1]))
    if nx_pix_per_step < 1 or ny_pix_per_step < 1:
        raise ValueError("xy_window and xy_overlap must give a step of at least one pixel")
    # Compute the number of windows in x/y
    nx_buffer = int(xy_window[0]*(xy_overlap[0]))
    ny_buffer = int(xy_window[1]*(xy_overlap[1]))
    nx_windows = int((xspan - nx_buffer)/nx_pix_per_step)
    ny_windows = int((yspan - ny_buffer)/ny_pix_per_step)
    # Initialize a list to append window positions to
    window_list = []
    # Loop through finding x and y window positions
    # Note: you could vectorize this step, but in practice
    # you'll be considering windows one by one with your
    # classifier, so looping makes sense
    for ys in range(ny_windows):
        for xs in range(nx_windows):
            # Calculate window position
            startx = xs*nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            starty = ys*ny_pix_per_step + y_start
            endy = starty + xy_window[1]
            # Append window position to list
            window_list.append(((startx, starty), (endx, endy)))
    # Return the list of windows
    return window_list

def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """Draw every bounding box onto a copy of the image.

    Each entry of ``bboxes`` supplies two corner points (index 0 and 1)
    that are passed to ``cv2.rectangle``. The input image is left untouched;
    the annotated copy is returned.
    """
    canvas = np.copy(img)
    for corners in bboxes:
        first_corner = corners[0]
        second_corner = corners[1]
        cv2.rectangle(canvas, first_corner, second_corner, color, thick)
    return canvas

# Visualize the result
fig = plt.figure(figsize=(20, 15))

img_cnt = 1
# Sort the listing so that the subplot order is the same on every run
for name in sorted(os.listdir("test_images")):
    # Read test image
    image = cv2.imread(os.path.join("test_images", name))

    windows = slide_window(image, x_start_stop=[None, None], y_start_stop=[400, 700],
                           xy_window=(128, 128), xy_overlap=(0.5, 0.5))
    window_img = draw_boxes(image, windows, color=(0, 0, 255), thick=6)
    ax = fig.add_subplot(3, 2, img_cnt)
    ax.imshow(cv2.cvtColor(window_img, cv2.COLOR_BGR2RGB))
    img_cnt += 1


# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/sliding_window.png', bbox_inches="tight")

Search and Classify

import cv2
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def single_img_features(img, color_space='BGR', spatial_size=(32, 32),
                        hist_bins=32, orient=9,
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):
    """Extract features from a single image window.

    Args:
        img: BGR image array of the window.
        color_space: 'BGR', 'RGB', 'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'.
        spatial_size: size passed to bin_spatial().
        hist_bins: number of bins passed to color_hist().
        orient, pix_per_cell, cell_per_block: HOG parameters.
        hog_channel: channel index used for HOG, or 'ALL' for every channel.
        spatial_feat, hist_feat, hog_feat: switch the feature groups on/off.

    Returns:
        One-dimensional array with the concatenated features.

    Raises:
        ValueError: if ``color_space`` is not supported.
    """
    conversions = {
        'RGB': cv2.COLOR_BGR2RGB,
        'HSV': cv2.COLOR_BGR2HSV,
        'LUV': cv2.COLOR_BGR2LUV,
        'HLS': cv2.COLOR_BGR2HLS,
        'YUV': cv2.COLOR_BGR2YUV,
        'YCrCb': cv2.COLOR_BGR2YCrCb,
    }
    #1) Define an empty list to receive features
    img_features = []
    #2) Apply color conversion if other than 'BGR'
    if color_space == 'BGR':
        feature_image = np.copy(img)
    elif color_space in conversions:
        feature_image = cv2.cvtColor(img, conversions[color_space])
    else:
        raise ValueError("Unsupported color space: {}".format(color_space))
    #3) Compute spatial features if flag is set
    if spatial_feat:
        spatial_features = bin_spatial(feature_image, size=spatial_size)
        #4) Append features to list
        img_features.append(spatial_features)
    #5) Compute histogram features if flag is set
    if hist_feat:
        hist_features = color_hist(feature_image, nbins=hist_bins)
        #6) Append features to list
        img_features.append(hist_features)
    #7) Compute HOG features if flag is set
    if hog_feat:
        if hog_channel == 'ALL':
            hog_features = []
            for channel in range(feature_image.shape[2]):
                hog_features.extend(get_hog_features(feature_image[:,:,channel],
                                    orient, pix_per_cell, cell_per_block,
                                    vis=False, feature_vec=True))
        else:
            hog_features = get_hog_features(feature_image[:,:,hog_channel], orient,
                        pix_per_cell, cell_per_block, vis=False, feature_vec=True)
        #8) Append features to list
        img_features.append(hog_features)

    #9) Return concatenated array of features
    return np.concatenate(img_features)

def search_windows(img, windows, clf, scaler, color_space='BGR',
                    spatial_size=(32, 32), hist_bins=32,
                    hist_range=(0, 256), orient=9,
                    pix_per_cell=8, cell_per_block=2,
                    hog_channel=0, spatial_feat=True,
                    hist_feat=True, hog_feat=True):
    """Search cars in the given image within list of windows to be searched.

    Args:
        img: image array the windows are cut from.
        windows: iterable of ``((startx, starty), (endx, endy))`` windows.
        clf: classifier providing ``predict``.
        scaler: feature scaler providing ``transform``.
        hist_range: accepted for interface compatibility; not used here.
        Remaining arguments are forwarded to single_img_features().

    Returns:
        List of the windows for which the classifier predicted 1.
    """
    #1) Create an empty list to receive positive detection windows
    on_windows = []
    #2) Iterate over all windows in the list
    for window in windows:
        #3) Extract the test window from original image
        test_img = cv2.resize(img[window[0][1]:window[1][1], window[0][0]:window[1][0]], (64, 64))
        #4) Extract features for that window using single_img_features()
        features = single_img_features(test_img, color_space=color_space,
                            spatial_size=spatial_size, hist_bins=hist_bins,
                            orient=orient, pix_per_cell=pix_per_cell,
                            cell_per_block=cell_per_block,
                            hog_channel=hog_channel, spatial_feat=spatial_feat,
                            hist_feat=hist_feat, hog_feat=hog_feat)
        #5) Scale extracted features to be fed to classifier
        test_features = scaler.transform(np.array(features).reshape(1, -1))
        #6) Predict using your classifier
        prediction = clf.predict(test_features)
        #7) If positive (prediction == 1) then save the window
        if prediction == 1:
            on_windows.append(window)
    #8) Return windows for positive detections
    return on_windows

y_start_stop = [400, 700] # Min and max in y to search in slide_window()

# Visualize the result
fig = plt.figure(figsize=(16, 16))
times = []

img_cnt = 1
# Sort the listing so that the subplot order is the same on every run
for name in sorted(os.listdir("test_images")):
    # Read test image
    image = cv2.imread(os.path.join("test_images", name))

    draw_image = np.copy(image)
    tm_start = time.time()

    windows = slide_window(image, x_start_stop=[None, None], y_start_stop=y_start_stop,
                           xy_window=(115, 115), xy_overlap=(0.8, 0.8))

    hot_windows = search_windows(image, windows, svc, X_scaler, color_space=color_space,
                                 spatial_size=spatial_size, hist_bins=hist_bins,
                                 orient=orient, pix_per_cell=pix_per_cell,
                                 cell_per_block=cell_per_block,
                                 hog_channel=hog_channel, spatial_feat=spatial_feat,
                                 hist_feat=hist_feat, hog_feat=hog_feat)

    window_img = draw_boxes(draw_image, hot_windows, color=(0, 0, 255), thick=6)
    tm_stop = time.time()
    times.append(round(tm_stop-tm_start, 2))
    # Left column: input image, right column: detections
    ax = fig.add_subplot(6, 2, img_cnt)
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    img_cnt += 1
    ax = fig.add_subplot(6, 2, img_cnt)
    ax.imshow(cv2.cvtColor(window_img, cv2.COLOR_BGR2RGB))
    img_cnt += 1


print("Average time per image:", sum(times)/len(times), "sec")

# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/search_and_classify.png', bbox_inches="tight")

Detection is quite good, but very slow. It cannot be used in real-time systems.

Faster Search (HOG Sub-Sampling)

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pickle
import cv2
%matplotlib inline

def find_cars(img, ystart, ystop, scale, clf, X_scaler,
              orient, pix_per_cell, cell_per_block, spatial_size,
              hist_bins, color_space, bins_range):
    """Detect cars with HOG features computed once for the whole search band.

    Args:
        img: BGR image array.
        ystart, ystop: rows delimiting the band of the image to search.
        scale: the band is shrunk by this factor before searching; found
            boxes are scaled back to image coordinates.
        clf: classifier providing ``predict``.
        X_scaler: feature scaler providing ``transform``.
        orient, pix_per_cell, cell_per_block: HOG parameters.
        spatial_size: size passed to bin_spatial().
        hist_bins, bins_range: histogram settings passed to color_hist().
        color_space: 'BGR', 'RGB', 'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'.

    Returns:
        List of ``((x1, y1), (x2, y2))`` boxes for which the classifier
        predicted 1.

    Raises:
        ValueError: if ``color_space`` is not supported.
    """
    conversions = {
        'RGB': cv2.COLOR_BGR2RGB,
        'HSV': cv2.COLOR_BGR2HSV,
        'LUV': cv2.COLOR_BGR2LUV,
        'HLS': cv2.COLOR_BGR2HLS,
        'YUV': cv2.COLOR_BGR2YUV,
        'YCrCb': cv2.COLOR_BGR2YCrCb,
    }
    bbox_list = []
    img_tosearch = img[ystart:ystop,:,:]
    if color_space == 'BGR':
        img_tosearch = np.copy(img_tosearch)
    elif color_space in conversions:
        img_tosearch = cv2.cvtColor(img_tosearch, conversions[color_space])
    else:
        raise ValueError("Unsupported color space: {}".format(color_space))
    if scale != 1:
        imshape = img_tosearch.shape
        # cv2.resize expects the target size as (width, height)
        img_tosearch = cv2.resize(img_tosearch,
                                  (int(imshape[1]/scale), int(imshape[0]/scale)))
    ch1 = img_tosearch[:,:,0]
    ch2 = img_tosearch[:,:,1]
    ch3 = img_tosearch[:,:,2]

    # Define blocks and steps as above.
    # A row of n cells holds n - cell_per_block + 1 block positions.
    nxblocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1
    # 64 was the original sampling rate, with 8 cells and 8 pix per cell
    # (taken from the module-level `image_shape` of the training images)
    window = image_shape[0]
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step
    # Compute individual channel HOG features for the entire image
    hog1 = get_hog_features(ch1, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog2 = get_hog_features(ch2, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog3 = get_hog_features(ch3, orient, pix_per_cell, cell_per_block, feature_vec=False)
    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb*cells_per_step
            xpos = xb*cells_per_step
            # Extract HOG for this patch
            hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat3 = hog3[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_features = np.hstack((hog_feat1, hog_feat2, hog_feat3))

            xleft = xpos*pix_per_cell
            ytop = ypos*pix_per_cell

            # Extract the image patch
            subimg = cv2.resize(img_tosearch[ytop:ytop+window, xleft:xleft+window], (64,64))
            # Get color features
            spatial_features = bin_spatial(subimg, size=spatial_size)
            hist_features = color_hist(subimg, nbins=hist_bins, bins_range=bins_range)

            # Scale features and make a prediction
            test_features = X_scaler.transform(np.hstack((spatial_features, hist_features,
                                                          hog_features)).reshape(1, -1))
            test_prediction = clf.predict(test_features)
            if test_prediction == 1:
                # Map the window back to coordinates of the unscaled image
                xbox_left = int(xleft*scale)
                ytop_draw = int(ytop*scale)
                win_draw = int(window*scale)
                bbox_list.append(((xbox_left, ytop_draw+ystart),
                                  (xbox_left+win_draw, ytop_draw+win_draw+ystart)))
    return bbox_list

y_start_stop = [400, 700]

# Visualize the result
fig = plt.figure(figsize=(16, 16))
times = []

img_cnt = 1
# Sort the listing so that the subplot order is the same on every run
for name in sorted(os.listdir("test_images")):
    # Read test image
    image = cv2.imread(os.path.join("test_images", name))

    draw_image = np.copy(image)
    tm_start = time.time()

    # NOTE(review): `bins_range` is not defined in the visible part of this
    # notebook -- confirm where it is set.
    bbox_list = find_cars(image, y_start_stop[0], y_start_stop[1], 1, svc, X_scaler,
                          orient, pix_per_cell, cell_per_block, spatial_size,
                          hist_bins, color_space, bins_range)
    draw_image = draw_boxes(draw_image, bbox_list)
    tm_stop = time.time()
    times.append(round(tm_stop-tm_start, 2))
    # Left column: input image, right column: detections
    ax = fig.add_subplot(6, 2, img_cnt)
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    img_cnt += 1
    ax = fig.add_subplot(6, 2, img_cnt)
    ax.imshow(cv2.cvtColor(draw_image, cv2.COLOR_BGR2RGB))
    img_cnt += 1


print("Average time per image:", sum(times)/len(times), "sec")

# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/fast_search_and_classify.png', bbox_inches="tight")

Detection is quite good and fast enough. On a very powerful laptop it can process video in near real time.

Filter False Positives

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pickle
import cv2
from scipy.ndimage.measurements import label

def add_heat(heatmap, bbox_list):
    """Increment the heatmap by one inside every bounding box.

    Each box takes the form ((x1, y1), (x2, y2)). The heatmap is modified
    in place and also returned.
    """
    for box in bbox_list:
        top_left, bottom_right = box[0], box[1]
        rows = slice(top_left[1], bottom_right[1])
        cols = slice(top_left[0], bottom_right[0])
        heatmap[rows, cols] += 1

    return heatmap
def apply_threshold(heatmap, threshold):
    """Zero out every heatmap pixel whose value is at or below the threshold.

    The heatmap is modified in place and also returned.
    """
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0
    return heatmap

def draw_labeled_bboxes(img, labels):
    """Draw a rectangle around every labeled region and return the image.

    ``labels[0]`` is the label array and ``labels[1]`` the number of labels;
    label values 1..labels[1] are each enclosed by one box. The rectangles
    are drawn directly onto ``img``.
    """
    label_map, n_cars = labels[0], labels[1]
    for car_id in range(1, n_cars + 1):
        # Coordinates of all pixels that carry this label
        hits = (label_map == car_id).nonzero()
        ys = np.array(hits[0])
        xs = np.array(hits[1])
        # The bounding box spans the min/max extent of those pixels
        top_left = (np.min(xs), np.min(ys))
        bottom_right = (np.max(xs), np.max(ys))
        cv2.rectangle(img, top_left, bottom_right, (0,0,255), 6)
    return img

y_start_stop = [400, 700] # Min and max in y to search in slide_window()

# List the test images up front so the subplot grid can be sized to fit them;
# sorting makes the figure layout reproducible (os.listdir order is arbitrary).
test_names = sorted(os.listdir("test_images"))
# Keep the original 6-row layout, but grow it when there are more images.
n_rows = max(6, len(test_names))

# Visualize the result
fig = plt.figure(figsize=(16, 16))

img_cnt = 1
for name in test_names:
    # Read test image; cv2.imread returns None for files it cannot decode,
    # so skip anything in the directory that is not a readable image.
    image = cv2.imread(os.path.join("test_images", name))
    if image is None:
        continue
    # One heat value per pixel. np.float64 is what the removed np.float
    # alias used to resolve to.
    heat = np.zeros(image.shape[:2], dtype=np.float64)

    draw_image = np.copy(image)
    bbox_list = find_cars(image, y_start_stop[0], y_start_stop[1], 1, svc, X_scaler,
                          orient, pix_per_cell, cell_per_block, spatial_size,
                          hist_bins, color_space, bins_range)
    # Draw the raw (unfiltered) detections
    draw_image = draw_boxes(draw_image, bbox_list)
    # Add heat to each box in box list
    heat = add_heat(heat, bbox_list)
    # Apply threshold to help remove false positives
    heat = apply_threshold(heat, 1)

    # Visualize the heatmap when displaying
    heatmap = np.clip(heat, 0, 255)

    # Find final boxes from heatmap using label function
    labels = label(heatmap)
    draw_img = draw_labeled_bboxes(np.copy(image), labels)

    # Column 1: original image
    ax = fig.add_subplot(n_rows, 3, img_cnt)
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    img_cnt += 1
    # Column 2: thresholded heat map
    ax = fig.add_subplot(n_rows, 3, img_cnt)
    ax.imshow(heatmap, cmap='hot')
    img_cnt += 1
    # Column 3: boxes derived from the labeled heat map
    ax = fig.add_subplot(n_rows, 3, img_cnt)
    ax.imshow(cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB))
    img_cnt += 1


# Create directory where to save output images
os.makedirs("output_images", exist_ok=True)
plt.savefig('output_images/heat_map.png', bbox_inches="tight")

Vehicle Detection Pipeline

NOTICE: ideas related to vehicle tracking, such as remembering bboxes identified in previous N frames, were borrowed from Prerit Jaiswal's repository.

import os
from moviepy.editor import VideoFileClip
from IPython.display import HTML
from IPython.display import display

class VehicleDetector:
    """Detect vehicles frame by frame, smoothing detections over recent frames.

    Bounding boxes found in recent frames are remembered and accumulated into
    a heat map; only regions whose heat exceeds ``threshold`` are labeled and
    drawn. The classifier and feature-extraction settings (``svc``,
    ``X_scaler``, ``orient``, ``pix_per_cell``, ``cell_per_block``,
    ``spatial_size``, ``hist_bins``, ``color_space``, ``bins_range``) are read
    from module-level names when ``pipeline`` runs.
    """

    def __init__(self, n_iter=25, n_update=2, threshold=70,
                 scale_yrange_map=None):
        """Initialize detector state.

        Args:
            n_iter: number of most recent per-frame bbox lists to keep.
            n_update: the heat map and labels are refreshed every
                ``n_update`` processed frames.
            threshold: heat values at or below this are zeroed.
            scale_yrange_map: maps a search scale to the ``(ystart, ystop)``
                pair passed to ``find_cars``. When omitted, five scales from
                0.8 to 2.5 are used.
        """
        if scale_yrange_map is None:
            # Built per instance so instances never share one mutable
            # default dictionary.
            scale_yrange_map = {0.8 : (380,500),
                                1.0 : (380,550),
                                1.5 : (380,580),
                                2.0 : (380,650),
                                2.5 : (380,700)}
        # Number of processed frames
        self.count = 0
        # Labeled bboxes
        self.out_img_labeled = None
        # Vehicle labels, as (label array, number of labels); "no labels yet"
        self.labels = [None,0]
        # List of bbox lists from last n iterations
        self.bbox_list_n = []
        # Number of frames to smooth over
        self.n_iter = n_iter
        # Number of frames after which to update detection
        self.n_update = n_update
        # Threshold for heat map
        self.threshold = threshold
        # Heat map
        self.heat = np.zeros((720, 1280))
        self.heatmap = np.copy(self.heat)
        # Map scale to y range
        self.scale_yrange_map = scale_yrange_map

    def pipeline(self, image) :
        """Process one RGB frame and return a copy with vehicle boxes drawn.

        Args:
            image: the frame to process, in RGB channel order.

        Returns:
            A copy of ``image`` with one rectangle per currently labeled
            region.
        """
        # Increment number of processed frames
        self.count += 1
        # Image to be processed
        self.image = image

        # Frames are RGB, while the rest of the code works with BGR
        img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Find cars of the processed frame. Detections from every scale are
        # merged into one per-frame list so that no scale's result is lost.
        frame_bboxes = []
        for scale, y_start_stop in self.scale_yrange_map.items():
            ystart = y_start_stop[0]
            ystop  = y_start_stop[1]
            frame_bboxes.extend(
                find_cars(img, ystart, ystop, scale, svc, X_scaler,
                          orient, pix_per_cell, cell_per_block, spatial_size,
                          hist_bins, color_space, bins_range))
        # Add found car boxes to the list of boxes captured in N previous frames
        self.bbox_list_n.append(frame_bboxes)
        # Update heat map each self.n_update frame (smoothing)
        if self.count % self.n_update == 0 :
            for bbox_list in self.bbox_list_n :
                self.heat = add_heat(self.heat,bbox_list)
            self.heat = apply_threshold(self.heat, self.threshold)
            self.heatmap = np.clip(self.heat, 0, 255)
            self.labels = label(self.heatmap)
            # Carry a small amount of heat (2) forward for currently labeled
            # pixels and reset everything else to 0.
            self.heat = np.clip(self.labels[0], 0, 1) * 2
        # Remove very old cars boxes so at most n_iter frames are remembered
        excess = len(self.bbox_list_n) - self.n_iter
        if excess > 0 :
            del self.bbox_list_n[:excess]
        # Draw labeled boxes on the image being processed
        self.out_img_labeled = draw_labeled_bboxes(np.copy(image), self.labels)
        return self.out_img_labeled
import time

# Create output directory for videos, if it does not exist
os.makedirs("output_videos", exist_ok=True)

# Define paths to source and destination videos
vid_src = "test_videos/test_video.mp4"
vid_dst = "output_videos/test_video.mp4"

# Process video frame by frame
video = VideoFileClip(vid_src)
video_clip = video.fl_image(VehicleDetector().pipeline)
# Timed with time.time() (as in the image-search cell) so that this code
# also runs outside IPython, where the %time magic is unavailable.
tm_start = time.time()
video_clip.write_videofile(vid_dst, audio=False)
print("Video processing time:", round(time.time() - tm_start, 2), "sec")

# Embed the processed video in the notebook output
display(HTML("""
            <video width="960" height="540" controls>
               <source src="{0}">
            </video>
            """.format(vid_dst)))

Apply Pipeline to Long Video

import os
from moviepy.editor import VideoFileClip
from IPython.display import HTML
from IPython.display import display

import time

# Create output directory for videos, if it does not exist
os.makedirs("output_videos", exist_ok=True)

# Define paths to source and destination videos
vid_src = "test_videos/project_video.mp4"
vid_dst = "output_videos/project_video.mp4"

# Process video frame by frame
video = VideoFileClip(vid_src)
video_clip = video.fl_image(VehicleDetector().pipeline)
# Timed with time.time() (as in the image-search cell) so that this code
# also runs outside IPython, where the %time magic is unavailable.
tm_start = time.time()
video_clip.write_videofile(vid_dst, audio=False)
print("Video processing time:", round(time.time() - tm_start, 2), "sec")

# Embed the processed video in the notebook output
display(HTML("""
            <video width="960" height="540" controls>
               <source src="{0}">
            </video>
            """.format(vid_dst)))