Vehicle Detection and Tracking


Author: Sergey Morozov


NOTICE

Some of the functions below were provided by Udacity. The author modified some of them and added original code.

Project Steps

  • Perform a Histogram of Oriented Gradients (HOG) feature extraction on a labeled training set of images and train a Linear SVM classifier.
  • Apply a color transform and append binned color features, as well as histograms of color, to your HOG feature vector.
  • Implement a sliding-window technique and use your trained classifier to search for vehicles in images.
  • Run pipeline on a video stream and create a heat map of recurring detections frame by frame to reject outliers and follow detected vehicles.
  • Estimate a bounding box for vehicles detected.

Training Data Extraction

The archives with vehicle and non-vehicle images (vehicles.zip and non-vehicles.zip) must be downloaded manually and placed in the root of this repository.

Extract Data


In [ ]:
import os
import zipfile
import shutil

# Extract vehicle images (skipped when the target directory already exists)
if not os.path.isdir("vehicle_images"):
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile("vehicles.zip", 'r') as zip_ref:
        zip_ref.extractall(".")
    shutil.move("vehicles", "vehicle_images")
    # Remove the "__MACOSX" folder if the archive produced one;
    # do not fail when it is absent
    shutil.rmtree("__MACOSX", ignore_errors=True)

# Extract non-vehicle images (skipped when the target directory already exists)
if not os.path.isdir("non-vehicle_images"):
    with zipfile.ZipFile("non-vehicles.zip", 'r') as zip_ref:
        zip_ref.extractall(".")
    shutil.move("non-vehicles", "non-vehicle_images")
    shutil.rmtree("__MACOSX", ignore_errors=True)

Explore Data


In [ ]:
import os
import cv2
import glob
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

def data_look(car_list, notcar_list):
    """Summarise the provided data set.

    Returns a dict with the keys ``n_cars`` and ``n_notcars`` (lengths of the
    two lists) plus ``image_shape`` and ``data_type``, both taken from the
    first entry of ``car_list`` as loaded by ``cv2.imread``.
    """
    summary = {"n_cars": len(car_list), "n_notcars": len(notcar_list)}
    # The first car image serves as the sample for shape and dtype
    sample = cv2.imread(car_list[0])
    summary["image_shape"] = sample.shape
    summary["data_type"] = sample.dtype
    return summary

# Collect the paths of all training images
vehicle_paths = glob.glob("vehicle_images/*/*.png")
nonvehicle_paths = glob.glob("non-vehicle_images/*/*.png")

# Summarise the data set
data_info = data_look(vehicle_paths, nonvehicle_paths)

# Keep the summary values as notebook-level constants
cars_cnt = data_info['n_cars']
notcars_cnt = data_info['n_notcars']
image_shape = data_info['image_shape']
data_type = data_info['data_type']

# Report the summary
print("Vehicle image count:", cars_cnt)
print("Non-vehicle image count:", notcars_cnt)
print("Image shape:", image_shape)
print("Image type:", data_type)

# One row of eight examples: four vehicles followed by four non-vehicles
fig, axs = plt.subplots(1,8, figsize=(16, 2))

for i in range(8):
    if i < 4:
        paths, title = vehicle_paths, 'Vehicle'
    else:
        paths, title = nonvehicle_paths, 'Non-Vehicle'
    # i * i * 3 spreads the picked samples across the list
    img = mpimg.imread(paths[i * i * 3])
    axs[i].axis('off')
    axs[i].set_title(title)
    axs[i].imshow(img)

# Make sure the output directory exists before saving the figure
if not os.path.isdir("output_images"):
    os.mkdir("output_images")

plt.savefig('output_images/data_examples.png', bbox_inches="tight")

Feature Extraction

Histogram of Color


In [ ]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def color_hist(img, nbins=32, bins_range=(0, 256), features_only=True):
    """Compute color histogram features of a 3-channel image.

    Each of the first three channels is histogrammed separately with
    ``nbins`` bins over ``bins_range``; the counts are concatenated into one
    feature vector.

    Returns the feature vector when ``features_only`` is true. Otherwise
    returns ``(ch1_hist, ch2_hist, ch3_hist, bin_centers, hist_features)``,
    where every ``chN_hist`` is the ``(counts, edges)`` pair produced by
    ``np.histogram``.
    """
    per_channel = [np.histogram(img[:, :, idx], bins=nbins, range=bins_range)
                   for idx in range(3)]
    hist_features = np.concatenate([counts for counts, _ in per_channel])
    if features_only:
        return hist_features

    # All channels share the same edges, so the first channel's are used
    edges = per_channel[0][1]
    bin_centers = (edges[1:] + edges[:-1]) / 2
    first, second, third = per_channel
    return first, second, third, bin_centers, hist_features

# Draw a histogram for each color channel
# As an example consider BGR color space
fig, axs = plt.subplots(2,4, figsize=(16, 6))

# One vehicle and one non-vehicle example; each is read from disk once
vehicle_img = cv2.imread(vehicle_paths[0])
nonvehicle_img = cv2.imread(nonvehicle_paths[0])
imgs_for_hist = [(vehicle_paths[0], "Vehicle"), (nonvehicle_paths[0], "Non-Vehicle")]

for i, image in enumerate((vehicle_img, nonvehicle_img)):
    ch1_hist, ch2_hist, ch3_hist, bincen, feature_vec = \
        color_hist(image, nbins=32, bins_range=(0, 256), features_only=False)
    axs[i][0].axis('off')
    axs[i][0].set_title(imgs_for_hist[i][1])
    # cv2.imread returns BGR while matplotlib expects RGB: convert for display only
    axs[i][0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    # Channels 0, 1, 2 of a cv2-loaded image are B, G, R
    channel_hists = ((ch1_hist, 'B'), (ch2_hist, 'G'), (ch3_hist, 'R'))
    for col, (hist, channel_name) in enumerate(channel_hists, start=1):
        axs[i][col].bar(bincen, hist[0])
        axs[i][col].set_xlim([0, 256])
        axs[i][col].set_title(channel_name + ' Histogram')
plt.tight_layout()

# Create directory where to save output images
if not os.path.isdir("output_images"):
    os.mkdir("output_images")

plt.savefig('output_images/histogram_of_color.png', bbox_inches="tight")

Color Distribution


In [ ]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

def plot3d(pixels, colors_rgb, fig, pos,
           axis_labels=list("RGB"), 
           axis_limits=[(0, 255), (0, 255), (0, 255)]):
    """Scatter the pixels of a 3-channel image in a 3D subplot of ``fig``.

    ``pixels`` supplies the coordinates (one axis per channel) and
    ``colors_rgb`` the color of every point; ``pos`` is forwarded to
    ``fig.add_subplot``. Returns the created axes for further manipulation.
    """
    axes3d = fig.add_subplot(pos, projection='3d')

    # Axis ranges, one (low, high) pair per dimension
    x_lim, y_lim, z_lim = axis_limits[0], axis_limits[1], axis_limits[2]
    axes3d.set_xlim(*x_lim)
    axes3d.set_ylim(*y_lim)
    axes3d.set_zlim(*z_lim)

    # Tick and label appearance
    axes3d.tick_params(axis='both', which='major', labelsize=7, pad=1)
    label_style = dict(fontsize=10, labelpad=1)
    axes3d.set_xlabel(axis_labels[0], **label_style)
    axes3d.set_ylabel(axis_labels[1], **label_style)
    axes3d.set_zlabel(axis_labels[2], **label_style)

    # Flatten each channel into a coordinate vector and color every point
    first, second, third = (pixels[:, :, k].ravel() for k in range(3))
    point_colors = colors_rgb.reshape((-1, 3))
    axes3d.scatter(first, second, third, c=point_colors, edgecolors='none')

    return axes3d


# Read a color image
img = cv2.imread("test_images/test4.jpg")

# Select a small fraction of pixels to plot by subsampling it
scale = max(img.shape[0], img.shape[1], 64) / 64  # at most 64 rows and columns
# Built-in int is used instead of np.int, which was only an alias for it
# and has been removed from recent NumPy releases
img_small = cv2.resize(img, (int(img.shape[1] / scale), int(img.shape[0] / scale)), interpolation=cv2.INTER_NEAREST)

# Convert subsampled image to desired color space(s)
img_small_RGB = cv2.cvtColor(img_small, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR, matplotlib likes RGB
img_small_HSV = cv2.cvtColor(img_small, cv2.COLOR_BGR2HSV)
img_small_HLS = cv2.cvtColor(img_small, cv2.COLOR_BGR2HLS)
img_small_LUV = cv2.cvtColor(img_small, cv2.COLOR_BGR2LUV)
img_small_YUV = cv2.cvtColor(img_small, cv2.COLOR_BGR2YUV)
img_small_YCrCb = cv2.cvtColor(img_small, cv2.COLOR_BGR2YCrCb)
img_small_rgb = img_small_RGB / 255.  # scaled to [0, 1], only for plotting

fig = plt.figure(figsize=(15,10))
# Top row: the original image; the two rows below: one 3D scatter per color space
ax = fig.add_subplot(332)
ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
ax.axis("off")
plot3d(img_small_RGB, img_small_rgb, fig, 334)
plot3d(img_small_HSV, img_small_rgb, fig, 335, axis_labels=list("HSV"))
plot3d(img_small_HLS, img_small_rgb, fig, 336, axis_labels=list("HLS"))
plot3d(img_small_LUV, img_small_rgb, fig, 337, axis_labels=list("LUV"))
plot3d(img_small_YUV, img_small_rgb, fig, 338, axis_labels=list("YUV"))
plot3d(img_small_YCrCb, img_small_rgb, fig, 339, axis_labels=['Y', 'Cr', 'Cb'])
plt.tight_layout()

# Create directory where to save output images
if not os.path.isdir("output_images"):
    os.mkdir("output_images")
    
plt.savefig('output_images/color_distribution.png', bbox_inches="tight")

Spatial Binning of Color


In [ ]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def bin_spatial(img, size=(32, 32)):
    """Return binned color features: ``img`` resized to ``size`` and flattened."""
    resized = cv2.resize(img, size)
    return resized.ravel()

# Read test image
img = cv2.imread("test_images/test4.jpg")

# Convert the test image to the desired color space(s).
# The image read above is used here; the previous version converted
# `img_small`, a variable left over from an earlier cell.
img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR, matplotlib likes RGB
img_HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img_HLS = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)
img_LUV = cv2.cvtColor(img, cv2.COLOR_BGR2LUV)
img_YUV = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
img_YCrCb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

fig, axs = plt.subplots(2,3, figsize=(16, 6))

# One panel per color space, filled row by row
spaces = [(img_RGB, 'RGB'), (img_HSV, 'HSV'), (img_HLS, 'HLS'),
          (img_LUV, 'LUV'), (img_YUV, 'YUV'), (img_YCrCb, 'YCrCb')]
for ax, (converted, title) in zip(axs.ravel(), spaces):
    ax.plot(bin_spatial(converted))
    ax.set_title(title)

plt.tight_layout()

# Create directory where to save output images
if not os.path.isdir("output_images"):
    os.mkdir("output_images")
    
plt.savefig('output_images/spatial_binning.png', bbox_inches="tight")

Histogram of Oriented Gradients


In [ ]:
import cv2
import numpy as np
from skimage.feature import hog
import matplotlib.pyplot as plt
%matplotlib inline

def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    """Return HOG features of a single-channel image, optionally with a visualization.

    Parameters are forwarded to ``skimage.feature.hog``: ``orient`` as
    ``orientations``, ``pix_per_cell`` as a square ``pixels_per_cell`` and
    ``cell_per_block`` as a square ``cells_per_block``.

    When ``vis`` is true, returns ``(features, hog_image)``; in that mode the
    features are always requested with ``feature_vector=False`` and
    ``feature_vec`` is ignored. Otherwise returns only the features, with
    ``feature_vector=feature_vec``.
    """
    import inspect  # local import: the notebook cell does not import it

    # scikit-image renamed the `visualise` keyword to `visualize` and later
    # removed the old spelling; pick whichever the installed version accepts.
    hog_params = inspect.signature(hog).parameters
    if 'visualise' in hog_params and 'visualize' not in hog_params:
        vis_kwarg = 'visualise'
    else:
        vis_kwarg = 'visualize'

    hog_kwargs = dict(orientations=orient,
                      pixels_per_cell=(pix_per_cell, pix_per_cell),
                      cells_per_block=(cell_per_block, cell_per_block),
                      transform_sqrt=False)
    if vis:
        hog_kwargs[vis_kwarg] = True
        features, hog_image = hog(img, feature_vector=False, **hog_kwargs)
        return features, hog_image

    hog_kwargs[vis_kwarg] = False
    return hog(img, feature_vector=feature_vec, **hog_kwargs)
    
# Read in the image
image = cv2.imread(vehicle_paths[123])
img_RGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR, matplotlib likes RGB
img_HSV = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
img_HLS = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
img_LUV = cv2.cvtColor(image, cv2.COLOR_BGR2LUV)
img_YUV = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
img_YCrCb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)

# Each converted image is paired with its channel names, in channel order.
# COLOR_BGR2HLS yields H, L, S, so the labels must read "HLS" (not "HSL").
imgs = [(img_RGB, list("RGB")), 
        (img_HSV, list("HSV")), 
        (img_HLS, list("HLS")), 
        (img_LUV, list("LUV")), 
        (img_YUV, list("YUV")), 
        (img_YCrCb, ['Y', 'Cr', 'Cb'])]

# Define HOG parameters
orient = 12
pix_per_cell = 8
cell_per_block = 1

fig = plt.figure(figsize=(32,32))

# Top row: the source image, converted to RGB because matplotlib expects RGB
ax = fig.add_subplot(732)
ax.axis("off")
ax.imshow(img_RGB)

# One row per color space (rows 2-7), one column per channel
for i, (converted, channel_names) in enumerate(imgs):
    for ch, channel_name in enumerate(channel_names):
        pos = (i + 1) * 3 + 1 + ch
        features, hog_image = get_hog_features(converted[:,:,ch], orient, 
                                               pix_per_cell, cell_per_block, 
                                               vis=True, feature_vec=False)
        ax = fig.add_subplot(7, 3, pos)
        ax.axis("off")
        ax.imshow(hog_image, cmap='gray')
        ax.set_title(channel_name)
    
# Create directory where to save output images
if not os.path.isdir("output_images"):
    os.mkdir("output_images")
    
plt.savefig('output_images/hog.png', bbox_inches="tight")

Train Linear Support Vector Machine Classifier

Define Parameters


In [ ]:
# Tunable parameters shared by feature extraction, training and detection
color_space = 'HLS' # One of BGR, RGB, HSV, LUV, HLS, YUV, YCrCb (the values extract_features() handles)
orient = 12 # Number of HOG orientations
pix_per_cell = 8 # HOG pixels per cell
cell_per_block = 2 # HOG cells per block
hog_channel = "ALL" # Can be 0, 1, 2, or "ALL"
spatial_size = (32, 32) # Spatial binning dimensions
hist_bins = 64    # Number of histogram bins
spatial_feat = True # Spatial features on or off
hist_feat = True # Histogram features on or off
hog_feat = True # HOG features on or off
bins_range=(0,256) # Histogram value range; equal to the default of color_hist()

print('Parameters were defined.')

Combine Features and Normalize


In [ ]:
#####
# Shortcut: the next cell takes a long time to run. If 'scaler.pkl' was
# saved by an earlier run, load the fitted scaler here and skip that cell.
#####

import pickle

# Load the scaler
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('scaler.pkl', 'rb') as file:
    X_scaler = pickle.load(file)    
    print('Scaler was loaded from file', file.name)

In [ ]:
import cv2
import pickle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.preprocessing import StandardScaler
%matplotlib inline

def extract_features(imgs, color_space='BGR', spatial_size=(32, 32),
                        hist_bins=32, orient=9, 
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):
    """Extract a feature vector from every image file in ``imgs``.

    Each file is read with ``cv2.imread`` (BGR), converted to ``color_space``
    and described by the concatenation of the enabled features, in this
    order: ``bin_spatial()``, ``color_hist()``, ``get_hog_features()``.

    Args:
        imgs: iterable of image file paths.
        color_space: 'BGR' (no conversion), 'RGB', 'HSV', 'LUV', 'HLS',
            'YUV' or 'YCrCb'.
        spatial_size: target size passed to ``bin_spatial()``.
        hist_bins: number of bins passed to ``color_hist()``.
        orient, pix_per_cell, cell_per_block: HOG parameters.
        hog_channel: channel index, or 'ALL' to use every channel.
        spatial_feat, hist_feat, hog_feat: switches for the three features.

    Returns:
        A list with one 1-D feature array per input file.

    Raises:
        ValueError: if ``color_space`` is not one of the supported names.
        IOError: if a file cannot be read as an image.
    """
    # Names of the cv2 conversion codes; looked up on cv2 only when needed
    conversion_names = {
        'RGB': 'COLOR_BGR2RGB',
        'HSV': 'COLOR_BGR2HSV',
        'LUV': 'COLOR_BGR2LUV',
        'HLS': 'COLOR_BGR2HLS',
        'YUV': 'COLOR_BGR2YUV',
        'YCrCb': 'COLOR_BGR2YCrCb',
    }
    # Fail early with a clear message instead of hitting an unbound
    # `feature_image` in the middle of the loop
    if color_space != 'BGR' and color_space not in conversion_names:
        raise ValueError("Unsupported color_space: {!r}".format(color_space))

    features = []
    for file in imgs:
        image = cv2.imread(file)
        # cv2.imread signals failure by returning None rather than raising
        if image is None:
            raise IOError("Could not read image: {}".format(file))

        if color_space == 'BGR':
            feature_image = np.copy(image)
        else:
            feature_image = cv2.cvtColor(image, getattr(cv2, conversion_names[color_space]))

        file_features = []
        if spatial_feat:
            file_features.append(bin_spatial(feature_image, size=spatial_size))
        if hist_feat:
            file_features.append(color_hist(feature_image, nbins=hist_bins))
        if hog_feat:
            if hog_channel == 'ALL':
                # HOG of every channel, flattened into one vector in channel order
                hog_features = np.ravel([
                    get_hog_features(feature_image[:, :, channel],
                                     orient, pix_per_cell, cell_per_block,
                                     vis=False, feature_vec=True)
                    for channel in range(feature_image.shape[2])])
            else:
                hog_features = get_hog_features(feature_image[:, :, hog_channel], orient,
                                                pix_per_cell, cell_per_block,
                                                vis=False, feature_vec=True)
            file_features.append(hog_features)
        features.append(np.concatenate(file_features))
    return features

# Settings shared by both extract_features() calls
feature_kwargs = dict(color_space=color_space,
                      spatial_size=spatial_size, hist_bins=hist_bins,
                      orient=orient, pix_per_cell=pix_per_cell,
                      cell_per_block=cell_per_block,
                      hog_channel=hog_channel, spatial_feat=spatial_feat,
                      hist_feat=hist_feat, hog_feat=hog_feat)

# Feature vectors of the vehicle and the non-vehicle images
car_features = extract_features(vehicle_paths, **feature_kwargs)
notcar_features = extract_features(nonvehicle_paths, **feature_kwargs)

# Stack all samples (cars first) into one float64 matrix
X = np.vstack((car_features, notcar_features)).astype(np.float64)
# Fit a per-column scaler, then normalize the data with it
X_scaler = StandardScaler().fit(X)
scaled_X = X_scaler.transform(X)

# Persist the fitted scaler so later sessions can skip this cell
with open('scaler.pkl', 'wb') as file:
    pickle.dump(X_scaler, file)
    print('Scaler was saved to', file.name)

# Show one car sample before and after normalization
car_ind = 28
fig = plt.figure(figsize=(12,4))

plt.subplot(1, 3, 1)
plt.imshow(mpimg.imread(vehicle_paths[car_ind]))
plt.title('Original Image')

plt.subplot(1, 3, 2)
plt.plot(X[car_ind])
plt.title('Raw Features')

plt.subplot(1, 3, 3)
plt.plot(scaled_X[car_ind])
plt.title('Normalized Features')

fig.tight_layout()

# Make sure the output directory exists before saving the figure
if not os.path.isdir("output_images"):
    os.mkdir("output_images")

plt.savefig('output_images/feature_normalization.png', bbox_inches="tight")

Train Classifier


In [ ]:
#####
# Shortcut: the next cell takes a long time to run. If 'svc.pkl' was saved
# by an earlier run, load the trained classifier here and skip that cell.
#####

import pickle

# Load the classifier
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('svc.pkl', 'rb') as file:
    svc = pickle.load(file)    
    print('Classifier was loaded from file', file.name)

In [ ]:
import time
import pickle
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.cross_validation import train_test_split

# Define the labels vector: 1 for every car sample, 0 for every non-car sample
# (same order in which the feature rows were stacked)
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))

# Split up data into randomized training and test sets
rand_state = np.random.randint(0, 100)
X_train, X_test, y_train, y_test = train_test_split(
    scaled_X, y, test_size=0.2, random_state=rand_state)

print('Using:',orient,'orientations',pix_per_cell,
    'pixels per cell and', cell_per_block,'cells per block')
print('Feature vector length:', len(X_train[0]))
# Use a linear SVC 
svc = LinearSVC()
# Check the training time for the SVC
t=time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Check the score of the SVC
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# Check the prediction time for a few samples.
# The timer is restarted here and wraps only predict(); previously the
# training start time was reused, so the figure included training and scoring.
n_predict = 10
t = time.time()
predictions = svc.predict(X_test[0:n_predict])
t2 = time.time()
print('My SVC predicts: ', predictions)
print('For these',n_predict, 'labels: ', y_test[0:n_predict])
print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')

# save the trained classifier
with open('svc.pkl', 'wb') as file:
    pickle.dump(svc, file)    
    print('Classifier was saved to', file.name)

Detect Vehicles

Sliding Window


In [ ]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None),
                    xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """Return a list of bounding boxes for the search windows.

    Args:
        img: image array; only ``img.shape[0]`` (height) and ``img.shape[1]``
            (width) are used, to fill in missing bounds.
        x_start_stop: ``(start, stop)`` of the search region in x; a ``None``
            entry means 0 / image width.
        y_start_stop: ``(start, stop)`` of the search region in y; a ``None``
            entry means 0 / image height.
        xy_window: window size as ``(width, height)``.
        xy_overlap: fraction of overlap between neighbouring windows in x and y.

    Returns:
        A list of ``((startx, starty), (endx, endy))`` tuples, row by row.

    The bounds arguments are never modified: writing the resolved values back
    into them (as the previous version did) changed the shared defaults and
    the caller's lists, so later calls silently reused stale bounds.
    """
    # Resolve the region bounds into locals
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Span of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Pixels per step in x/y (built-in int replaces the removed np.int alias)
    nx_pix_per_step = int(xy_window[0] * (1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1] * (1 - xy_overlap[1]))

    # Number of windows in x/y; the buffer is the overlapping part of a window
    nx_buffer = int(xy_window[0] * xy_overlap[0])
    ny_buffer = int(xy_window[1] * xy_overlap[1])
    nx_windows = int((xspan - nx_buffer) / nx_pix_per_step)
    ny_windows = int((yspan - ny_buffer) / ny_pix_per_step)

    window_list = []
    for ys in range(ny_windows):
        starty = ys * ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs * nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list

def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """Return a copy of ``img`` with one rectangle drawn per bounding box.

    Every entry of ``bboxes`` provides the two corner points handed to
    ``cv2.rectangle``; ``color`` and ``thick`` are forwarded unchanged.
    The input image itself is left untouched.
    """
    canvas = np.copy(img)
    for box in bboxes:
        corner_a, corner_b = box[0], box[1]
        cv2.rectangle(canvas, corner_a, corner_b, color, thick)
    return canvas

# Draw the full search grid on every test image
fig = plt.figure(figsize=(20, 15))
test_dir = "test_images"

img_cnt = 1
for name in os.listdir(test_dir):
    # Load the test image (BGR)
    image_path = os.path.join(test_dir, name)
    image = cv2.imread(image_path)

    # 128x128 windows with 50% overlap, restricted to rows 400-700
    windows = slide_window(image,
                           x_start_stop=[None, None],
                           y_start_stop=[400, 700],
                           xy_window=(128, 128),
                           xy_overlap=(0.5, 0.5))
    window_img = draw_boxes(image, windows, color=(0, 0, 255), thick=6)

    # The figure is laid out as a 3x2 grid, one panel per image
    ax = fig.add_subplot(3, 2, img_cnt)
    ax.imshow(cv2.cvtColor(window_img, cv2.COLOR_BGR2RGB))
    img_cnt += 1

fig.tight_layout()

# Make sure the output directory exists before saving the figure
if not os.path.isdir("output_images"):
    os.mkdir("output_images")

plt.savefig('output_images/sliding_window.png', bbox_inches="tight")

Search and Classify


In [ ]:
import cv2
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def single_img_features(img, color_space='BGR', spatial_size=(32, 32),
                        hist_bins=32, orient=9, 
                        pix_per_cell=8, cell_per_block=2, hog_channel=0,
                        spatial_feat=True, hist_feat=True, hog_feat=True):    
    """Extract the feature vector of a single image window.

    ``img`` is treated as a BGR image, converted to ``color_space`` and
    described by the concatenation of the enabled features, in this order:
    ``bin_spatial()``, ``color_hist()``, ``get_hog_features()``.

    Args:
        img: BGR image array.
        color_space: 'BGR' (no conversion), 'RGB', 'HSV', 'LUV', 'HLS',
            'YUV' or 'YCrCb'.
        spatial_size: target size passed to ``bin_spatial()``.
        hist_bins: number of bins passed to ``color_hist()``.
        orient, pix_per_cell, cell_per_block: HOG parameters.
        hog_channel: channel index, or 'ALL' to use every channel.
        spatial_feat, hist_feat, hog_feat: switches for the three features.

    Returns:
        A 1-D array with the concatenated features.

    Raises:
        ValueError: if ``color_space`` is not one of the supported names.
    """
    # Names of the cv2 conversion codes; looked up on cv2 only when needed
    conversion_names = {
        'RGB': 'COLOR_BGR2RGB',
        'HSV': 'COLOR_BGR2HSV',
        'LUV': 'COLOR_BGR2LUV',
        'HLS': 'COLOR_BGR2HLS',
        'YUV': 'COLOR_BGR2YUV',
        'YCrCb': 'COLOR_BGR2YCrCb',
    }
    # 1) Convert to the requested color space; an unknown name is reported
    #    explicitly instead of leaving `feature_image` unbound
    if color_space == 'BGR':
        feature_image = np.copy(img)
    elif color_space in conversion_names:
        feature_image = cv2.cvtColor(img, getattr(cv2, conversion_names[color_space]))
    else:
        raise ValueError("Unsupported color_space: {!r}".format(color_space))

    img_features = []
    # 2) Spatially binned color features
    if spatial_feat:
        img_features.append(bin_spatial(feature_image, size=spatial_size))
    # 3) Color histogram features
    if hist_feat:
        img_features.append(color_hist(feature_image, nbins=hist_bins))
    # 4) HOG features
    if hog_feat:
        if hog_channel == 'ALL':
            # HOG of every channel, flattened into one vector in channel order
            hog_features = np.ravel([
                get_hog_features(feature_image[:, :, channel],
                                 orient, pix_per_cell, cell_per_block,
                                 vis=False, feature_vec=True)
                for channel in range(feature_image.shape[2])])
        else:
            hog_features = get_hog_features(feature_image[:, :, hog_channel], orient,
                                            pix_per_cell, cell_per_block,
                                            vis=False, feature_vec=True)
        img_features.append(hog_features)

    # 5) Return concatenated array of features
    return np.concatenate(img_features)

def search_windows(img, windows, clf, scaler, color_space='BGR', 
                    spatial_size=(32, 32), hist_bins=32, 
                    hist_range=(0, 256), orient=9, 
                    pix_per_cell=8, cell_per_block=2, 
                    hog_channel=0, spatial_feat=True, 
                    hist_feat=True, hog_feat=True):
    """Return the windows of ``windows`` that the classifier labels as a car.

    Every window is cropped from ``img``, resized to 64x64, described with
    ``single_img_features()``, scaled with ``scaler`` and classified with
    ``clf``; windows predicted as 1 are collected in input order.
    ``hist_range`` is accepted but not used by this function.
    """
    detections = []
    for window in windows:
        top_left, bottom_right = window[0], window[1]
        # Rows are indexed by y, columns by x
        patch = img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
        test_img = cv2.resize(patch, (64, 64))

        features = single_img_features(
            test_img, color_space=color_space,
            spatial_size=spatial_size, hist_bins=hist_bins,
            orient=orient, pix_per_cell=pix_per_cell,
            cell_per_block=cell_per_block,
            hog_channel=hog_channel, spatial_feat=spatial_feat,
            hist_feat=hist_feat, hog_feat=hog_feat)

        # The scaler expects a 2-D array: one row holding this sample
        sample = np.array(features).reshape(1, -1)
        prediction = clf.predict(scaler.transform(sample))
        if prediction == 1:
            detections.append(window)
    return detections

y_start_stop = [400, 700] # Min and max in y to search in slide_window()

# Settings forwarded to search_windows() for every image
search_kwargs = dict(color_space=color_space,
                     spatial_size=spatial_size, hist_bins=hist_bins,
                     orient=orient, pix_per_cell=pix_per_cell,
                     cell_per_block=cell_per_block,
                     hog_channel=hog_channel, spatial_feat=spatial_feat,
                     hist_feat=hist_feat, hog_feat=hog_feat)

# Show every test image next to its detections and time the search
fig = plt.figure(figsize=(16, 16))
times = []

img_cnt = 1
for name in os.listdir("test_images"):
    # Load the test image (BGR) and keep a separate copy to draw on
    image = cv2.imread(os.path.join("test_images", name))
    draw_image = np.copy(image)

    # Timed section: window generation, classification and drawing
    tm_start = time.time()
    windows = slide_window(image, x_start_stop=[None, None], y_start_stop=y_start_stop,
                           xy_window=(115, 115), xy_overlap=(0.8, 0.8))
    hot_windows = search_windows(image, windows, svc, X_scaler, **search_kwargs)
    window_img = draw_boxes(draw_image, hot_windows, color=(0, 0, 255), thick=6)
    tm_stop = time.time()

    times.append(round(tm_stop-tm_start, 2))

    # Left panel: original image; right panel: image with detections
    for shown in (image, window_img):
        ax = fig.add_subplot(6, 2, img_cnt)
        ax.imshow(cv2.cvtColor(shown, cv2.COLOR_BGR2RGB))
        img_cnt += 1

fig.tight_layout()

print("Average time per image:", sum(times)/len(times), "sec")

# Make sure the output directory exists before saving the figure
if not os.path.isdir("output_images"):
    os.mkdir("output_images")

plt.savefig('output_images/search_and_classify.png', bbox_inches="tight")

Detection is quite good, but very slow. It cannot be used in real-time systems.

Faster Search (HOG Sub-Sampling)


In [ ]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pickle
import cv2
%matplotlib inline

def find_cars(img, ystart, ystop, scale, clf, X_scaler, 
              orient, pix_per_cell, cell_per_block, spatial_size, 
              hist_bins, color_space, bins_range):
    """Find vehicles in a horizontal band of a BGR image using HOG sub-sampling.

    HOG features are computed once per channel for the whole (optionally
    rescaled) search band and then sliced per window, instead of being
    recomputed for every window.

    Parameters
    ----------
    img : BGR image array (rows, cols, 3).
    ystart, ystop : first (inclusive) and last (exclusive) row of the band.
    scale : factor by which the band is shrunk before searching; windows
        found are mapped back to original coordinates by multiplying by it.
    clf : classifier exposing ``predict``; a prediction equal to 1 is a hit.
    X_scaler : fitted scaler exposing ``transform``.
    orient, pix_per_cell, cell_per_block : HOG parameters forwarded to
        ``get_hog_features``.
    spatial_size : forwarded to ``bin_spatial`` as ``size``.
    hist_bins, bins_range : forwarded to ``color_hist``.
    color_space : one of 'BGR', 'RGB', 'HSV', 'LUV', 'HLS', 'YUV', 'YCrCb'.
        Any other value leaves the band unconverted (i.e. in BGR).

    Returns
    -------
    list of ((x1, y1), (x2, y2)) boxes in the coordinates of ``img``.
    """
    # Restrict the search to the requested rows
    img_tosearch = img[ystart:ystop, :, :]

    # Target color space -> OpenCV conversion code (input is always BGR)
    conversions = {
        'RGB': cv2.COLOR_BGR2RGB,
        'HSV': cv2.COLOR_BGR2HSV,
        'LUV': cv2.COLOR_BGR2LUV,
        'HLS': cv2.COLOR_BGR2HLS,
        'YUV': cv2.COLOR_BGR2YUV,
        'YCrCb': cv2.COLOR_BGR2YCrCb,
    }
    if color_space in conversions:
        img_tosearch = cv2.cvtColor(img_tosearch, conversions[color_space])
    elif color_space == 'BGR':
        img_tosearch = np.copy(img_tosearch)

    if scale != 1:
        imshape = img_tosearch.shape
        # Built-in int() replaces np.int, which was removed in NumPy 1.24
        img_tosearch = cv2.resize(img_tosearch,
                                  (int(imshape[1]/scale), int(imshape[0]/scale)))

    ch1 = img_tosearch[:, :, 0]
    ch2 = img_tosearch[:, :, 1]
    ch3 = img_tosearch[:, :, 2]

    # Number of HOG block positions along each axis of the search band
    nxblocks = (ch1.shape[1] // pix_per_cell) - 1
    nyblocks = (ch1.shape[0] // pix_per_cell) - 1

    # Window side in pixels, taken from the notebook-level global image_shape.
    # NOTE(review): presumably the 64x64 training-image shape -- confirm.
    window = image_shape[0]
    nblocks_per_window = (window // pix_per_cell) - 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step

    # Compute individual channel HOG features for the entire band once
    hog1 = get_hog_features(ch1, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog2 = get_hog_features(ch2, orient, pix_per_cell, cell_per_block, feature_vec=False)
    hog3 = get_hog_features(ch3, orient, pix_per_cell, cell_per_block, feature_vec=False)

    bbox_list = []
    for xb in range(nxsteps):
        xpos = xb*cells_per_step
        xleft = xpos*pix_per_cell
        for yb in range(nysteps):
            ypos = yb*cells_per_step
            ytop = ypos*pix_per_cell

            # Slice the precomputed HOG arrays for this window
            hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat3 = hog3[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_features = np.hstack((hog_feat1, hog_feat2, hog_feat3))

            # Extract the image patch and bring it to 64x64 for color features
            subimg = cv2.resize(img_tosearch[ytop:ytop+window, xleft:xleft+window], (64, 64))

            # Get color features
            spatial_features = bin_spatial(subimg, size=spatial_size)
            hist_features = color_hist(subimg, nbins=hist_bins, bins_range=bins_range)

            # Scale features and make a prediction
            test_features = X_scaler.transform(
                np.hstack((spatial_features, hist_features, hog_features)).reshape(1, -1))
            test_prediction = clf.predict(test_features)

            if test_prediction == 1:
                # Map the window back to coordinates of the original image
                xbox_left = int(xleft*scale)
                ytop_draw = int(ytop*scale)
                win_draw = int(window*scale)
                bbox_list.append(((xbox_left, ytop_draw+ystart),
                                  (xbox_left+win_draw, ytop_draw+win_draw+ystart)))
    return bbox_list

# HOG sub-sampling demo: run find_cars over every image in "test_images",
# time each run, and save a side-by-side figure.
# Rows of the image to search: [first row, last row)
y_start_stop = [400, 700]

# Visualize the result
fig = plt.figure(figsize=(16, 16))
# Per-image processing time in seconds (rounded to 2 decimals)
times = []

# 1-based index of the next subplot slot in the 6x2 grid
img_cnt = 1
for name in os.listdir("test_images"):
    # Read test image (kept in BGR; converted to RGB only for display below)
    image = cv2.imread(os.path.join("test_images", name))

    # Boxes are drawn on a copy so the original stays clean for the left panel
    draw_image = np.copy(image)
    
    # Timed region: detection at a single scale (1) + box drawing
    tm_start = time.time()

    bbox_list = find_cars(image, y_start_stop[0], y_start_stop[1], 1, svc, X_scaler, 
                          orient, pix_per_cell, cell_per_block, spatial_size,
                          hist_bins, color_space, bins_range)
    draw_image = draw_boxes(draw_image, bbox_list)
    
    tm_stop = time.time()
    
    times.append(round(tm_stop-tm_start, 2))
    
    # Left column: original image.
    # NOTE(review): the 6-row grid has room for at most 6 images from "test_images".
    ax = fig.add_subplot(6, 2, img_cnt)
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    img_cnt += 1
    
    # Right column: the same image with detections drawn
    ax = fig.add_subplot(6, 2, img_cnt)
    ax.imshow(cv2.cvtColor(draw_image, cv2.COLOR_BGR2RGB))
    img_cnt += 1

fig.tight_layout()

# Raises ZeroDivisionError if "test_images" is empty
print("Average time per image:", sum(times)/len(times), "sec")

# Create directory where to save output images
if not os.path.isdir("output_images"):
    os.mkdir("output_images")
    
plt.savefig('output_images/fast_search_and_classify.png', bbox_inches="tight")

Detection is quite good and fast enough: on a very powerful laptop it can process video in near real time.

Filter False Positives


In [ ]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pickle
import cv2
from scipy.ndimage.measurements import label

def add_heat(heatmap, bbox_list):
    """Add one unit of heat, in place, to every pixel covered by each box.

    Each box is expected in the form ((x1, y1), (x2, y2)); the heated region
    is rows y1..y2-1 and columns x1..x2-1. The same ``heatmap`` array is
    returned for convenience.
    """
    for bbox in bbox_list:
        top_left = bbox[0]
        bottom_right = bbox[1]
        rows = slice(top_left[1], bottom_right[1])
        cols = slice(top_left[0], bottom_right[0])
        heatmap[rows, cols] += 1

    return heatmap
    
def apply_threshold(heatmap, threshold):
    """Zero out, in place, every pixel whose heat is at or below ``threshold``.

    Only pixels strictly greater than ``threshold`` keep their value. The
    same ``heatmap`` array is returned.
    """
    not_hot_enough = heatmap <= threshold
    heatmap[not_hot_enough] = 0
    return heatmap

def draw_labeled_bboxes(img, labels):
    """Draw one bounding rectangle per labeled region onto ``img`` (in place).

    Parameters
    ----------
    img : image array to draw on; it is modified and returned.
    labels : pair ``(label_map, n_labels)`` where ``label_map`` is a 2-D
        array whose pixels hold the region number (1..n_labels) and
        ``n_labels`` is the number of regions. With ``n_labels == 0`` the
        label map is never touched, so it may be ``None``.

    Returns
    -------
    The same ``img`` object, with a rectangle of color (0, 0, 255) and
    thickness 6 drawn around each region.
    """
    label_map, n_labels = labels[0], labels[1]
    for car_number in range(1, n_labels + 1):
        # Row/column indices of the pixels that belong to this region
        nonzero = (label_map == car_number).nonzero()
        ys, xs = nonzero[0], nonzero[1]
        # Cast to built-in int: the min/max values are NumPy integer scalars,
        # which some OpenCV builds reject as point coordinates.
        top_left = (int(np.min(xs)), int(np.min(ys)))
        bottom_right = (int(np.max(xs)), int(np.max(ys)))
        cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 6)
    return img

# Heat-map demo: for every image in "test_images", detect cars, build a
# thresholded heat map from the detections, label the remaining blobs and
# save a three-panel figure (original / heat map / labeled boxes).
y_start_stop = [400, 700] # Min and max in y to search in find_cars()

# Visualize the result
fig = plt.figure(figsize=(16, 16))

# 1-based index of the next subplot slot in the 6x3 grid
img_cnt = 1
for name in os.listdir("test_images"):
    # Read test image (kept in BGR; converted to RGB only for display below)
    image = cv2.imread(os.path.join("test_images", name))
    # One heat value per pixel. Built-in float replaces np.float, which was
    # removed in NumPy 1.24.
    heat = np.zeros_like(image[:,:,0]).astype(float)

    draw_image = np.copy(image)
    
    bbox_list = find_cars(image, y_start_stop[0], y_start_stop[1], 1, svc, X_scaler, 
                          orient, pix_per_cell, cell_per_block, spatial_size,
                          hist_bins, color_space, bins_range)
    draw_image = draw_boxes(draw_image, bbox_list)
    
    # Add heat to each box in box list
    heat = add_heat(heat,bbox_list)
    
    # Apply threshold to help remove false positives
    # (pixels covered by at most one box are zeroed)
    heat = apply_threshold(heat,1)

    # Visualize the heatmap when displaying    
    heatmap = np.clip(heat, 0, 255)

    # Find final boxes from heatmap using label function
    labels = label(heatmap)
    draw_img = draw_labeled_bboxes(np.copy(image), labels)
    
    # Panel 1: original image.
    # NOTE(review): the 6-row grid has room for at most 6 images from "test_images".
    ax = fig.add_subplot(6, 3, img_cnt)
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    img_cnt += 1
    
    # Panel 2: thresholded heat map
    ax = fig.add_subplot(6, 3, img_cnt)
    ax.imshow(heatmap, cmap='hot')
    img_cnt += 1
    
    # Panel 3: one box per labeled blob
    ax = fig.add_subplot(6, 3, img_cnt)
    ax.imshow(cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB))
    img_cnt += 1

fig.tight_layout()

# Create directory where to save output images
if not os.path.isdir("output_images"):
    os.mkdir("output_images")
    
plt.savefig('output_images/heat_map.png', bbox_inches="tight")

Vehicle Detection Pipeline

NOTICE: ideas related to vehicle tracking, such as remembering bboxes identified in previous N frames, were borrowed from Prerit Jaiswal's repository.


In [ ]:
import os
from moviepy.editor import VideoFileClip
from IPython.display import HTML
from IPython.display import display

class VehicleDetector:
    """Stateful per-frame vehicle detector with temporal smoothing.

    Detections from the most recent frames are remembered and combined into
    a heat map; only regions whose accumulated heat exceeds ``threshold``
    are labeled and drawn. Use ``pipeline`` as the per-frame callback.

    Parameters
    ----------
    n_iter : number of recent frames whose detections are remembered.
    n_update : the heat map and labels are refreshed every ``n_update``
        processed frames.
    threshold : heat at or below this value is discarded.
    scale_yrange_map : dict mapping a search scale to the ``(ystart, ystop)``
        row band searched at that scale. ``None`` selects a fresh copy of
        ``DEFAULT_SCALE_YRANGE_MAP``.
    """

    # Default search bands; copied per instance so instances never share
    # (and cannot accidentally mutate) one dict.
    DEFAULT_SCALE_YRANGE_MAP = {0.8: (380, 500),
                                1.0: (380, 550),
                                1.5: (380, 580),
                                2.0: (380, 650),
                                2.5: (380, 700)}

    def __init__(self, n_iter=25, n_update=2, threshold=70,
                 scale_yrange_map=None):
        # Number of processed frames
        self.count = 0

        # Labeled bboxes
        self.out_img_labeled = None

        # Vehicle labels as (label_map, n_labels); no vehicles initially
        self.labels = [None, 0]

        # List of bbox lists from last n iterations
        self.bbox_list_n = []

        # Number of frames to smooth over
        self.n_iter = n_iter

        # Number of frames after which to update detection
        self.n_update = n_update

        # Threshold for heat map
        self.threshold = threshold

        # Heat map; sized for 720p and reallocated by pipeline() if the
        # incoming frames have a different size
        self.heat = np.zeros((720, 1280))
        self.heatmap = np.copy(self.heat)

        # Map scale to y range
        if scale_yrange_map is None:
            scale_yrange_map = dict(self.DEFAULT_SCALE_YRANGE_MAP)
        self.scale_yrange_map = scale_yrange_map

    def pipeline(self, image):
        """Process one RGB frame and return a copy with vehicle boxes drawn.

        Relies on the notebook-level classifier and feature settings
        (``svc``, ``X_scaler``, ``orient``, ...) that are passed through to
        ``find_cars``.
        """
        # Increment number of processed frames
        self.count += 1

        # Image to be processed
        self.image = image

        # Frames are RGB, while the rest of the code works with BGR
        img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Keep the heat map the same size as the frame; a size change resets
        # the accumulated state because old heat/labels no longer line up
        frame_shape = tuple(image.shape[:2])
        if self.heat.shape != frame_shape:
            self.heat = np.zeros(frame_shape)
            self.heatmap = np.copy(self.heat)
            self.labels = [None, 0]

        # Find cars of the processed frame at every configured scale.
        # Detections are accumulated across scales; overwriting the list per
        # scale would keep only the last scale's boxes.
        # NOTE(review): the default threshold may have been tuned against
        # single-scale heat -- re-check it after this change.
        bbox_list = []
        for scale, y_start_stop in self.scale_yrange_map.items():
            ystart = y_start_stop[0]
            ystop = y_start_stop[1]
            bbox_list.extend(find_cars(img, ystart, ystop, scale, svc, X_scaler,
                                       orient, pix_per_cell, cell_per_block,
                                       spatial_size, hist_bins, color_space,
                                       bins_range))

        # Add found car boxes to the list of boxes captured in N previous frames
        self.bbox_list_n.append(bbox_list)

        # Update heat map each self.n_update frame (smoothing)
        if self.count % self.n_update == 0:
            for remembered_boxes in self.bbox_list_n:
                self.heat = add_heat(self.heat, remembered_boxes)
            self.heat = apply_threshold(self.heat, self.threshold)
            self.heatmap = np.clip(self.heat, 0, 255)
            self.labels = label(self.heatmap)
            # Restart accumulation with a small residual heat (2) on the
            # currently labeled regions and zero everywhere else
            self.heat = np.clip(self.labels[0], 0, 1) * 2

        # Remove very old cars boxes
        if len(self.bbox_list_n) > self.n_iter:
            self.bbox_list_n.pop(0)

        # Draw labeled boxes on the image being processed
        self.out_img_labeled = draw_labeled_bboxes(np.copy(image), self.labels)
        return self.out_img_labeled
    
# Create output directory for videos, if it does not exist
if not os.path.isdir("output_videos"):
        os.mkdir("output_videos")

# Define paths to source and destination videos
vid_src = "test_videos/test_video.mp4"
vid_dst = "output_videos/test_video.mp4"

# Process video frame by frame; a new VehicleDetector is created here, so
# this run starts with empty detection history
video = VideoFileClip(vid_src)
video_clip = video.fl_image(VehicleDetector().pipeline)
# %time is an IPython magic, so this line only runs inside IPython/Jupyter
%time video_clip.write_videofile(vid_dst, audio = False)
# Embed the written video in the notebook output
display(HTML(
        """
            <video width="960" height="540" controls>
               <source src="{0}">
            </video>
        """.format(vid_dst)))

Apply Pipeline to Long Video


In [ ]:
import os
from moviepy.editor import VideoFileClip
from IPython.display import HTML
from IPython.display import display

# Create output directory for videos, if it does not exist
if not os.path.isdir("output_videos"):
        os.mkdir("output_videos")

# Define paths to source and destination videos
vid_src = "test_videos/project_video.mp4"
vid_dst = "output_videos/project_video.mp4"

# Process video frame by frame; a new VehicleDetector is created here, so
# this run starts with empty detection history
video = VideoFileClip(vid_src)
video_clip = video.fl_image(VehicleDetector().pipeline)
# %time is an IPython magic, so this line only runs inside IPython/Jupyter
%time video_clip.write_videofile(vid_dst, audio = False)
# Embed the written video in the notebook output
display(HTML(
        """
            <video width="960" height="540" controls>
               <source src="{0}">
            </video>
        """.format(vid_dst)))