In [ ]:
%matplotlib inline

import cv2

import numpy as np
from numpy.fft import fft2, ifft2, ifftshift, fftshift

import matplotlib.pyplot as plt

from menpo.image import Image
from menpo.shape import PointCloud
from menpo.feature import no_op, hog 
from menpo.visualize import visualize_images

from templatetracker.correlationfilter.base import (
    CFTracker, compute_max_peak, compute_meanshift_peak)
from templatetracker.correlationfilter.correlationfilter import (
    learn_mosse, increment_mosse, 
    learn_mccf, increment_mccf,
    learn_deep_cf, increment_deep_cf)
from templatetracker.correlationfilter.utils import (
    generate_bounding_box, build_grid)

In [ ]:
def greyscale(i):
    """Convert a menpo image to single-channel greyscale (channel average)."""
    grey = i.as_greyscale('average')
    return grey

def greyscale_hog(i):
    """Greyscale the image, then extract HOG features from it."""
    grey = greyscale(i)
    return hog(grey)

Correlation Filter (CF) based Tracker

This tracker is a first implementation of the ideas described in the following 3 papers regarding template tracking using adaptive correlation filters:

  • David S. Bolme, J. Ross Beveridge, Bruce A. Draper and Yui Man Lui. "Visual Object Tracking using Adaptive Correlation Filters". CVPR, 2010
  • Hamed Kiani Galoogahi, Terence Sim, Simon Lucey. "Multi-Channel Correlation Filters". ICCV, 2013.
  • J. F. Henriques, R. Caseiro, P. Martins, J. Batista. "High-Speed Tracking with Kernelized Correlation Filters". TPAMI, 2015.

Load and manipulate basketball video

Read, pre-process and store a particular number of frames of the provided basketball video.


In [ ]:
video_path = '../data/video.mp4'
cam = cv2.VideoCapture(video_path)

# %-formatting keeps this line valid under both Python 2 and Python 3
# (the original Python-2-only `print` statement is a SyntaxError on Python 3)
print('Is video capture opened? %s' % cam.isOpened())

In [ ]:
# number of frames to read and the (width, height) passed to cv2.resize
n_frames = 500
resolution = (640, 360)

frames = []
for _ in range(n_frames):
    # cam.read() returns (success_flag, frame); the original code ignored
    # the flag and would crash on a failed read / short video
    success, frame = cam.read()
    if not success:
        break
    # scale down to the target resolution
    frame = cv2.resize(frame, resolution)
    # OpenCV loads BGR; reverse the channel axis to get RGB
    frame = frame[..., ::-1]
    # normalize pixel values from [0, 255] to [0, 1]
    frame = np.require(frame, dtype=np.double)
    frame /= 255
    # roll channel axis to the front (channels-first layout for menpo)
    frame = np.rollaxis(frame, -1)
    # wrap as a menpo Image (no colour conversion happens here)
    frame = Image(frame)
    frames.append(frame)

cam.release()

In [ ]:
visualize_images(frames)

Define the position and size of the target on the first frame. Note that we need to do this manually!


In [ ]:
# first frame
frame0 = frames[0]

# manually define target centre; presumably (y, x) per menpo's convention —
# confirm against generate_bounding_box
target_centre0 = PointCloud(np.array([168.0, 232.0])[None])
# manually define target size (assumed height, width in pixels)
target_shape = (31.0, 31.0)
# build bounding box containing the target
target_bb = generate_bounding_box(target_centre0, target_shape)

# add target centre and bounding box as frame landmarks so they render
# on top of the image
frame0.landmarks['target_centre'] = target_centre0
frame0.landmarks['target_bb'] = target_bb

# visualize initialization
frame0.view_widget()

Track basketball video

Create and initialize the correlation filter based tracker by giving it the first frame and the target position and size on the first frame.


In [ ]:
# set options

# specify the kind of filters to be learned and incremented
learn_filter = learn_mccf # learn_mosse or learn_mccf
# the increment function must match the learn function chosen above
increment_filter = increment_mccf # increment_mosse or increment_mccf; should match with the previous learn filter!

# specify image representation used for tracking
features = no_op # no_op, greyscale, greyscale_hog

In [ ]:
# build the tracker: learns the initial correlation filter from frame0,
# centred on target_centre0, over a window of size target_shape
tracker = CFTracker(frame0, target_centre0, target_shape, learn_filter=learn_filter, 
                    increment_filter=increment_filter, features=features)

Visualize the learned correlation filters.


In [ ]:
# visualize at most the first 5 channels of the learned filter
n_channels = np.minimum(5, tracker.filter.shape[0])
# NOTE(review): a square figure for a 1-row subplot grid looks intentional
# here, but (3 * n_channels, 3) may render better — kept as-is for parity
fig_size = (3 * n_channels, 3 * n_channels)

def _show_channels(channels, title):
    """Plot each channel in `channels` side by side in a new figure."""
    fig = plt.figure()
    fig.set_size_inches(fig_size)
    for j, c in enumerate(channels):
        plt.subplot(1, len(channels), j + 1)
        plt.title(title)
        # the original indexed tracker.filter[j] while leaving the loop
        # variable unused; plotting the loop variable directly is equivalent
        plt.imshow(c)

# filter channels in the spatial domain
_show_channels(tracker.filter[:n_channels], 'CF in spatial domain')
# magnitude spectrum of each channel, zero frequency shifted to the centre
_show_channels([np.abs(fftshift(fft2(c))) for c in tracker.filter[:n_channels]],
               'CF in frequency domain')

Track the previous frames.


In [ ]:
# set options

# filter adaptation rate: values close to 0 give more weight to filters
# derived from the most recently tracked frames, values close to 1 give
# more weight to the initial filter
# NOTE(review): direction assumed from the symmetric wording of the original
# comment — confirm against how CFTracker.track uses nu
nu = 0.125

# threshold on the peak-to-sidelobe ratio below which there is too much
# uncertainty w.r.t. the target position and consequently the filter is
# not updated based on the current frame
psr_threshold = 5

# specifies how the next target position is obtained given the filter response
compute_peak = compute_max_peak # compute_max_peak or compute_meanshift_peak

In [ ]:
target_centre = target_centre0

filters = []
targets = []
psrs = []
rs = []
for j, frame in enumerate(frames):
    # track target
    target_centre, psr, r = tracker.track(frame, target_centre, nu=nu,
                                          psr_threshold=psr_threshold,
                                          compute_peak=compute_peak)
    # add target centre and its bounding box as landmarks
    frame.landmarks['tracked_centre'] = target_centre
    frame.landmarks['tracked_bb'] = generate_bounding_box(target_centre, target_shape)
    # add psr to list
    psrs.append(psr)
    rs.append(r)
    
#     print j

Explore tracked frames.


In [ ]:
visualize_images(frames)

Show peak to sidelobe ratio (PSR) over the entire sequence.


In [ ]:
# plot the PSR of every tracked frame over the whole sequence
plt.title('Peak to sidelobe ratio (PSR)')
frame_indices = range(len(psrs))
plt.plot(frame_indices, psrs)