CarND - Behavioral Cloning - Data Augmentation Experiments


In [1]:
# 234567890123456789012345678901234567890123456789012345678901234567890123456789
import csv
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random

In [2]:
# run options

verbose = True
veryVerbose = False
extremelyVerbose = False

doProcessedDatasetsOnly = False # process ONLY runs from the "secret" jungle track
skipBlockChecks = True # block checks are stress tests on our image pool and pipeline, for testing

In [3]:
# primary data directory
# -- for data as recorded directly from CarSim
dataDir = "./Data/"

if doProcessedDatasetsOnly:
    dataDir += "dropbox/"
    runNames = [ "ComplexForwardLaneLine" ]
else:
    #runNames = [ "Simple", "SimpleReversed", "ComplexForward", "UdacityData" ]
    runNames = [ "Simple", "SimpleReversed" ]

csvPathFormat = dataDir + "{}/driving_log.csv"
imgPathFormat = dataDir + "{}/IMG/{}"

left, center, right = "left", "center", "right"
cameras = [ left, center, right ]
cameraShifts = { left:-266, center:0, right:266 }

In [4]:
# process and pool each frame from all CarSim CSV capture files
captures = {}
allFrames = []

In [5]:
class CaptureFrame():
    """
    store all the filenames and related info for a single, multi-camera capture event 
    from the Udacity SDC CarSim
    """
    def __init__( self, run, index, path = None ):
        self._dict = {}
        self._dict["run"] = run
        self._dict["index"] = index
        self._dict["path"] = path
        self._dict["dateYearMonth"] = str()
        self._dict["dateDayID"] = str()
        
        self._numerics = {}
    
    def run( self ):
        return self._dict["run"]
    
    def index( self ):
        return self._dict["index"]
    
    def path( self ):
        return self._dict["path"]
    
    def setDateYearMonth( self, dateYearMonth ):
        self._dict["dateYearMonth"] = str( dateYearMonth )
        return
        
    def dateYearMonth( self ):
        return self._dict["dateYearMonth"]
    
    def setDateDayID( self, dateDayID ):
        self._dict["dateDayID"] = str( dateDayID )
        return
    
    def dateDayID( self ):
        return self._dict["dateDayID"]
    
    def setNumerics( self, steeringAngle, throttle, brake, speed, error = None ):
        self._numerics["steeringAngle"] = float( steeringAngle )
        self._numerics["throttle"] = float( throttle )
        self._numerics["brake"] = float( brake )
        self._numerics["speed"] = float( speed )
        if error:
            self._numerics["error"] = float( error )
        return
        
    def steeringAngle( self ):
        return self._numerics["steeringAngle"]
    
    def throttle( self ):
        return self._numerics["throttle"]
    
    def brake( self ):
        return self._numerics["brake"]
    
    def speed( self ):
        return self._numerics["speed"]
    
    def error( self ):
        return self._numerics["error"] if "error" in self._numerics else 0.0
    
    def imageFilename( self, camera = center ):
        if doProcessedDatasetsOnly:
            return "{}_{}_{}.jpg".format( self._dict["run"], captures[self._dict["run"]]["dateYearMonth"], self._dict["dateDayID"] )
        else:
            return "{}_{}_{}.jpg".format( camera, captures[self._dict["run"]]["dateYearMonth"], self._dict["dateDayID"] )

In [6]:
# read and process each drive-data file
for run in runNames:
    
    # read one complete csv file
    csvPath = csvPathFormat.format( run )
    print( "Processing CarSim CSV File:", csvPath )
    
    capture = {}
    captures[run] = capture
    
    frames = []
    capture["frames"] = frames
    
    # save the run and path in each frame, 
    # so let's cache a single copy to replicate throughout the dict
    frameRun = str(run)
    framePath = str(csvPath)
    
    with open( csvPath ) as csvFile:
        
        csvReader = csv.reader( csvFile )
        
        for csvLineNumber, csvLine in enumerate( csvReader ):
            
            # always skip the first line (to skip any headers)
            if run == "UdacityData" and csvLineNumber < 1:
                continue
                
            # setup a new frame
            frameIndex = len( frames )
            frame = CaptureFrame( frameRun, frameIndex, framePath )
            frames.append( frame )
            
            # processed CSV format is different from the original CarSim CSV format
            if doProcessedDatasetsOnly:
                
                # rather than store all the mostly similar filenames, we'll just store the unique parts
                centerFileName = csvLine[0].split('/')[-1] # such as, "ComplexForwardLaneLine_2017_04_02_08_35_20_4542.jpg"
                s = centerFileName[:-len(".jpg")].split("_") # list of split elements, such as ["ComplexForwardLaneLine","2017", ...]

                dateYearMonth = "_".join( s[1:3] ) # just the year and month parts, "2017_04"
                capture["dateYearMonth"] = dateYearMonth
                frame.setDateYearMonth( dateYearMonth )
                
                dateDayID = "_".join( s[3:] ) # just the day, time and ID parts, "02_08_35_20_4542"
                frame.setDateDayID( dateDayID )
                                                
            else:
                
                # rather than store all the mostly similar filenames, we'll just store the unique parts
                centerFileName = csvLine[0].split('/')[-1] # such as, "center_2017_03_21_00_20_10_332.jpg"
                s = centerFileName[:-len(".jpg")].split("_") # list of split elements, such as ["center","2017", ...]

                dateYearMonth = "_".join( s[1:3] ) # just the year and month parts, "2017_03"
                capture["dateYearMonth"] = dateYearMonth
                frame.setDateYearMonth( dateYearMonth )

                dateDayID = "_".join( s[3:] ) # just the day, time and ID parts, "21_00_20_10_332"
                frame.setDateDayID( dateDayID )
                
            # verify all is well
            if run is not "UdacityData":
                if veryVerbose:
                    print( "centerFileName =", centerFileName )
                    print( " imageFilename =", frame.imageFilename( center ) )
                    
                assert( centerFileName == frame.imageFilename( center ) ) # will fail if the capture happened while a month ended!

            # store the numerics (steeringAngle, throttle, brake, speed, and error) for this frame
            if len(csvLine) > 7:
                # our "processed" CSV format is different from the original CarSim CSV format
                error = float( csvLine[7] )
            else:
                # use the original format, but normalize the steeringAngle "error" between [-0.5, 0.5]
                error = float( csvLine[3] ) / 2
                
            frame.setNumerics( csvLine[3], csvLine[4], csvLine[5], csvLine[6], error )
    
    # also collect all the frames together, for when we don't care what run they're from
    allFrames += frames
    
# report the total
print( "Total frames available:", len(allFrames) )


Processing CarSim CSV File: ./Data/Simple/driving_log.csv
Processing CarSim CSV File: ./Data/SimpleReversed/driving_log.csv
Total frames available: 16362

In [7]:
# verify the first captured lines
if verbose:
    for run, capture in captures.items():
        print( "Verifying captures for", run )
        
        # test imageFilenameFromFrame()
        frame = capture["frames"][0]
        print( "-- first frame filename: ", frame.imageFilename( center ) )
        
        for frame in capture["frames"]:
            print( "- Frame", frame.index(), ":", frame )
            
            if not veryVerbose:
                break


Verifying captures for Simple
-- first frame filename:  center_2017_03_20_23_55_29_770.jpg
- Frame 0 : <__main__.CaptureFrame object at 0x7f58181a45f8>
Verifying captures for SimpleReversed
-- first frame filename:  center_2017_03_21_00_20_10_332.jpg
- Frame 0 : <__main__.CaptureFrame object at 0x7f5818479908>

In [8]:
# split the frames up into bins, sliced by their error
from collections import Counter

binnedPrecision = 2 # number of rounded digits

binnedCounts = Counter()
for frame in allFrames:
    binnedCounts[ round( frame.error(), binnedPrecision ) ] += 1

binnedFrames = dict()
for bin in binnedCounts:
    binnedFrames[bin] = list()
    
for frame in allFrames:
    binnedFrames[ round( frame.error(), binnedPrecision ) ] += [frame]

binnedKeys = list( binnedCounts )

In [9]:
bin = random.choice( binnedKeys )
bin


Out[9]:
0.12

In [ ]:


In [10]:
# return the path to get to an image
def imagePath( run, frameIndex, camera = center ):
    
    if extremelyVerbose:
        print( "imagePath(): run={} index={} camera={}".format( run, frameIndex, center ) )
        
    frames = captures[run]["frames"]
    imageFilename = frames[frameIndex].imageFilename( camera )
    imagePath = imgPathFormat.format( run, imageFilename )
    
    return imagePath

In [11]:
# return the hash of the path to get to an image
def imageHash( run, frameIndex, camera = center ):
    
    return hash( imagePath( run, frameIndex, camera ) )

In [12]:
# read one image file
def readOneImageFile( imagePath ):
    
    image = mpimg.imread( imagePath )
    
    if veryVerbose:
        print( "readOneImageFile():", imagePath, type(image), image.shape )
    
    return np.array( image )

In [13]:
class ImagePool():
    """store images and return them by hash"""
    def __init__( self, shape, fileName = None, initialSize = 5000, extensionSize = 2000 ):
        self.dict = {}
        self.dict["poolFileName"] = fileName
        
        self.imageHashes = { "shape": shape }
        
        cacheShape = ( initialSize, ) + shape
        self.extensionSize = extensionSize
        self.images = np.empty( shape = cacheShape, dtype = np.uint8 )
        self.nextIndex = 0

    def shape( self ):
        return self.imageHashes["shape"]
    
    def count( self ):
        return self.nextIndex
    
    def size( self ):
        return len( self.images )
    
    def getOneImage( self, run, frameIndex, camera = center ):
        """get one image for the given run, index, and camera"""
        
        oneImageHash = imageHash( run, frameIndex, camera )
        
        if veryVerbose:
            print( "ImagePool::getOneImage():", run, frameIndex, camera, "hash =", oneImageHash )
        
        # see if it's already in our cache
        if oneImageHash in self.imageHashes:
            
            imageIndex = self.imageHashes[ oneImageHash ]
            image = self.images[ imageIndex ]
            
            if veryVerbose:
                print( "ImagePool::getOneImage() CACHE HIT: ", oneImageHash, imageIndex, image.shape )
            
        else:
            image = readOneImageFile( imagePath( run, frameIndex, camera ) )
            
            assert( image.shape == self.shape() )
            
            image = np.reshape( image, (1,) + image.shape )
            imageIndex = int( self.nextIndex )
            self.nextIndex += 1
            
            # room enough?
            if self.nextIndex >= len( self.images ):
                cacheShape = ( self.extensionSize, ) + self.shape()
                self.images = np.append( self.images, 
                                np.empty( shape = cacheShape, dtype = np.uint8 ), 0 )
                
            # save the image in the cache
            self.images[ imageIndex ] = image
            self.imageHashes[ oneImageHash ] = imageIndex
            
            if veryVerbose:
                print( "ImagePool::getOneImage() CACHE MISS: ", oneImageHash, imageIndex, image.shape )
                print( "ImagePool::getOneImage() cache shape: ", self.images.shape )
        
        return image

In [14]:
imagePool = ImagePool( (160, 320, 3), "imagePool", initialSize = ( 1 + len(allFrames) // 1000 ) * 3000 )
print( "imagePool shape =", imagePool.shape() )


imagePool shape = (160, 320, 3)

In [15]:
# read in and display the HASH for all the images
if verbose:
    for run, capture in captures.items():
        print( "Hashing images for", run )

        for frameIndex, frame in enumerate( capture["frames"] ):

            # read each camera image
            for camera in cameras:

                # read one camera's image file and store it in this frame
                image = imagePool.getOneImage( run, frameIndex, camera )
                print( run, frameIndex, camera, type(image), imageHash( run, frameIndex, camera ) )
            
            if not veryVerbose:
                break


Hashing images for Simple
Simple 0 left <class 'numpy.ndarray'> 6014212017121842267
Simple 0 center <class 'numpy.ndarray'> -4105450827635942941
Simple 0 right <class 'numpy.ndarray'> -7309336852208040814
Hashing images for SimpleReversed
SimpleReversed 0 left <class 'numpy.ndarray'> -2251202771477767180
SimpleReversed 0 center <class 'numpy.ndarray'> 2761258479188749430
SimpleReversed 0 right <class 'numpy.ndarray'> 7527841049066440487

In [16]:
# DEBUG - let's just look at the first frames from each file
if verbose:
    for run, capture in captures.items():
        print( run )

        frameIndex = 0
        frame = capture["frames"][frameIndex]
        
        print( "- filename:", frame.imageFilename() )
        print( "- throttle:", frame.throttle(), type( frame.throttle() ) )
        print( "-    speed:", frame.speed(), type( frame.speed() ) )
        print( "-    error:", frame.error(), type( frame.error() ) )

        image = imagePool.getOneImage( run, frameIndex, center )

        print( "-    image:", image.shape, type(image) )
        print( "- TL pixel:", image[0][0] )


Simple
- filename: center_2017_03_20_23_55_29_770.jpg
- throttle: 0.07328288 <class 'float'>
-    speed: 8.5193 <class 'float'>
-    error: -0.05715605 <class 'float'>
-    image: (160, 320, 3) <class 'numpy.ndarray'>
- TL pixel: [121 150 192]
SimpleReversed
- filename: center_2017_03_21_00_20_10_332.jpg
- throttle: 0.06687795 <class 'float'>
-    speed: 8.030988 <class 'float'>
-    error: 0.005521045 <class 'float'>
-    image: (160, 320, 3) <class 'numpy.ndarray'>
- TL pixel: [11  6  0]

In [17]:
# plot three (or so) images, side-by-side, with a title
def plotImages( images, captions = None, title = None, img_shape = (160,320,3), filePath = None ):
    
    figure, axes = plt.subplots( 1, len(images), figsize = ( 9, 2 ), dpi = 160 )
    figure.subplots_adjust( hspace = 0.1, wspace = 0.1 )
    
    if title:
        figure.suptitle( title, fontweight = 'bold', fontsize = 14 )
        
    # plot each of the shift images and caption each with their presumed error (in pixels)
    for i, ax in enumerate(axes.flat):
        
        ax.imshow( images[i].reshape( img_shape ), cmap='binary' )
        ax.set_xlabel( captions[i] if captions else "", fontsize = 9 )
        
        ax.set_xticks([]) # remove ticks
        ax.set_yticks([])
    
    # save the figure
    if filePath:
        figure.savefig( filePath, bbox_inches='tight' )
        print( "Saved as: {}".format( filePath ) )
    
    # display the figure
    plt.show()

In [18]:
# print a frame's multiple camera angle images
def printFrameImages( frame, title = None, img_shape = (160,320,3), filePath = None ):
    
    images = [ None ] * 3
    
    for i, camera in enumerate(cameras):
        
        # plot one image
        images[i] = imagePool.getOneImage( frame.run(), frame.index(), camera )
        
    plotImages( images, cameras, title, img_shape, filePath )

In [19]:
# print some samples
if False:
    for frame in random.sample( allFrames, 20 ):
        title = "{}: {} (Error: {:6.4f})".format( frame.run(), frame.index(), frame.steeringAngle() )
        printFrameImages( frame, title )

In [20]:
import cv2

def shiftFrameImage( frame, camera = center, shift = random.random(), flip = False, img_shape = (160,320,3) ):
    
    # retrieve one image
    image = imagePool.getOneImage( frame.run(), frame.index(), camera ).reshape( img_shape )
    errorPixels = image.shape[1] * frame.error()
    
    # setup the margins for the new image size
    margin = 100 # reduce the total width of the image by this amount
    topMargin = 55
    bottomMargin = 25
    newShape = ( img_shape[0] - topMargin - bottomMargin, img_shape[1] - margin, img_shape[2] )
    
    # generate shifted image and new error amounts for the shifted image
    shiftFactor = ( shift - 0.5 ) * 2.0 # [-1.0, 1.0]
    
    # rotate the image (just a little, depending on our direction of shift)
    maxRotationDegrees = 6 + 4 * random.random()
    rotateM = cv2.getRotationMatrix2D( ( img_shape[1]/2, img_shape[0]-bottomMargin ),
                            - shiftFactor * maxRotationDegrees, 1 )
    rotatedImage = cv2.warpAffine( image, rotateM, (img_shape[1],img_shape[0]) )
    
    # shift the image
    shiftM = np.float32( [ [ 1, 0, -margin * shift ], [ 0, 1, -topMargin ] ] )
    shiftedImage = cv2.warpAffine( rotatedImage, shiftM, (newShape[1],newShape[0]) )
    
    # calc shifted error pixels
    shiftedErrorPixels = errorPixels + shiftFactor * margin + cameraShifts[ camera ]
    
    # prepare to flip about y-axis, if requested
    if flip:
        
        shiftedErrorPixels = -shiftedErrorPixels
        
        flipM = np.float32( [ [ -1, 0, newShape[1] ], [ 0, 1, 0 ] ] )
        finalImage = cv2.warpAffine( shiftedImage, flipM, (newShape[1],newShape[0]) )
        
    else:
        finalImage = shiftedImage
        
    return finalImage, shiftedErrorPixels, newShape

In [21]:
# print a frame's multiple camera angle images
def printFrameShifts( frame, camera = center, title = None, img_shape = (160,320,3), filePath = None, randomize = False, flip = True ):
    
    # randomly shift the image left or right (exactly 0.5 is no shift at all)
    shift = random.random() # [0.0, 1.0]
        
    # for this routine, generate three images and error offsets for each
    images = [ None ] * 3
    captions = [0.] * 3
    caption = "shift={:6.4f} error={:6.4f} {}"
    
    # randomize? if not, use the maximum shifts on the left and right, no shift (0.5) in the center
    leftShift, rightShift = ( random.random() * 0.5, 0.5 + random.random() * 0.5 ) if randomize else (0.0,1.0)

    # generate shifted images
    for i, shift in enumerate( [ leftShift, 0.5, rightShift ] ):
        
        flipImage = flip and (i != 1) # flip left and right, but not center image
        images[i], shiftedErrorPixels, newShape = shiftFrameImage( 
                                            frame, camera, shift, flip = flipImage, img_shape = img_shape )
        captions[i] = caption.format( shift, shiftedErrorPixels, "flipped" if flipImage else "" )
    
    plotImages( images, captions, title, newShape, filePath )

In [22]:
# print some samples
if False:
    for frame in random.sample( allFrames, 20 ):
        camera = random.choice( cameras )
        title = "{}: {} {}".format( frame.run(), frame.index(), camera )
        printFrameShifts( frame, camera, title, randomize = False, flip = False )

In [23]:
import cv2

# mask the image to a hot zone where we expect to find lane lines
def randomShadowMask( image ):
    
    shape = image.shape
    
    # create random vertices
    halfWidth = shape[1] / 2
    maskTopLeft = [ halfWidth - shape[1] * random.random(), shape[0] * random.random() * 0.9 ]
    maskTopRight = [ halfWidth + shape[1] * random.random(), shape[0] * random.random() * 0.9 ]
    maskBottomRight = [ halfWidth * ( 1.0 + random.random() ), shape[0] ]
    maskBottomLeft = [ halfWidth * random.random(), shape[0] ]    
    
    # create a single array of all the mask vertices
    maskVertices = [ np.array( [
            maskTopLeft,maskTopRight,
            maskBottomRight,
            maskBottomLeft ], 
        np.int32) ]
    
    # debug vertices
    if False:
        print("maskTopLeft =",maskTopLeft)
        print("maskTopRight =",maskTopRight)
        print("maskBottomRight =",maskBottomRight)
        print("maskBottomLeft =",maskBottomLeft)
        print(maskVertices)
        
    # start with blank mask
    regionMask = np.zeros_like(image)
    
    # create a 1, 3, or 4 channel fill color depending on the depth of the input image
    if len(image.shape) > 2:
        channelCount = image.shape[2]  # 3 or 4 depending on if there's an alpha channel in our image
        ignoreColor = (255,) * channelCount
    else:
        ignoreColor = 255
        
    #filling pixels inside the polygon defined by maskVertices with the fill color    
    cv2.fillPoly(regionMask, maskVertices, ignoreColor)
    
    return regionMask

def imageRandomShadow( image ):

    mask = randomShadowMask( image )
    
    image = cv2.addWeighted( mask, -(0.1 + 0.5 * random.random()), image, 1.0, 1.0 )
    
    return image

In [24]:
# gamma correction
import cv2
from collections import defaultdict
import pickle

# read the pickled gammaTables for faster startup
try:
    with open( "gammaTables.p", "rb" ) as gammaFile:
        gt = pickle.load( gammaFile )
        gammaTables = defaultdict( lambda: None, [ (gamma,gt[gamma]) for gamma in gt.keys() ] )
except:
    # not available, so just create an empty set
    gammaTables = defaultdict( lambda: None )

# gamma correct the supplied image
def imageGammaCorrection( image, gamma = 1.0, range = ( 0, 255 ) ):
    
    # cache the gamma tables we generate
    global gammaTables
    
    # gamma-correct the image (1.0 means no change)
    if gamma == 1.0:
        
        # no change
        finalImage = image
    
    else:
        # check our cache for the gamma table
        gammaTable = gammaTables[ gamma ]
        
        if gammaTable is None: # cache miss
            
            # build the table for this one gamma
            inverseGamma = 1.0 / gamma
            gammaTable = np.array( [ 
                ( ( pv / float( range[1] ) ) ** inverseGamma ) * range[1]
                    for pv in np.arange( range[0], range[1] + 1 )
            ] ).astype("uint8")
            
            # cache the table for quicker lookups later
            gammaTables[ gamma ] = gammaTable
        
        # correct the image gamma using the generated (or cached) gamma table
        finalImage = cv2.LUT( image, gammaTable )
        
    return finalImage

In [25]:
# print a frame with darker and lighter gamma
def printFrameGammas( frame, title = None, img_shape = (160,320,3), 
                     filePath = None, randomize = False, randomShadows = None ):
    
    # for this routine, generate three images and error offsets for each
    images = [ None ] * 3
    captions = [0.] * 3
    caption = "gamma={:3.2f}"
    
    # randomize? if not, use the maximum shifts on the left and right, no shift (0.5) in the center
    leftGamma, rightGamma = ( round(0.25 + random.random() * 0.75, 1), round(1.0 + random.random() * 2.0, 1) ) \
                                if randomize else ( 0.25, 2.5 )

    # generate shifted images
    for i, gamma in enumerate( [ leftGamma, 1.0, rightGamma ] ):
        
        image = imagePool.getOneImage( frame.run(), frame.index(), center ).reshape( img_shape )
        
        if randomShadows and ( randomShadows >= random.random() ):
            image = imageRandomShadow( image )
        
        image = imageGammaCorrection( image, gamma )
        
        images[i] = image
        captions[i] = caption.format( gamma )
    
    plotImages( images, captions, title, img_shape, filePath )

In [26]:
# print some samples
if False:
    for frame in random.sample( allFrames, 20 ):
        title = "{}: {}".format( frame.run(), frame.index() )
        printFrameGammas( frame, title, randomize = True, randomShadows = 0.25 )

In [27]:
from datetime import datetime
startTime = datetime.now()
framesPerBlock = 10000

if skipBlockChecks:
    print( "Skipping block check:", startTime )
    blocks = 0
else:
    print( "Starting block check:", startTime )
    blocks = 30

totalFrames = blocks * framesPerBlock

for block in range( blocks ):
    
    blockTime = datetime.now()
    for frame in random.sample( allFrames, framesPerBlock ):
        
        # randomly shift the image left or right (exactly 0.5 is no shift at all)
        shift = random.random() # [0.0, 1.0]
        flip = random.randint(0,1)
        camera = random.choice( cameras )
        image, shiftedErrorPixels, newShape = shiftFrameImage( frame, camera = camera, shift = shift, flip = flip )
        
        gamma = round( 0.25 + random.random() * 2.75, 2 )
        modImage = imageGammaCorrection( image, gamma )
    
    msg = "Block {} \tTime: {}\tImageShape: {}"
    print( msg.format( block, datetime.now() - blockTime, image.shape ) )
    
print( "Total Time ({} images):".format( totalFrames ), datetime.now() - startTime )


Skipping block check: 2017-04-09 09:19:47.709508
Total Time (0 images): 0:00:00.000994

In [28]:
imagePool.count(),imagePool.size()


Out[28]:
(6, 51000)

In [29]:
# pickle the gammaTables for faster startup later
import pickle
if True:
    with open( "gammaTables.p", "wb" ) as gammaFile:
        pickle.dump( { gamma: gammaTables[gamma] for gamma in gammaTables.keys() }, gammaFile )

# here's how to read the gammaTables back in
if False:
    try:
        with open( "gammaTables.p", "rb" ) as gammaFile:
            gt = pickle.load( gammaFile )
            gammaTables = defaultdict( lambda: None, [ (gamma,gt[gamma]) for gamma in gt.keys() ] )
    except:
        gammaTables = defaultdict( lambda: None )

    print(gammaTables)

In [30]:
# assert(False) # stop here

Joystix Experiments


In [31]:
if False:
    from collections import defaultdict
    import numpy as np
    import sys

    def processJoystickEvent( buffer ):

        # get the event type, and either the button or axis, depending on the event type
        items = np.frombuffer( buffer, dtype = np.uint8, count = 2, offset = 6 )
        event = items[0]
        buttonOrAxis = items[1]

        # get the value of the button or joystick axis
        value = np.frombuffer( buffer, dtype = np.int16, count = 1, offset = 4 )[0]

        # get the time in milliseconds (since when?) of the event
        time = np.frombuffer( buffer, dtype = np.uint32, count = 1, offset = 0 )[0]

        return ( event, buttonOrAxis, value, time )

    with open( '/dev/input/js0', 'rb' ) as joystick:

        dataFrame = []
        buffer = np.zeros( shape = (16,), dtype = np.uint8 ) # twice as big as we need (unicode?)

        events = 0
        while events < 300:
            joystick.flush()
            for c in joystick.read(1):

                buffer[len(dataFrame)] = np.uint8(c)
                dataFrame += [c]

                if len(dataFrame) >= 8:

                    event, axis, value, time = processJoystickEvent( buffer[:8] )

                    if event == 1:
                        eventType = "[button-pressed] "
                        axisOrButton = "button"
                    elif event == 2:
                        eventType = "[axis-moved] "
                        axisOrButton = "axis"
                    elif event == 129:
                        eventType = "[initial-value] "
                        axisOrButton = "button"
                    elif event == 130:
                        eventType = "[initial-axis] "
                        axisOrButton = "axis"
                    else:
                        eventType = "[unknown{}] ".format( event )
                        axisOrButton = "device"

                    sys.stdout.write( "Joystick event " + eventType )
                    sys.stdout.write( "on " + axisOrButton + str(axis) )
                    sys.stdout.write( ": value = " + str(value) + " " )
                    sys.stdout.write( "at time = " + str(time) + "\n" )
                    sys.stdout.flush()

                    dataFrame = dataFrame[8:]
                    events += 1

Keras Model Training


In [32]:
# build up one dataset
if False:
    groups = 20
    samples = 1000
    size = groups * samples

    print( "Building Dataset with {} groups of {} samples".format( groups, samples ) )

    X_train = np.zeros( ( size, 80, 220, 3 ) )
    y_train = np.zeros( ( size, ) )

    index = 0
    for group in range( groups ):
        print( "- Group", group )
        for frame in random.sample( allFrames, samples ):

            # randomly shift the image left or right (exactly 0.5 is no shift at all)
            shift = random.random() # [0.0, 1.0]
            flip = random.randint(0,1)
            image, shiftedErrorPixels, newShape = shiftFrameImage( frame, shift = shift, flip = flip )

            gamma = round( 0.25 + random.random() * 2.75, 2 )
            gammaImage = imageGammaCorrection( image, gamma )

            X_train[index] = gammaImage
            y_train[index] = shiftedErrorPixels
            index += 1

In [33]:
randomShadows = 0.10 # percent of images to add psuedo-shadows

def trainingBatchGenerator_OLDVERSION( batchSize = 128 ):
    
    trainingImages = np.zeros( ( batchSize, 80, 220, 3 ) )
    trainingLabels = np.zeros( ( batchSize, 1 ) )
    
    while True: # yield below
        index = 0
        for frame in random.sample( allFrames, batchSize ):

            # randomly shift the image left or right (exactly 0.5 is no shift at all)
            shift = random.random() # [0.0, 1.0]
            flip = random.randint(0,1)
            image, shiftedErrorPixels, newShape = shiftFrameImage( frame, shift = shift, flip = flip )
            
            # occasionally add psuedo-shadows, if requested
            if randomShadows and ( randomShadows >= random.random() ):
                image = imageRandomShadow( image )
            
            # slightly shift the image gamma darker or brighter
            gamma = round( 0.25 + random.random() * 2.75, 2 )
            gammaImage = imageGammaCorrection( image, gamma )
            
            # add this one modified image to the training set
            trainingImages[index] = gammaImage
            trainingLabels[index] = shiftedErrorPixels
            index += 1
            
        yield trainingImages, trainingLabels

In [34]:
randomShadows = 0.10 # percent of images to add psuedo-shadows

def trainingBatchGenerator( batchSize = 128 ):
    
    trainingImages = np.zeros( ( batchSize, 80, 220, 3 ) )
    trainingLabels = np.zeros( ( batchSize, 1 ) )
    
    bins = list( binnedCounts )
    
    while True: # yield below
        for index in range( batchSize ):
            
            # choose evenly between our binned frames, so that the over-represented steering angles
            # get chosen only as often as the under-represented ones
            bin = random.choice( binnedKeys )
            frames = binnedFrames[ bin ]
            frame = random.choice( frames )

            # randomly shift the image left or right (exactly 0.5 is no shift at all)
            shift = random.random() # [0.0, 1.0]
            flip = random.randint(0,1)
            camera = random.choice( cameras )

            image, shiftedErrorPixels, newShape = shiftFrameImage( 
                                            frame, camera = camera, shift = shift, flip = flip )
            
            # occasionally add psuedo-shadows, if requested
            if randomShadows and ( randomShadows >= random.random() ):
                image = imageRandomShadow( image )
            
            # slightly shift the image gamma darker or brighter
            gamma = round( 0.25 + random.random() * 2.75, 2 )
            gammaImage = imageGammaCorrection( image, gamma )
            
            # add this one modified image to the training set
            trainingImages[index] = gammaImage
            trainingLabels[index] = shiftedErrorPixels
            index += 1
            
        yield trainingImages, trainingLabels

In [38]:
from keras.models import Sequential
from keras.layers import Lambda, Convolution2D, Flatten, Dense, Dropout
from keras.layers.advanced_activations import ELU
from keras.layers.advanced_activations import PReLU
from keras.regularizers import l2 as L2

model = Sequential()

# input layer: normalize pixels from [0,255] to [-1.0,1.0]
model.add( Lambda( lambda x: x/127.5 - 1.0, input_shape = ( 80, 220, 3 ) ) )

# Conv 1: 5 x 5 convolution layer with 32 filters and 2 x 2 stride
model.add( Convolution2D( 32, 5, 5, subsample=(2, 2), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )

# Conv 2: 5 x 5 convolution layer with 48 filters and 2 x 2 stride
model.add( Convolution2D( 48, 5, 5, subsample=(2, 2), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )

# Conv 3: 5 x 5 convolution layer with 64 filters and 2 x 2 stride
model.add( Convolution2D( 64, 5, 5, subsample=(2, 2), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )

# Conv 4: 3 x 3 convolution layer with 128 filters and 1 x 1 stride
model.add( Convolution2D( 128, 3, 3, subsample=(1, 1), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )

# Conv 5: 3 x 3 convolution layer with 128 filters and 1 x 1 stride
model.add( Convolution2D( 128, 3, 3, subsample=(1, 1), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )

# flatten 2D layers to 1D fully-connected layer
model.add( Flatten() )

# FC 1: Fully-Connected layer with 128 nodes, dropout-keep 60% between FC layers
model.add( Dense( 128, W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
model.add( Dropout( 0.60 ) )

# FC 2: Fully-Connected layer with 64 nodes, dropout-keep 60% between FC layers
model.add( Dense( 64, W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
model.add( Dropout( 0.60 ) )

# FC 3: Fully-Connected layer with 32 nodes, dropout-keep 60% between FC layers
model.add( Dense( 32, W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
model.add( Dropout( 0.60 ) )
    
# finally, boil output down to a single inferred error prediction
model.add( Dense( 1 ) )

# too big?
print( "Model Parameters:", model.count_params() )
    
# compile and fit
model.compile( loss = "mse", optimizer = "adam" )
model.fit_generator( trainingBatchGenerator( batchSize = 200 ), 
                    samples_per_epoch = 50000,
                    nb_epoch = 20 )

# save it (TODO: randomize filename)
model.save( "model.h5" )


Model Parameters: 1539217
Epoch 1/20
50000/50000 [==============================] - 57s - loss: 43337.5100      
Epoch 2/20
50000/50000 [==============================] - 56s - loss: 29219.3616     
Epoch 3/20
50000/50000 [==============================] - 56s - loss: 22432.5454     
Epoch 4/20
50000/50000 [==============================] - 56s - loss: 18984.3424     
Epoch 5/20
50000/50000 [==============================] - 56s - loss: 17322.4639     
Epoch 6/20
50000/50000 [==============================] - 56s - loss: 16604.7118     
Epoch 7/20
50000/50000 [==============================] - 56s - loss: 15476.8723     
Epoch 8/20
50000/50000 [==============================] - 56s - loss: 14906.9206     
Epoch 9/20
50000/50000 [==============================] - 56s - loss: 14432.1337     
Epoch 10/20
50000/50000 [==============================] - 56s - loss: 13640.6626     
Epoch 11/20
50000/50000 [==============================] - 56s - loss: 13481.7462     
Epoch 12/20
50000/50000 [==============================] - 56s - loss: 13395.7265     
Epoch 13/20
50000/50000 [==============================] - 56s - loss: 12754.0030     
Epoch 14/20
50000/50000 [==============================] - 56s - loss: 12831.6605     
Epoch 15/20
50000/50000 [==============================] - 56s - loss: 12410.0160     
Epoch 16/20
50000/50000 [==============================] - 56s - loss: 12389.2505     
Epoch 17/20
50000/50000 [==============================] - 56s - loss: 12217.3681     
Epoch 18/20
50000/50000 [==============================] - 56s - loss: 11889.3872     
Epoch 19/20
50000/50000 [==============================] - 56s - loss: 11780.7157     
Epoch 20/20
50000/50000 [==============================] - 56s - loss: 11537.3974     

In [ ]:
frame = allFrames[0]
image = imagePool.getOneImage( frame.run(), frame.index(), center )
img_shape = image.shape

print( image.shape )

In [ ]: