In [1]:
# 234567890123456789012345678901234567890123456789012345678901234567890123456789
import csv
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
In [2]:
# run options -- three levels of console chatter, each noisier than the last
verbose, veryVerbose, extremelyVerbose = True, False, False

# when True, process ONLY runs from the "secret" jungle track
doProcessedDatasetsOnly = False

# block checks are stress tests on our image pool and pipeline, for testing
skipBlockChecks = True
In [3]:
# primary data directory
# -- for data as recorded directly from CarSim
dataDir = "./Data/"

# default run list (other candidates seen so far: "ComplexForward", "UdacityData")
runNames = [ "Simple", "SimpleReversed" ]

# the processed datasets live in their own sub-directory and have their own run list
if doProcessedDatasetsOnly:
    dataDir = dataDir + "dropbox/"
    runNames = [ "ComplexForwardLaneLine" ]

# path templates: the first placeholder is the run name, the second (images only) the file name
csvPathFormat = dataDir + "{}/driving_log.csv"
imgPathFormat = dataDir + "{}/IMG/{}"

# camera identifiers, and the pixel offset added to the error for each camera
left = "left"
center = "center"
right = "right"
cameras = [ left, center, right ]
cameraShifts = dict( zip( cameras, ( -266, 0, 266 ) ) )
In [4]:
# containers filled while the CarSim CSV capture files are processed:
#   captures  -- one entry per run name
#   allFrames -- every frame from every run, pooled together
captures = dict()
allFrames = list()
In [5]:
class CaptureFrame():
    """
    Store the filename parts and telemetry for a single, multi-camera capture event
    from the Udacity SDC CarSim.

    Only the parts of the image file names that differ between frames are kept
    (the "year_month" piece and the "day_time_id" piece); imageFilename() rebuilds
    the full name on demand.  The numerics (steering angle, throttle, brake, speed
    and the optional "error") are stored as floats.
    """
    def __init__( self, run, index, path = None ):
        """
        run   -- name of the run this frame belongs to (used as a key into `captures`)
        index -- position of this frame within its run
        path  -- path of the CSV file this frame was read from (optional)
        """
        self._dict = {
            "run": run,
            "index": index,
            "path": path,
            "dateYearMonth": str(),
            "dateDayID": str(),
        }
        self._numerics = {}

    def run( self ):
        """Return the name of the run this frame belongs to."""
        return self._dict["run"]

    def index( self ):
        """Return the index of this frame within its run."""
        return self._dict["index"]

    def path( self ):
        """Return the CSV path given at construction (may be None)."""
        return self._dict["path"]

    def setDateYearMonth( self, dateYearMonth ):
        """Store the "year_month" part of the image file name, as a string."""
        self._dict["dateYearMonth"] = str( dateYearMonth )

    def dateYearMonth( self ):
        """Return the stored "year_month" part (empty string until set)."""
        return self._dict["dateYearMonth"]

    def setDateDayID( self, dateDayID ):
        """Store the "day_time_id" part of the image file name, as a string."""
        self._dict["dateDayID"] = str( dateDayID )

    def dateDayID( self ):
        """Return the stored "day_time_id" part (empty string until set)."""
        return self._dict["dateDayID"]

    def setNumerics( self, steeringAngle, throttle, brake, speed, error = None ):
        """
        Store the telemetry for this frame; every value is converted with float().

        error is optional.  Pass None to leave any previously stored error untouched.
        An explicit 0.0 IS stored (a truthiness test would skip it and could leave a
        stale, non-zero error behind when the numerics are set a second time).
        """
        self._numerics["steeringAngle"] = float( steeringAngle )
        self._numerics["throttle"] = float( throttle )
        self._numerics["brake"] = float( brake )
        self._numerics["speed"] = float( speed )
        if error is not None:
            self._numerics["error"] = float( error )

    def steeringAngle( self ):
        """Return the steering angle as a float (KeyError before setNumerics())."""
        return self._numerics["steeringAngle"]

    def throttle( self ):
        """Return the throttle as a float (KeyError before setNumerics())."""
        return self._numerics["throttle"]

    def brake( self ):
        """Return the brake value as a float (KeyError before setNumerics())."""
        return self._numerics["brake"]

    def speed( self ):
        """Return the speed as a float (KeyError before setNumerics())."""
        return self._numerics["speed"]

    def error( self ):
        """Return the stored error, or 0.0 when none was ever stored."""
        return self._numerics.get( "error", 0.0 )

    def imageFilename( self, camera = center ):
        """
        Rebuild the image file name for this frame.

        The name is "<prefix>_<year_month>_<day_time_id>.jpg", where the prefix is the
        run name when doProcessedDatasetsOnly is set and the camera name otherwise.

        The frame's own year_month is used when it has been set.  The run-level value
        in `captures` only remembers the LAST row that was read, so relying on it gives
        wrong names for the earlier frames of a run recorded across a month boundary.
        It is still used as a fallback for frames whose year_month was never set.
        """
        runName = self._dict["run"]
        yearMonth = self._dict["dateYearMonth"] or captures[runName]["dateYearMonth"]
        prefix = runName if doProcessedDatasetsOnly else camera
        return "{}_{}_{}.jpg".format( prefix, yearMonth, self._dict["dateDayID"] )
In [6]:
# read and process each drive-data file
for run in runNames:
    # read one complete csv file
    csvPath = csvPathFormat.format( run )
    print( "Processing CarSim CSV File:", csvPath )
    capture = {}
    captures[run] = capture
    frames = []
    capture["frames"] = frames

    # save the run and path in each frame,
    # so let's cache a single copy to replicate throughout the dict
    frameRun = str(run)
    framePath = str(csvPath)

    # re-running this cell must not pool the same run twice:
    # drop whatever an earlier execution already pooled for this run (in place, same list object)
    allFrames[:] = [ pooledFrame for pooledFrame in allFrames if pooledFrame.run() != frameRun ]

    # newline = "" is what the csv module asks for when it is handed a file object
    with open( csvPath, newline = "" ) as csvFile:
        csvReader = csv.reader( csvFile )
        for csvLineNumber, csvLine in enumerate( csvReader ):
            # only the "UdacityData" log has a header line to skip
            if run == "UdacityData" and csvLineNumber < 1:
                continue
            # csv.reader yields [] for blank lines; there is nothing to index in those
            if not csvLine:
                continue

            # setup a new frame
            frameIndex = len( frames )
            frame = CaptureFrame( frameRun, frameIndex, framePath )
            frames.append( frame )

            # rather than store all the mostly similar filenames, we'll just store the unique parts;
            # the split is the same for both CSV flavours, only the leading element differs:
            #   original : "center_2017_03_21_00_20_10_332.jpg"
            #   processed: "ComplexForwardLaneLine_2017_04_02_08_35_20_4542.jpg"
            centerFileName = csvLine[0].split('/')[-1]
            s = centerFileName[:-len(".jpg")].split("_") # such as ["center","2017","03","21", ...]
            dateYearMonth = "_".join( s[1:3] ) # just the year and month parts, "2017_03"
            capture["dateYearMonth"] = dateYearMonth # run-level copy: always the most recent row
            frame.setDateYearMonth( dateYearMonth )
            dateDayID = "_".join( s[3:] ) # just the day, time and ID parts, "21_00_20_10_332"
            frame.setDateDayID( dateDayID )

            # verify all is well: the rebuilt name must match the logged one
            # ("!=" compares values; "is not" on a string literal tests object identity)
            if run != "UdacityData":
                if veryVerbose:
                    print( "centerFileName =", centerFileName )
                    print( " imageFilename =", frame.imageFilename( center ) )
                assert centerFileName == frame.imageFilename( center )

            # store the numerics (steeringAngle, throttle, brake, speed, and error) for this frame
            if len(csvLine) > 7:
                # our "processed" CSV format carries the error in an extra column
                error = float( csvLine[7] )
            else:
                # use the original format, but normalize the steeringAngle "error" between [-0.5, 0.5]
                error = float( csvLine[3] ) / 2
            frame.setNumerics( csvLine[3], csvLine[4], csvLine[5], csvLine[6], error )

    # also collect all the frames together, for when we don't care what run they're from
    allFrames += frames

# report the total
print( "Total frames available:", len(allFrames) )
In [7]:
# sanity check: show the first frame of every run (every frame when veryVerbose)
if verbose:
    for run, capture in captures.items():
        print( "Verifying captures for", run )
        runFrames = capture["frames"]
        # rebuild the first frame's center-camera file name
        print( "-- first frame filename: ", runFrames[0].imageFilename( center ) )
        # one frame is enough unless we were asked to be very chatty
        framesToShow = runFrames if veryVerbose else runFrames[:1]
        for frame in framesToShow:
            print( "- Frame", frame.index(), ":", frame )
In [8]:
# split the frames up into bins, sliced by their error
from collections import Counter
binnedPrecision = 2 # number of rounded digits

# one pass: group every frame under its rounded error
# (the key variable is deliberately not called `bin`, which would rebind the builtin)
binnedFrames = dict()
for frame in allFrames:
    errorBin = round( frame.error(), binnedPrecision )
    binnedFrames.setdefault( errorBin, list() ).append( frame )

# number of frames per bin, and the list of bin keys used for uniform sampling
binnedCounts = Counter( { errorBin: len( members ) for errorBin, members in binnedFrames.items() } )
binnedKeys = list( binnedCounts )
In [9]:
# peek at one randomly chosen error bin
# (named sampleBin so the builtin bin() stays usable in later cells)
sampleBin = random.choice( binnedKeys )
sampleBin
Out[9]:
In [ ]:
In [10]:
# return the path to get to an image
def imagePath( run, frameIndex, camera = center ):
    """
    Build the path of one camera image.

    run        -- run name, a key into `captures`
    frameIndex -- index into that run's "frames" list
    camera     -- camera name handed to the frame's imageFilename()

    Returns imgPathFormat filled in with the run name and the frame's image file name.
    """
    if extremelyVerbose:
        # report the camera that was actually requested (not the `center` constant)
        print( "imagePath(): run={} index={} camera={}".format( run, frameIndex, camera ) )
    frame = captures[run]["frames"][frameIndex]
    return imgPathFormat.format( run, frame.imageFilename( camera ) )
In [11]:
# return the hash of the path to get to an image
def imageHash( run, frameIndex, camera = center ):
    """
    Identify one camera image by the built-in hash() of its path string.

    NOTE(review): str hashes are normally randomized per interpreter process, so
    these values presumably should not be persisted between sessions -- confirm.
    """
    pathToImage = imagePath( run, frameIndex, camera )
    return hash( pathToImage )
In [12]:
# read one image file
def readOneImageFile( imagePath ):
    """Load the image stored at imagePath with matplotlib and return it as a new numpy array."""
    pixels = mpimg.imread( imagePath )
    if veryVerbose:
        print( "readOneImageFile():", imagePath, type(pixels), pixels.shape )
    # np.array() makes a copy of whatever imread handed back
    return np.array( pixels )
In [13]:
class ImagePool():
    """
    Store images and return them by hash.

    All images live in one pre-allocated uint8 array (self.images); self.imageHashes
    maps an image hash to its row in that array.  The same dict also holds the
    per-image shape under the key "shape".
    """
    def __init__( self, shape, fileName = None, initialSize = 5000, extensionSize = 2000 ):
        """
        shape         -- shape of one image, e.g. (160, 320, 3)
        fileName      -- stored under self.dict["poolFileName"]; not used otherwise in this class
        initialSize   -- number of image slots allocated up front
        extensionSize -- number of slots added each time the pool runs out of room
        """
        self.dict = { "poolFileName": fileName }
        self.imageHashes = { "shape": shape }
        self.extensionSize = extensionSize
        self.images = np.empty( shape = ( initialSize, ) + shape, dtype = np.uint8 )
        self.nextIndex = 0

    def shape( self ):
        """Return the shape of a single pooled image."""
        return self.imageHashes["shape"]

    def count( self ):
        """Return the number of images stored so far."""
        return self.nextIndex

    def size( self ):
        """Return the number of allocated slots (used or not)."""
        return len( self.images )

    def getOneImage( self, run, frameIndex, camera = center ):
        """
        Get one image for the given run, index, and camera.

        The image is read from disk on first request and served from the pool
        afterwards.  The return value is ALWAYS the pooled row, shaped like
        self.shape(), whether the request was a cache hit or a miss.  (Returning the
        freshly read array with its extra leading axis on a miss would give callers
        two different shapes for the same image.)  The row is a view into the pool,
        so callers must not modify it in place.

        Raises AssertionError when the file's shape differs from the pool's shape.
        """
        oneImageHash = imageHash( run, frameIndex, camera )
        if veryVerbose:
            print( "ImagePool::getOneImage():", run, frameIndex, camera, "hash =", oneImageHash )

        # see if it's already in our cache
        if oneImageHash in self.imageHashes:
            imageIndex = self.imageHashes[ oneImageHash ]
            if veryVerbose:
                print( "ImagePool::getOneImage() CACHE HIT: ", oneImageHash, imageIndex, self.images[ imageIndex ].shape )
        else:
            image = readOneImageFile( imagePath( run, frameIndex, camera ) )
            assert image.shape == self.shape()
            imageIndex = int( self.nextIndex )
            self.nextIndex += 1
            # room enough?  grow as soon as the last free slot has been claimed
            if self.nextIndex >= len( self.images ):
                extensionShape = ( self.extensionSize, ) + self.shape()
                self.images = np.append( self.images,
                                         np.empty( shape = extensionShape, dtype = np.uint8 ), 0 )
            # save the image in the cache
            self.images[ imageIndex ] = image
            self.imageHashes[ oneImageHash ] = imageIndex
            if veryVerbose:
                print( "ImagePool::getOneImage() CACHE MISS: ", oneImageHash, imageIndex, image.shape )
                print( "ImagePool::getOneImage() cache shape: ", self.images.shape )
        return self.images[ imageIndex ]
In [14]:
# reserve 3000 image slots for every started thousand of frames
initialPoolSlots = 3000 * ( len(allFrames) // 1000 + 1 )
imagePool = ImagePool( (160, 320, 3), "imagePool", initialSize = initialPoolSlots )
print( "imagePool shape =", imagePool.shape() )
In [15]:
# pull images through the pool and show the HASH each one is stored under
# (only the first frame of every run, unless veryVerbose asks for all of them)
if verbose:
    for run, capture in captures.items():
        print( "Hashing images for", run )
        frameLimit = None if veryVerbose else 1
        for frameIndex, frame in enumerate( capture["frames"][:frameLimit] ):
            # one image per camera; fetching it also stores it in the pool
            for camera in cameras:
                image = imagePool.getOneImage( run, frameIndex, camera )
                print( run, frameIndex, camera, type(image), imageHash( run, frameIndex, camera ) )
In [16]:
# DEBUG - dump the first frame of each run: file name, numerics and the pooled image
if verbose:
    for run, capture in captures.items():
        print( run )
        frameIndex = 0
        frame = capture["frames"][frameIndex]
        print( "- filename:", frame.imageFilename() )
        # each numeric is shown together with its type
        for label, getter in ( ( "- throttle:", frame.throttle ),
                               ( "- speed:", frame.speed ),
                               ( "- error:", frame.error ) ):
            value = getter()
            print( label, value, type( value ) )
        image = imagePool.getOneImage( run, frameIndex, center )
        print( "- image:", image.shape, type(image) )
        print( "- TL pixel:", image[0][0] )
In [17]:
# plot three (or so) images, side-by-side, with a title
def plotImages( images, captions = None, title = None, img_shape = (160,320,3), filePath = None ):
    """
    Show a row of images in a single figure.

    images    -- sequence of arrays; each one is reshaped to img_shape before display
    captions  -- optional sequence with one x-label per image
    title     -- optional bold title for the whole figure
    img_shape -- shape every image is reshaped to
    filePath  -- when given, the figure is also saved there

    Nothing is drawn for an empty `images` sequence.
    """
    if len( images ) == 0:
        return

    # squeeze = False always gives a 2-D array of axes; without it a single image yields
    # one bare Axes object, which has no .flat to iterate over
    figure, axes = plt.subplots( 1, len(images), figsize = ( 9, 2 ), dpi = 160, squeeze = False )
    figure.subplots_adjust( hspace = 0.1, wspace = 0.1 )
    if title:
        figure.suptitle( title, fontweight = 'bold', fontsize = 14 )

    # plot each image with its caption (if any) underneath
    for i, ax in enumerate(axes.flat):
        ax.imshow( images[i].reshape( img_shape ), cmap='binary' )
        ax.set_xlabel( captions[i] if captions else "", fontsize = 9 )
        ax.set_xticks([]) # remove ticks
        ax.set_yticks([])

    # save the figure
    if filePath:
        figure.savefig( filePath, bbox_inches='tight' )
        print( "Saved as: {}".format( filePath ) )

    # display the figure, then release it so repeated calls do not pile up open figures
    plt.show()
    plt.close( figure )
In [18]:
# show one frame as seen by every camera, side by side
def printFrameImages( frame, title = None, img_shape = (160,320,3), filePath = None ):
    """Fetch the frame's image for each entry of `cameras` and plot them, captioned by camera name."""
    views = [ imagePool.getOneImage( frame.run(), frame.index(), camera ) for camera in cameras ]
    plotImages( views, cameras, title, img_shape, filePath )
In [19]:
# print some samples (disabled; flip the condition to look at 20 random frames)
if False:
    sampledFrames = random.sample( allFrames, 20 )
    for frame in sampledFrames:
        # NOTE: the value shown after "Error:" is the frame's steering angle
        title = "{}: {} (Error: {:6.4f})".format( frame.run(), frame.index(), frame.steeringAngle() )
        printFrameImages( frame, title )
In [20]:
import cv2
def shiftFrameImage( frame, camera = center, shift = None, flip = False, img_shape = (160,320,3) ):
    """
    Produce one augmented (rotated, shifted, cropped, optionally mirrored) image for a frame.

    frame     -- the frame whose image is used
    camera    -- which camera's image to use; its entry in cameraShifts is added to the error
    shift     -- horizontal shift in [0.0, 1.0], where exactly 0.5 is no shift at all.
                 None (the default) draws a fresh random value on EVERY call.  A default of
                 random.random() in the signature is evaluated only once, when the function
                 is defined, and would silently reuse that single value from then on.
    flip      -- mirror the result about the vertical axis and negate the error
    img_shape -- shape of the pooled source image

    Returns ( image, error in pixels for that image, shape of that image ).
    """
    if shift is None:
        shift = random.random()

    # retrieve one image
    image = imagePool.getOneImage( frame.run(), frame.index(), camera ).reshape( img_shape )
    errorPixels = image.shape[1] * frame.error()

    # setup the margins for the new image size
    margin = 100 # reduce the total width of the image by this amount
    topMargin = 55
    bottomMargin = 25
    newShape = ( img_shape[0] - topMargin - bottomMargin, img_shape[1] - margin, img_shape[2] )

    # generate shifted image and new error amounts for the shifted image
    shiftFactor = ( shift - 0.5 ) * 2.0 # [-1.0, 1.0]

    # rotate the image (just a little, depending on our direction of shift)
    # about the bottom-center of the area that is kept
    maxRotationDegrees = 6 + 4 * random.random()
    rotateM = cv2.getRotationMatrix2D( ( img_shape[1]/2, img_shape[0]-bottomMargin ),
                                       - shiftFactor * maxRotationDegrees, 1 )
    rotatedImage = cv2.warpAffine( image, rotateM, (img_shape[1],img_shape[0]) )

    # shift the image; warping into the smaller output size also crops it
    shiftM = np.float32( [ [ 1, 0, -margin * shift ], [ 0, 1, -topMargin ] ] )
    shiftedImage = cv2.warpAffine( rotatedImage, shiftM, (newShape[1],newShape[0]) )

    # calc shifted error pixels
    shiftedErrorPixels = errorPixels + shiftFactor * margin + cameraShifts[ camera ]

    # flip about y-axis, if requested
    if flip:
        shiftedErrorPixels = -shiftedErrorPixels
        flipM = np.float32( [ [ -1, 0, newShape[1] ], [ 0, 1, 0 ] ] )
        finalImage = cv2.warpAffine( shiftedImage, flipM, (newShape[1],newShape[0]) )
    else:
        finalImage = shiftedImage
    return finalImage, shiftedErrorPixels, newShape
In [21]:
# print one frame's image under three different shifts
def printFrameShifts( frame, camera = center, title = None, img_shape = (160,320,3), filePath = None, randomize = False, flip = True ):
    """
    Plot a left-shifted, an unshifted and a right-shifted version of one frame's image.

    randomize -- False: use the maximum shifts (0.0 and 1.0) on the left and right;
                 True : draw the left shift from [0.0, 0.5) and the right one from [0.5, 1.0)
    flip      -- mirror the left and right images (never the center one)

    The remaining arguments are passed through to shiftFrameImage() / plotImages().
    """
    # for this routine, generate three images and a caption for each
    images = [ None ] * 3
    captions = [0.] * 3
    caption = "shift={:6.4f} error={:6.4f} {}"

    # the center image is never shifted (0.5 is no shift at all)
    leftShift, rightShift = ( random.random() * 0.5, 0.5 + random.random() * 0.5 ) if randomize else (0.0,1.0)

    # generate shifted images
    # (no separate up-front random `shift` is drawn: the loop variable is the only one ever used)
    for i, shift in enumerate( [ leftShift, 0.5, rightShift ] ):
        flipImage = flip and (i != 1) # flip left and right, but not center image
        images[i], shiftedErrorPixels, newShape = shiftFrameImage(
            frame, camera, shift, flip = flipImage, img_shape = img_shape )
        captions[i] = caption.format( shift, shiftedErrorPixels, "flipped" if flipImage else "" )
    plotImages( images, captions, title, newShape, filePath )
In [22]:
# print some samples (disabled; flip the condition to see shifted versions of 20 random frames)
if False:
    sampledFrames = random.sample( allFrames, 20 )
    for frame in sampledFrames:
        # a random camera for every sampled frame
        camera = random.choice( cameras )
        title = "{}: {} {}".format( frame.run(), frame.index(), camera )
        printFrameShifts( frame, camera, title, randomize = False, flip = False )
In [23]:
import cv2
# build a mask image holding one random, filled quadrilateral
def randomShadowMask( image ):
    """
    Return an array shaped like `image` that is zero everywhere except inside a
    random four-cornered polygon, which is filled with 255 in every channel.

    The two bottom corners sit on the bottom edge (y = image height); the two top
    corners are placed at random heights within the top 90% of the image and may
    fall outside the image horizontally.
    """
    height, width = image.shape[0], image.shape[1]
    halfWidth = width / 2

    # random corners as [x, y]; the order of the random draws is top-left x, y,
    # top-right x, y, bottom-right x, bottom-left x
    topLeft = [ halfWidth - width * random.random(), height * random.random() * 0.9 ]
    topRight = [ halfWidth + width * random.random(), height * random.random() * 0.9 ]
    bottomRight = [ halfWidth * ( 1.0 + random.random() ), height ]
    bottomLeft = [ halfWidth * random.random(), height ]

    # fillPoly wants a list of integer polygons
    polygon = np.array( [ topLeft, topRight, bottomRight, bottomLeft ], np.int32 )

    # fill color: a single value for 2-D images, one 255 per channel otherwise
    # (3 or 4 channels, depending on whether the image has an alpha channel)
    if image.ndim > 2:
        ignoreColor = (255,) * image.shape[2]
    else:
        ignoreColor = 255

    # start with a blank mask and paint the polygon into it
    regionMask = np.zeros_like( image )
    cv2.fillPoly( regionMask, [ polygon ], ignoreColor )
    return regionMask
def imageRandomShadow( image ):
    """
    Darken a random polygon of the image and return the result as a new image.

    The polygon mask is blended in with a negative weight between -0.1 and -0.6.
    NOTE(review): the last addWeighted() argument adds a constant 1.0 to every
    pixel -- confirm that this offset is intended.
    """
    # build the mask first, then draw the weight: this keeps the order of random draws
    shadowMask = randomShadowMask( image )
    shadowWeight = -(0.1 + 0.5 * random.random())
    return cv2.addWeighted( shadowMask, shadowWeight, image, 1.0, 1.0 )
In [24]:
# gamma correction
import cv2
from collections import defaultdict
import pickle
# read the pickled gammaTables for faster startup
# (missing entries read as None, which is what the cache lookup relies on)
# NOTE(review): pickle.load() can run arbitrary code -- only load a gammaTables.p written by this notebook
try:
    with open( "gammaTables.p", "rb" ) as gammaFile:
        gt = pickle.load( gammaFile )
    gammaTables = defaultdict( lambda: None, [ (gamma,gt[gamma]) for gamma in gt.keys() ] )
except FileNotFoundError:
    # not available (first run), so just create an empty set
    gammaTables = defaultdict( lambda: None )
except Exception as gammaLoadError:
    # unreadable or unexpected content: still start with an empty set, but say so
    # (Exception, unlike a bare except, lets KeyboardInterrupt / SystemExit through)
    print( "Could not load gammaTables.p, starting with empty gamma tables:", repr( gammaLoadError ) )
    gammaTables = defaultdict( lambda: None )
# gamma correct the supplied image
def imageGammaCorrection( image, gamma = 1.0, range = ( 0, 255 ) ):
    """
    Apply a gamma curve to an image through a lookup table.

    image -- the image to correct
    gamma -- 1.0 returns `image` itself, untouched; any other value goes through a table
    range -- ( lowest, highest ) pixel value the table is built for

    Tables are cached in the global `gammaTables`, keyed by gamma only.
    """
    global gammaTables

    # nothing to do for a gamma of exactly 1.0
    if gamma == 1.0:
        return image

    # look for a table built earlier (a miss reads as None)
    gammaTable = gammaTables[ gamma ]
    if gammaTable is None:
        lowest, highest = range[0], range[1]
        exponent = 1.0 / gamma
        # one entry per pixel value: normalize, apply the curve, scale back up
        curve = [ ( ( level / float( highest ) ) ** exponent ) * highest
                  for level in np.arange( lowest, highest + 1 ) ]
        gammaTable = np.array( curve ).astype("uint8")
        # remember the table for quicker lookups later
        gammaTables[ gamma ] = gammaTable

    # map every pixel through the generated (or cached) table
    return cv2.LUT( image, gammaTable )
In [25]:
# show one frame under a low gamma, untouched, and under a high gamma
def printFrameGammas( frame, title = None, img_shape = (160,320,3),
                      filePath = None, randomize = False, randomShadows = None ):
    """
    Plot the frame's center image three times: gamma below 1, gamma 1.0, gamma above 1.

    randomize     -- False: use the fixed gammas 0.25 and 2.5;
                     True : draw them from [0.25, 1.0] and [1.0, 3.0], rounded to one digit
    randomShadows -- chance (0..1) that a random shadow is added to each image first
    """
    # pick the two outer gammas (the low one is drawn first)
    if randomize:
        lowGamma = round(0.25 + random.random() * 0.75, 1)
        highGamma = round(1.0 + random.random() * 2.0, 1)
    else:
        lowGamma, highGamma = 0.25, 2.5

    # generate the three images and their captions
    images, captions = [], []
    for gamma in ( lowGamma, 1.0, highGamma ):
        image = imagePool.getOneImage( frame.run(), frame.index(), center ).reshape( img_shape )
        if randomShadows and ( randomShadows >= random.random() ):
            image = imageRandomShadow( image )
        images.append( imageGammaCorrection( image, gamma ) )
        captions.append( "gamma={:3.2f}".format( gamma ) )
    plotImages( images, captions, title, img_shape, filePath )
In [26]:
# print some samples (disabled; flip the condition to see gamma variations of 20 random frames)
if False:
    sampledFrames = random.sample( allFrames, 20 )
    for frame in sampledFrames:
        title = "{}: {}".format( frame.run(), frame.index() )
        printFrameGammas( frame, title, randomize = True, randomShadows = 0.25 )
In [27]:
from datetime import datetime
# stress test: push blocks of randomly augmented frames through the image pool and pipeline
startTime = datetime.now()
framesPerBlock = 10000
if skipBlockChecks:
    print( "Skipping block check:", startTime )
    blocks = 0
else:
    print( "Starting block check:", startTime )
    blocks = 30

# random.sample() raises ValueError when asked for more frames than were loaded,
# so a block is never larger than the pool of frames
blockSampleSize = min( framesPerBlock, len( allFrames ) )
totalFrames = blocks * blockSampleSize

for block in range( blocks ):
    blockTime = datetime.now()
    image = None # stays None when there are no frames at all
    for frame in random.sample( allFrames, blockSampleSize ):
        # randomly shift the image left or right (exactly 0.5 is no shift at all)
        shift = random.random() # [0.0, 1.0]
        flip = random.randint(0,1)
        camera = random.choice( cameras )
        image, shiftedErrorPixels, newShape = shiftFrameImage( frame, camera = camera, shift = shift, flip = flip )
        gamma = round( 0.25 + random.random() * 2.75, 2 )
        modImage = imageGammaCorrection( image, gamma )
    msg = "Block {} \tTime: {}\tImageShape: {}"
    print( msg.format( block, datetime.now() - blockTime, image.shape if image is not None else None ) )
print( "Total Time ({} images):".format( totalFrames ), datetime.now() - startTime )
In [28]:
# ( images stored so far, slots allocated )
( imagePool.count(), imagePool.size() )
Out[28]:
In [29]:
# pickle the gammaTables for faster startup later
import pickle
# always on: write every gamma table built so far to "gammaTables.p"
if True:
with open( "gammaTables.p", "wb" ) as gammaFile:
# the tables are copied into a plain dict before dumping
# (presumably because the lambda default factory of the defaultdict cannot be pickled)
pickle.dump( { gamma: gammaTables[gamma] for gamma in gammaTables.keys() }, gammaFile )
# here's how to read the gammaTables back in
# (disabled reference snippet; it rebuilds the defaultdict so that missing gammas read as None)
if False:
try:
with open( "gammaTables.p", "rb" ) as gammaFile:
# NOTE(review): pickle.load() can run arbitrary code -- only load files written by this notebook
gt = pickle.load( gammaFile )
gammaTables = defaultdict( lambda: None, [ (gamma,gt[gamma]) for gamma in gt.keys() ] )
# NOTE(review): the bare except hides every failure; any error falls back to an empty table set
except:
gammaTables = defaultdict( lambda: None )
print(gammaTables)
In [30]:
# assert(False) # stop here
In [31]:
# disabled experiment: read raw events from a joystick device file and print them
if False:
from collections import defaultdict
import numpy as np
import sys
# decode one 8-byte event record into ( event type, button-or-axis number, value, time )
def processJoystickEvent( buffer ):
# get the event type, and either the button or axis, depending on the event type
# (two unsigned bytes, at offsets 6 and 7 of the record)
items = np.frombuffer( buffer, dtype = np.uint8, count = 2, offset = 6 )
event = items[0]
buttonOrAxis = items[1]
# get the value of the button or joystick axis
# (one signed 16-bit integer at offset 4)
value = np.frombuffer( buffer, dtype = np.int16, count = 1, offset = 4 )[0]
# get the time in milliseconds (since when?) of the event
# (one unsigned 32-bit integer at offset 0)
time = np.frombuffer( buffer, dtype = np.uint32, count = 1, offset = 0 )[0]
return ( event, buttonOrAxis, value, time )
# NOTE(review): '/dev/input/js0' is presumably the first Linux joystick device -- confirm on the target machine
with open( '/dev/input/js0', 'rb' ) as joystick:
# dataFrame collects the bytes of the record being assembled; buffer holds the same bytes as uint8
dataFrame = []
buffer = np.zeros( shape = (16,), dtype = np.uint8 ) # twice as big as we need (unicode?)
events = 0
# stop once `events` reaches 300
while events < 300:
joystick.flush()
# read one byte at a time; iterating the bytes object yields it as an int
for c in joystick.read(1):
buffer[len(dataFrame)] = np.uint8(c)
dataFrame += [c]
# a complete record is 8 bytes long
if len(dataFrame) >= 8:
event, axis, value, time = processJoystickEvent( buffer[:8] )
# translate the event type into a label and the kind of control it refers to
if event == 1:
eventType = "[button-pressed] "
axisOrButton = "button"
elif event == 2:
eventType = "[axis-moved] "
axisOrButton = "axis"
elif event == 129:
eventType = "[initial-value] "
axisOrButton = "button"
elif event == 130:
eventType = "[initial-axis] "
axisOrButton = "axis"
else:
eventType = "[unknown{}] ".format( event )
axisOrButton = "device"
# write the pieces of one report line, then flush so it shows up immediately
sys.stdout.write( "Joystick event " + eventType )
sys.stdout.write( "on " + axisOrButton + str(axis) )
sys.stdout.write( ": value = " + str(value) + " " )
sys.stdout.write( "at time = " + str(time) + "\n" )
sys.stdout.flush()
# drop the 8 bytes just processed, ready for the next record
dataFrame = dataFrame[8:]
events += 1
In [32]:
# build up one dataset (disabled): groups x samples augmented images plus their pixel errors
if False:
    groups, samples = 20, 1000
    size = groups * samples
    print( "Building Dataset with {} groups of {} samples".format( groups, samples ) )

    # one row per augmented image / label
    X_train = np.zeros( ( size, 80, 220, 3 ) )
    y_train = np.zeros( ( size, ) )

    index = 0
    for group in range( groups ):
        print( "- Group", group )
        for frame in random.sample( allFrames, samples ):
            # randomly shift the image left or right (exactly 0.5 is no shift at all)
            shift = random.random() # [0.0, 1.0]
            flip = random.randint(0,1)
            image, shiftedErrorPixels, newShape = shiftFrameImage( frame, shift = shift, flip = flip )
            # random gamma between 0.25 and 3.0, rounded to two digits
            gamma = round( 0.25 + random.random() * 2.75, 2 )
            X_train[index] = imageGammaCorrection( image, gamma )
            y_train[index] = shiftedErrorPixels
            index += 1
In [33]:
randomShadows = 0.10 # fraction of images to add pseudo-shadows to
def trainingBatchGenerator_OLDVERSION( batchSize = 128 ):
    """
    Endless generator of ( images, labels ) training batches (older version: frames are
    sampled uniformly from allFrames and always use shiftFrameImage()'s default camera).

    Every batch is yielded in freshly allocated arrays.  Re-using one pair of buffers
    would overwrite a batch that a consumer has queued but not yet read.  The price is
    one new ( batchSize, 80, 220, 3 ) array per batch.
    """
    while True: # yield below
        trainingImages = np.zeros( ( batchSize, 80, 220, 3 ) )
        trainingLabels = np.zeros( ( batchSize, 1 ) )
        for index, frame in enumerate( random.sample( allFrames, batchSize ) ):
            # randomly shift the image left or right (exactly 0.5 is no shift at all)
            shift = random.random() # [0.0, 1.0]
            flip = random.randint(0,1)
            image, shiftedErrorPixels, newShape = shiftFrameImage( frame, shift = shift, flip = flip )
            # occasionally add pseudo-shadows, if requested
            if randomShadows and ( randomShadows >= random.random() ):
                image = imageRandomShadow( image )
            # slightly shift the image gamma darker or brighter
            gamma = round( 0.25 + random.random() * 2.75, 2 )
            gammaImage = imageGammaCorrection( image, gamma )
            # add this one modified image to the training set
            trainingImages[index] = gammaImage
            trainingLabels[index] = shiftedErrorPixels
        yield trainingImages, trainingLabels
In [34]:
randomShadows = 0.10 # fraction of images to add pseudo-shadows to
def trainingBatchGenerator( batchSize = 128 ):
    """
    Endless generator of ( images, labels ) training batches.

    Each sample is built by picking an error bin uniformly at random, then a frame from
    that bin, then a random camera, shift, flip, optional shadow and gamma.  The label
    is the error in pixels returned by shiftFrameImage().

    Every batch is yielded in freshly allocated arrays.  Re-using one pair of buffers
    would overwrite a batch that a consumer has queued but not yet read.  The price is
    one new ( batchSize, 80, 220, 3 ) array per batch.
    """
    while True: # yield below
        trainingImages = np.zeros( ( batchSize, 80, 220, 3 ) )
        trainingLabels = np.zeros( ( batchSize, 1 ) )
        for index in range( batchSize ):
            # choose evenly between our binned frames, so that the over-represented steering angles
            # get chosen only as often as the under-represented ones
            errorBin = random.choice( binnedKeys )
            frame = random.choice( binnedFrames[ errorBin ] )
            # randomly shift the image left or right (exactly 0.5 is no shift at all)
            shift = random.random() # [0.0, 1.0]
            flip = random.randint(0,1)
            camera = random.choice( cameras )
            image, shiftedErrorPixels, newShape = shiftFrameImage(
                frame, camera = camera, shift = shift, flip = flip )
            # occasionally add pseudo-shadows, if requested
            if randomShadows and ( randomShadows >= random.random() ):
                image = imageRandomShadow( image )
            # slightly shift the image gamma darker or brighter
            gamma = round( 0.25 + random.random() * 2.75, 2 )
            gammaImage = imageGammaCorrection( image, gamma )
            # add this one modified image to the training set
            trainingImages[index] = gammaImage
            trainingLabels[index] = shiftedErrorPixels
        yield trainingImages, trainingLabels
In [38]:
from keras.models import Sequential
from keras.layers import Lambda, Convolution2D, Flatten, Dense, Dropout
from keras.layers.advanced_activations import ELU
from keras.layers.advanced_activations import PReLU
from keras.regularizers import l2 as L2
# Build, train and save the network: five convolution layers followed by three
# fully-connected layers and a single output, trained on batches from trainingBatchGenerator().
# NOTE(review): the keyword names used below (subsample, border_mode, W_regularizer, init,
# samples_per_epoch, nb_epoch) look like the Keras 1.x API -- confirm the installed version.
model = Sequential()
# input layer: normalize pixels from [0,255] to [-1.0,1.0]
# (the input shape matches the ( 80, 220, 3 ) batches built by the generator)
model.add( Lambda( lambda x: x/127.5 - 1.0, input_shape = ( 80, 220, 3 ) ) )
# Conv 1: 5 x 5 convolution layer with 32 filters and 2 x 2 stride
model.add( Convolution2D( 32, 5, 5, subsample=(2, 2), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
# Conv 2: 5 x 5 convolution layer with 48 filters and 2 x 2 stride
model.add( Convolution2D( 48, 5, 5, subsample=(2, 2), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
# Conv 3: 5 x 5 convolution layer with 64 filters and 2 x 2 stride
model.add( Convolution2D( 64, 5, 5, subsample=(2, 2), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
# Conv 4: 3 x 3 convolution layer with 128 filters and 1 x 1 stride
model.add( Convolution2D( 128, 3, 3, subsample=(1, 1), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
# Conv 5: 3 x 3 convolution layer with 128 filters and 1 x 1 stride
model.add( Convolution2D( 128, 3, 3, subsample=(1, 1), border_mode='valid', W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
# flatten 2D layers to 1D fully-connected layer
model.add( Flatten() )
# FC 1: Fully-Connected layer with 128 nodes, followed by Dropout( 0.60 )
# NOTE(review): Keras documents the Dropout argument as the fraction of units to DROP, so 0.60
# would keep only 40% -- the earlier wording here said "dropout-keep 60%"; confirm which was intended
model.add( Dense( 128, W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
model.add( Dropout( 0.60 ) )
# FC 2: Fully-Connected layer with 64 nodes, followed by Dropout( 0.60 ) (see the note on FC 1)
model.add( Dense( 64, W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
model.add( Dropout( 0.60 ) )
# FC 3: Fully-Connected layer with 32 nodes, followed by Dropout( 0.60 ) (see the note on FC 1)
model.add( Dense( 32, W_regularizer = L2( 0.01 ) ) )
model.add( PReLU( init='zero' ) )
model.add( Dropout( 0.60 ) )
# finally, boil output down to a single inferred error prediction
model.add( Dense( 1 ) )
# too big?
print( "Model Parameters:", model.count_params() )
# compile and fit: mean-squared-error loss, adam optimizer,
# batches of 200 from the generator, 50000 samples per epoch for 20 epochs
model.compile( loss = "mse", optimizer = "adam" )
model.fit_generator( trainingBatchGenerator( batchSize = 200 ),
samples_per_epoch = 50000,
nb_epoch = 20 )
# save it (TODO: randomize filename)
# NOTE: this overwrites any existing "model.h5" in the working directory
model.save( "model.h5" )
In [ ]:
# fetch the very first frame's center image and remember / show its shape
frame = allFrames[0]
image = imagePool.getOneImage( frame.run(), frame.index(), center )
img_shape = image.shape
print( img_shape )
In [ ]: