Emotion Detection Neural Network Training

Daniel Dittenhafer

This notebook is the primary driver of training for my emotion detection neural network.

Import References

Here we import the needed Python APIs and set the Theano configuration flags before Theano is initialized.

import datetime
import Image
import gc
import numpy as np
import os
import random
from scipy import misc
import string
import time

# Set some Theano config before initializing
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=cpu,floatX=float32,allow_gc=False,openmp=True"
import theano

import matplotlib
from matplotlib import pyplot as plt

import emotion_model
import dwdii_transforms


Using Theano backend.

# Echo the Theano settings that actually took effect, one "name: value" line
# per setting, in the same order as the flags are listed.
for settingName in ("device", "floatX", "mode", "openmp", "allow_gc"):
    print("%s: %s" % (settingName, getattr(theano.config, settingName)))

device: cpu
floatX: float32
mode: FAST_RUN
openmp: True
allow_gc: False

# Locations of the image files and of the CSV legend; both are handed to
# dwdii_transforms.load_data when the data set is loaded.
imagePath = "/root/facial_expressions/images"
dataPath = "/root/facial_expressions/data/legend.csv"
# Image size passed to load_data as imgResize.
imgResize = (150, 150)

['legend.csv', '500_picts_satz.csv']

Load Training and Test Data

In this section, the training/validation data is loaded. The load_data function pre-balances the data set by removing images from over-represented emotion classes.

# Limit handed to load_data as maxData.
maxData = 1845
# Load the images and their labels.
# NOTE(review): verboseFreq presumably controls how often progress is
# reported -- confirm in dwdii_transforms.load_data.
X_data, Y_data = dwdii_transforms.load_data(dataPath, imagePath, maxData = maxData, verboseFreq = 200, imgResize=imgResize)
# Show the shapes of the loaded image and label arrays.
print X_data.shape
print Y_data.shape

In this section, we apply transformations to the existing images to increase the amount of training data, and add a bit of noise in the hope of improving the overall training results.

# Number of imageDataGenTransform variants generated per source image.
imgDataGenCount = 12
# New images per source image: the 9 fixed transforms applied in the
# augmentation loop plus the imgDataGenCount generated variants.
transformCount = 9 + imgDataGenCount

# Pre-allocate the arrays that the augmentation loop fills in place.
# NOTE(review): np.zeros defaults to float64, so newImgs may use far more
# memory than needed -- confirm which dtype X_data actually has.
newImgs = np.zeros([X_data.shape[0] * transformCount, X_data.shape[1], X_data.shape[2]])
newYs = np.zeros([Y_data.shape[0] * transformCount, Y_data.shape[1]], dtype=np.int8)
print newImgs.shape
print newYs.shape

(38157, 150, 150)
(38157, 1)

# Take the first loaded image as a sample.
img = X_data[0]

(150, 150)

# Fixed single-image transforms, applied to every source image in this order.
# The order fixes the layout of newImgs/newYs, and the number of entries must
# match the fixed-transform count used when those arrays were allocated.
# NOTE(review): reflectY is listed both first and last, so each mirrored image
# is stored twice -- confirm whether the duplicate is intentional.
fixedTransforms = [
    dwdii_transforms.reflectY,
    dwdii_transforms.cvDilate,
    dwdii_transforms.cvErode,
    dwdii_transforms.cvDilate2,
    # dwdii_transforms.cvMedianBlur,  # disabled: not counted in transformCount
    dwdii_transforms.cvExcessiveSharpening,
    dwdii_transforms.cvEdgeEnhancement,
    dwdii_transforms.cvBlurMotion1,
    dwdii_transforms.cvBlurMotion2,
    dwdii_transforms.reflectY,
]

# ndx is the next free slot in newImgs/newYs.
ndx = 0
for i in range(X_data.shape[0]):
    img = X_data[i]
    label = Y_data[i]

    # Every transformed copy keeps the label of its source image.
    for transform in fixedTransforms:
        newImgs[ndx] = transform(img)
        newYs[ndx] = label
        ndx += 1

    for n in range(imgDataGenCount):
        imgX = emotion_model.imageDataGenTransform(img, label)
        # Reshape to the source image's own shape instead of a hard-coded
        # (150, 150), so the loop keeps working if imgResize is changed.
        imgX = imgX.reshape(img.shape)
        newImgs[ndx] = imgX
        newYs[ndx] = label
        ndx += 1

print("Done", str(datetime.datetime.now()))

('Done', '2016-12-17 05:03:51.548579')

# Report the installed NumPy version (read from two different attributes).
import numpy
print numpy.version.version
print numpy.__version__


# Append the augmented images and labels after the originals, so that both
# combined arrays stay index-aligned.
X_data2 = np.concatenate((X_data, newImgs))
Y_data2 = np.concatenate((Y_data, newYs))
print X_data2.shape
print Y_data2.shape

(39974, 150, 150)
(39974, 1)

Split Training/Test Sets

The following code segment splits the data into training and test data sets. Currently this is a standard 80/20 split for training and test respectively, performed after a random shuffle using the unison_shuffled_copies helper function.

# Manual switch: when True, the combined arrays are replaced by the original,
# un-augmented data (presumably for runs where the augmentation cells were
# skipped -- confirm with the author).
skippedTransforms = False
if skippedTransforms:
    X_data2 = X_data
    Y_data2 = Y_data

In [16]:
def unison_shuffled_copies(a, b):
    """Return shuffled copies of `a` and `b` that share one random ordering.

    Both inputs are indexed with the same random permutation, so element i of
    the first result still corresponds to element i of the second result.

    Raises AssertionError when the two inputs differ in length.
    """
    count = len(a)
    assert count == len(b)
    order = np.random.permutation(count)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

# Randomise the sample order, keeping images and labels aligned.
X_data2, Y_data2 = unison_shuffled_copies(X_data2, Y_data2)

# Cut point of the split: the first 80% of the shuffled samples are used for
# training, the remainder is held out for testing.
trainNdx = int(X_data2.shape[0] * .8)
print(trainNdx)

X_train, X_test = X_data2[:trainNdx], X_data2[trainNdx:]
Y_train, Y_test = Y_data2[:trainNdx], Y_data2[trainNdx:]

# Report the shape of each of the four resulting arrays.
for subset in (X_train, X_test, Y_train, Y_test):
    print(subset.shape)

(31979, 150, 150)
(7995, 150, 150)
(31979, 1)
(7995, 1)

Define the Model

In this section, we define the model. The emotion_model module contains the model definition itself. emotion_model_v1 is a basic convolutional neural network while our final model is a variation on a model shared by James Hamski (jh).

The model is trained to classify each image as one of 8 emotions.

# Map the emotions to integers for categorization later.
# len(emotions) is the number of emotion classes.
emotions = dwdii_transforms.emotionNumerics()
print emotions
print len(emotions)

{'sadness': 6, 'neutral': 2, 'contempt': 7, 'disgust': 1, 'anger': 0, 'surprise': 4, 'fear': 5, 'happiness': 3}

Our model is a convolutional neural network with 4 hidden layers.

#model = emotion_model.emotion_model_v3_2(len(emotions), verbose=True)
model = emotion_model.emotion_model_jh_v5(len(emotions), verbose=True, 

Layer (type)                       Output Shape        Param #     Connected to                     
convolution2d_1 (Convolution2D)    (None, 32, 143, 143)2080        convolution2d_input_1[0][0]      
activation_1 (Activation)          (None, 32, 143, 143)0           convolution2d_1[0][0]            
maxpooling2d_1 (MaxPooling2D)      (None, 32, 71, 71)  0           activation_1[0][0]               
convolution2d_2 (Convolution2D)    (None, 32, 67, 67)  25632       maxpooling2d_1[0][0]             
activation_2 (Activation)          (None, 32, 67, 67)  0           convolution2d_2[0][0]            
maxpooling2d_2 (MaxPooling2D)      (None, 32, 33, 33)  0           activation_2[0][0]               
convolution2d_3 (Convolution2D)    (None, 64, 31, 31)  18496       maxpooling2d_2[0][0]             
activation_3 (Activation)          (None, 64, 31, 31)  0           convolution2d_3[0][0]            
maxpooling2d_3 (MaxPooling2D)      (None, 64, 15, 15)  0           activation_3[0][0]               
convolution2d_4 (Convolution2D)    (None, 64, 14, 14)  16448       maxpooling2d_3[0][0]             
activation_4 (Activation)          (None, 64, 14, 14)  0           convolution2d_4[0][0]            
maxpooling2d_4 (MaxPooling2D)      (None, 64, 7, 7)    0           activation_4[0][0]               
flatten_1 (Flatten)                (None, 3136)        0           maxpooling2d_4[0][0]             
dense_1 (Dense)                    (None, 64)          200768      flatten_1[0][0]                  
activation_5 (Activation)          (None, 64)          0           dense_1[0][0]                    
dense_2 (Dense)                    (None, 8)           520         activation_5[0][0]               
activation_6 (Activation)          (None, 8)           0           dense_2[0][0]                    
Total params: 263944

Training the Model

The following code segment trains the model using the run_network helper function. Previously, I was hitting what I interpreted as a memory issue whenever the batch size exceeded a certain threshold: batches of 10 worked fine, but batches of 100 were too big, which suggested the Docker container needed more RAM. I have since moved to a Docker Cloud / Amazon Web Services instance with increased memory, and this issue has been mitigated.

# Reshape to the appropriate shape for the CNN input: a single-entry axis is
# inserted after the sample axis, giving (samples, 1, rows, cols).
# Both reshapes read rows/cols from X_train; X_test was split from the same
# array, so its image dimensions are identical.
testX = X_test.reshape(X_test.shape[0], 1, X_train.shape[1],X_train.shape[2])
trainX = X_train.reshape(X_train.shape[0], 1, X_train.shape[1],X_train.shape[2])

loadWeights = False
if loadWeights:

# Log the start time, then train through run_network with the reshaped
# train/test images and their labels.
# NOTE(review): m and h are presumably the trained model and the training
# history -- confirm in emotion_model.run_network.
print "Training start: " + str(datetime.datetime.now())
m, h = emotion_model.run_network([trainX, testX, Y_train, Y_test], model, batch=200, epochs=30, verbosity=1)

Training start: 2016-12-17 05:15:40.014472
(31979, 8)
(7995, 8)
Training model...
Train on 31979 samples, validate on 7995 samples
Epoch 1/30
31979/31979 [==============================] - 2431s - loss: 0.2908 - acc: 0.4111 - val_loss: 0.2697 - val_acc: 0.4642
Epoch 2/30
31979/31979 [==============================] - 2528s - loss: 0.2358 - acc: 0.5729 - val_loss: 0.2249 - val_acc: 0.5961
Epoch 3/30
31979/31979 [==============================] - 2639s - loss: 0.2035 - acc: 0.6421 - val_loss: 0.1873 - val_acc: 0.6782
Epoch 4/30
31979/31979 [==============================] - 2600s - loss: 0.1780 - acc: 0.6914 - val_loss: 0.1790 - val_acc: 0.6938
Epoch 5/30
31979/31979 [==============================] - 3354s - loss: 0.1554 - acc: 0.7350 - val_loss: 0.1569 - val_acc: 0.7335
Epoch 6/30
31979/31979 [==============================] - 12169s - loss: 0.1338 - acc: 0.7736 - val_loss: 0.1729 - val_acc: 0.6887
Epoch 7/30
31979/31979 [==============================] - 12201s - loss: 0.1166 - acc: 0.8038 - val_loss: 0.1350 - val_acc: 0.7765
Epoch 8/30
31979/31979 [==============================] - 12299s - loss: 0.0995 - acc: 0.8360 - val_loss: 0.1340 - val_acc: 0.7864
Epoch 9/30
11200/31979 [=========>....................] - ETA: 7025s - loss: 0.0829 - acc: 0.8669
# Write the model weights to disk, replacing any existing file of that name.
model.save_weights("dwdii-emo-150-jhv5-21tf-30e-Cloud.hdf5", overwrite=True)

Precision & Recall

In this section we compute Precision and Recall metrics for each of the emotion classes.

# Run the model over the reshaped test images; each row of the result is
# scored against Y_test in the precision/recall computation.
predictOutput = model.predict(testX)

import collections


def _safeRatio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    A zero denominator means the metric is undefined (the class was never
    predicted, or never occurred in the test set); it is reported as 0.0
    instead of raising ZeroDivisionError.
    """
    return numerator / denominator if denominator else 0.0


# Per-emotion tallies: prMetrics[emotion]["TruePos" | "FalsePos" | "FalseNeg"].
# defaultdict(int) lets a tally that was never incremented read as 0.
prMetrics = {}
for e in emotions:
    prMetrics[e] = collections.defaultdict(int)
print(prMetrics)

# Lookup from class index to emotion name.
numEmo = dwdii_transforms.numericEmotions()
print(numEmo)

# Score every test image exactly once:
#   prediction == truth -> true positive for that emotion
#   prediction != truth -> false positive for the predicted emotion AND
#                          false negative for the actual emotion
for i in range(len(predictOutput)):
    # The predicted class is the index of the largest output value.
    predictionNdx = np.array(predictOutput[i]).argmax()
    predictedEmo = numEmo[predictionNdx]
    actualNdx = int(Y_test[i])
    yEmo = numEmo[actualNdx]

    if predictionNdx == actualNdx:
        prMetrics[predictedEmo]["TruePos"] += 1.0
    else:
        prMetrics[predictedEmo]["FalsePos"] += 1.0
        prMetrics[yEmo]["FalseNeg"] += 1.0

# precision = TP / (TP + FP), recall = TP / (TP + FN), per emotion.
emotionPrecision = {}
emotionRecall = {}
for p in prMetrics:
    truePos = prMetrics[p]["TruePos"]
    emotionPrecision[p] = _safeRatio(truePos, truePos + prMetrics[p]["FalsePos"])
    emotionRecall[p] = _safeRatio(truePos, truePos + prMetrics[p]["FalseNeg"])

# Single-argument print(...) produces the same output under Python 2 and 3.
print("Precision by Emotion")
print("--------------------")
for e in emotionPrecision:
    print("%s %s" % (e, emotionPrecision[e]))
print("Recall by Emotion")
print("--------------------")
for e in emotionRecall:
    print("%s %s" % (e, emotionRecall[e]))

