In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
# read in the first 5000 rows of the training data
df = pd.read_csv('../data/facial keypoints training.csv', nrows=5000)

In [3]:
# check that each image really is 96*96 = 9216 pixels, and list the available columns
print(len(np.fromstring(df.Image.iloc[0], dtype=int, sep=' ')))
print('{}, {}'.format(96**2, df.columns.tolist()))


9216
9216, ['left_eye_center_x', 'left_eye_center_y', 'right_eye_center_x', 'right_eye_center_y', 'left_eye_inner_corner_x', 'left_eye_inner_corner_y', 'left_eye_outer_corner_x', 'left_eye_outer_corner_y', 'right_eye_inner_corner_x', 'right_eye_inner_corner_y', 'right_eye_outer_corner_x', 'right_eye_outer_corner_y', 'left_eyebrow_inner_end_x', 'left_eyebrow_inner_end_y', 'left_eyebrow_outer_end_x', 'left_eyebrow_outer_end_y', 'right_eyebrow_inner_end_x', 'right_eyebrow_inner_end_y', 'right_eyebrow_outer_end_x', 'right_eyebrow_outer_end_y', 'nose_tip_x', 'nose_tip_y', 'mouth_left_corner_x', 'mouth_left_corner_y', 'mouth_right_corner_x', 'mouth_right_corner_y', 'mouth_center_top_lip_x', 'mouth_center_top_lip_y', 'mouth_center_bottom_lip_x', 'mouth_center_bottom_lip_y', 'Image']
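
As a further sanity check, it helps to look at one of the images. A minimal sketch, assuming matplotlib is installed:

import matplotlib.pyplot as plt

# parse the first image string and reshape its 9216 pixels into a 96x96 grid
img = np.fromstring(df.Image.iloc[0], dtype=np.float32, sep=' ').reshape(96, 96)
plt.imshow(img, cmap='gray')
plt.show()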

In [4]:
# for now, train only on the left eye centre: keep those two columns plus the image
df = df[['left_eye_center_x', 'left_eye_center_y', 'Image']]
df = df[df['left_eye_center_x'].notnull() & df['left_eye_center_y'].notnull()]

In [5]:
# check how many rows survive the null filter
print(df.shape)


(4990, 3)

In [6]:
# round the left eye x, y coordinates to integers (multiplying by a power of ten first would give sub-pixel resolution, as sketched below)
df[['left_eye_center_x', 'left_eye_center_y']] = df[['left_eye_center_x', 'left_eye_center_y']].round(0)
print(df[['left_eye_center_x', 'left_eye_center_y']].head())


   left_eye_center_x  left_eye_center_y
0               66.0               39.0
1               64.0               35.0
2               65.0               35.0
3               65.0               37.0
4               67.0               40.0
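
The parenthetical above in practice: scaling by ten before rounding keeps one decimal place of position, at the cost of roughly ten times as many classes to predict. A hypothetical alternative to the plain .round(0) above, not applied in this run:

# bin coordinates at 0.1-pixel resolution instead of 1 pixel
scale = 10
fine_x = (df['left_eye_center_x'] * scale).round(0)  # a class value v maps back to v / scale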

In [7]:
# one-hot encode each rounded coordinate; get_dummies on a Series
# creates one indicator column per distinct value
left_eye_x = pd.get_dummies(df['left_eye_center_x'])
left_eye_y = pd.get_dummies(df['left_eye_center_y'])
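
The dummy columns are labelled with the coordinate values themselves (sorted, with gaps where a value never occurs), so a predicted class index can be mapped back to a pixel coordinate through the column labels. A small sketch:

# grab the column labels before the dummies are turned into a bare matrix;
# x_values[i] is the pixel coordinate that class index i stands for
x_values = left_eye_x.columns.values.astype(np.float32)
print(x_values[:5])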

In [8]:
# parse each space-separated pixel string into one DataFrame row (one column per pixel)
faces = df.Image.apply(lambda x: pd.Series(np.fromstring(x, dtype=np.float32, sep=' ')))

In [9]:
# see the results
print(faces.head())
print(faces.shape)


    0      1      2      3      4      5      6      7      8      9     \
0  238.0  236.0  237.0  238.0  240.0  240.0  239.0  241.0  241.0  243.0   
1  219.0  215.0  204.0  196.0  204.0  211.0  212.0  200.0  180.0  168.0   
2  144.0  142.0  159.0  180.0  188.0  188.0  184.0  180.0  167.0  132.0   
3  193.0  192.0  193.0  194.0  194.0  194.0  193.0  192.0  168.0  111.0   
4  147.0  148.0  160.0  196.0  215.0  214.0  216.0  217.0  219.0  220.0   

   ...    9206  9207  9208  9209  9210  9211  9212   9213   9214   9215  
0  ...    33.0  29.0  30.0  34.0  39.0  49.0  62.0   70.0   75.0   90.0  
1  ...     1.0   1.0   1.0   1.0   1.0   1.0   1.0    1.0    1.0    1.0  
2  ...    64.0  60.0  56.0  61.0  70.0  69.0  71.0   78.0   78.0   77.0  
3  ...     1.0   1.0   1.0   1.0   1.0   1.0   1.0    1.0    1.0    1.0  
4  ...    33.0  34.0  37.0  37.0  43.0  46.0  83.0  140.0  170.0  176.0  

[5 rows x 9216 columns]
(4990, 9216)

In [10]:
# drop the raw image strings to free memory, and convert to plain numpy arrays
# (.values; DataFrame.as_matrix is deprecated)
del df['Image']
faces = faces.values
left_eye_x = left_eye_x.values
print(faces.shape)
print(left_eye_x.shape)


(4990, 9216)
(4990, 48)
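
The pixels are fed in raw on a 0-255 scale. Gradient descent on a linear model is usually better behaved with inputs normalised to [0, 1]; a one-line sketch, not applied in the run below:

# hypothetical: rescale pixel intensities from [0, 255] to [0, 1]
faces = faces / np.float32(255.0)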

In [11]:
# TensorFlow requires consistent dtypes across operations, so keep everything float32
print(type(faces[0,0]))


<class 'numpy.float32'>

In [12]:
num_labels = left_eye_x.shape[1]
image_size = 96

graph = tf.Graph()
with graph.as_default():
    
    # inputs: flattened 96x96 images and one-hot x-coordinate labels
    face = tf.placeholder(tf.float32, [None, image_size**2])
    label = tf.placeholder(tf.float32, [None, num_labels])
    
    # parameters of a single linear (softmax regression) layer
    weights = tf.Variable(tf.truncated_normal([image_size*image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))
    
    # loss: mean softmax cross-entropy between the one-hot labels and the logits
    logits = tf.matmul(face, weights) + biases
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits))
    
    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    # predictions
    train_prediction = tf.nn.softmax(logits)

    # saver
    saver = tf.train.Saver(tf.global_variables())
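
Note that this is TensorFlow 1.x graph-style code (tf.placeholder, tf.train.Saver); under TensorFlow 2.x it fails unless the import at the top is swapped for the 1.x compatibility shim:

# replacement for `import tensorflow as tf` when running under TF 2.x
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()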

In [13]:
num_steps = 1000
train = False  # True: train and save a checkpoint; False: restore the saved model and predict

with tf.Session(graph=graph) as session:
    
    if train:
        # initialise graph
        tf.global_variables_initializer().run()
        
        # exact-match accuracy: the share of argmax predictions hitting the right class (a distance-aware measure is sketched at the end)
        def accuracy(predictions, labels):
            return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])
        
        
        # optimise the coefficients of the graph using the loss function and optimiser
        for step in range(num_steps):
            _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict={face:faces, label:left_eye_x})
            
            if (step % 50 == 0):
                print('Loss at step {}: {}'.format(step, l))
                print('Training accuracy: %.1f%%' % accuracy(predictions, left_eye_x))
                try:
                    saver.save(session,'../models/left-eye-test-model')
                    print('saved checkpoint to ../models/left-eye-test-model')
                except Exception as e:
                    print(repr(e))
    else:
        # restore the latest checkpoint and predict for every face
        checkpoint = tf.train.get_checkpoint_state('../models/')
        saver.restore(session, checkpoint.model_checkpoint_path)
        predictions = session.run(logits, feed_dict={face: faces})
        # note: argmax returns the one-hot class index, not the pixel coordinate itself
        print('predict x-coordinate:\t{}'.format(predictions.argmax(axis=1)))
        print('actual x-coordinate:\t{}'.format(left_eye_x.argmax(axis=1)))


predict x-coordinate:	[30 30 30 ..., 30 32  9]
actual x-coordinate:	[31 29 30 ..., 29 33  9]
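
The printed values are one-hot class indices, not pixel positions. Since pd.get_dummies orders its columns by sorted distinct value, the indices can be mapped back to pixel coordinates and a distance-aware error computed. A sketch of the to-do measure mentioned above, assuming the variables from the cells above are still in scope:

# class index -> pixel coordinate, using the same sorted ordering
# that pd.get_dummies used for its columns
x_values = np.sort(df['left_eye_center_x'].unique())
pred_px = x_values[predictions.argmax(axis=1)]
true_px = x_values[left_eye_x.argmax(axis=1)]
print('mean absolute error: {:.2f} pixels'.format(np.abs(pred_px - true_px).mean()))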

In [ ]: