In [1]:
    
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys
from six.moves import cPickle as pickle
%matplotlib inline
    
In [2]:
    
# Load the pickled training set. The pickle is expected to be a dict-like
# object with a 'data' entry and an 'outcome' entry.
# NOTE(review): pickle.load can execute arbitrary code -- only use this on a
# file you produced yourself or otherwise trust.
pickle_file = 'train.pickle'
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
# Keep the two entries used below, then drop the container so the garbage
# collector can reclaim it.
train_X_0, train_outcome = save['data'], save['outcome']
del save
    
In [3]:
    
def label_reformat(label, max_size = 5, end_digit = 10):
    """One-hot encode variable-length digit-sequence labels.

    Parameters
    ----------
    label : sequence of sequences
        One entry per example; each entry holds that example's digit values.
    max_size : int
        Number of digit positions to encode. Labels longer than this are cut
        off after `max_size` digits and their length is clipped to `max_size`.
    end_digit : int
        Class index used as the "no digit at this position" filler for
        positions past the end of a label. It is also the highest class
        index: every digit is encoded over ``end_digit + 1`` classes.

    Returns
    -------
    digit_size : float32 ndarray, shape (len(label), max_size)
        One-hot encoding of each label's (clipped) length; column k stands
        for length k + 1. A zero-length label produces an all-zero row.
    digits : dict
        Maps 'digit_0' ... 'digit_<max_size-1>' to a float32 ndarray of shape
        (len(label), end_digit + 1): the one-hot digit at that position.
    """
    # Clip lengths so anything longer than max_size lands in the last column.
    lengths = np.minimum(np.asarray([len(x) for x in label]), max_size)
    digit_size = (lengths[:, None] == np.arange(1, max_size + 1)).astype(np.float32)

    classes = np.arange(end_digit + 1)
    digits = {}
    for i in range(max_size):
        # Digit at position i, or the end marker when the label is shorter.
        digit_at_i = np.asarray([x[i] if len(x) > i else end_digit for x in label])
        digits['digit_' + str(i)] = (digit_at_i[:, None] == classes).astype(np.float32)

    return digit_size, digits
    
In [29]:
    
# Tensor geometry is read from the loaded array. Axis 1 is used as the size of
# both spatial dimensions and axis 3 as the channel count.
# NOTE(review): this assumes square images laid out as (N, H, W, C) -- confirm.
image_size = train_X_0.shape[1]
num_channels = train_X_0.shape[3]
batch_size = 20
test_size = 50

# Split boundaries, named once so the label slice and the image slice of each
# split cannot drift apart.
train_end = 5000
val_start, val_end = 6200, 6300

train_label = train_outcome['label'][:train_end]
train_digit_size, train_digits = label_reformat(train_label)
train_X = train_X_0[:train_end]

val_label = train_outcome['label'][val_start:val_end]
val_digit_size, val_digits = label_reformat(val_label)
val_X = train_X_0[val_start:val_end]
# val_size always follows the actual validation slice (the earlier hard-coded
# value was overwritten here and never used).
val_size = val_X.shape[0]
    
In [30]:
    
# Sanity-check the encoded label and image array shapes. Written as print(...)
# calls, matching the training-loop cell, so the cell parses on Python 2 and 3.
print(train_digit_size.shape)
print(train_digits['digit_0'].shape)
print(train_X.shape)
    
    
In [31]:
    
# An InteractiveSession installs itself as the default session, which is why
# the later `train_step.run(...)` and `accuracy.eval(...)` calls do not pass a
# session explicitly.
sess = tf.InteractiveSession()
    
In [32]:
    
def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
    
In [33]:
    
# Training inputs. Every placeholder is fixed to `batch_size` rows.
x_image = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
# One 11-way one-hot target per digit position, created in order d1..d5.
y_d1, y_d2, y_d3, y_d4, y_d5 = (
    tf.placeholder(tf.float32, shape=(batch_size, 11)) for _ in range(5)
)
# 5-way one-hot target for the digit count.
y_dsize = tf.placeholder(tf.float32, shape=(batch_size, 5))
    
In [34]:
    
def _eval_placeholders(n):
    """Create the image, five digit-target and digit-count placeholders for a fixed batch of `n` rows.

    Placeholders are created in the order image, d1..d5, dsize and returned
    in that same order.
    """
    image = tf.placeholder(tf.float32, shape=(n, image_size, image_size, num_channels))
    digit_targets = [tf.placeholder(tf.float32, shape=(n, 11)) for _ in range(5)]
    size_target = tf.placeholder(tf.float32, shape=(n, 5))
    return [image] + digit_targets + [size_target]


# Validation-sized inputs.
(val_x_image,
 val_y_d1, val_y_d2, val_y_d3, val_y_d4, val_y_d5,
 val_y_dsize) = _eval_placeholders(val_size)
# Test-sized inputs.
(test_x_image,
 test_y_d1, test_y_d2, test_y_d3, test_y_d4, test_y_d5,
 test_y_dsize) = _eval_placeholders(test_size)
    
In [35]:
    
def next_batch(X, y_dsize, y_ds, batch_size=50, replace = True):
    """Draw a random mini-batch of aligned rows from the images and all label arrays.

    Row indices are sampled with np.random.choice from range(X.shape[0]);
    with `replace=True` (the default) the same row may appear more than once.

    Returns a 7-tuple: (images, digit-size targets, digit_0, digit_1, digit_2,
    digit_3, digit_4 targets), all indexed with the same sampled rows.
    """
    picks = np.random.choice(X.shape[0], batch_size, replace=replace)
    digit_batches = tuple(y_ds['digit_%d' % pos][picks, :] for pos in range(5))
    return (X[picks, :, :, :], y_dsize[picks, :]) + digit_batches
    
In [36]:
    
# First convolutional layer: 5x5 filters mapping the input channels to 32
# feature maps, followed by ReLU and 2x2 max-pooling. conv2d uses stride 1
# with 'SAME' padding; max_pool_2x2 uses stride 2, so the pooled output has
# half the spatial size of the input (rounded up).
W_conv1 = weight_variable([5, 5, num_channels, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
    
In [37]:
    
# Second convolutional layer: 5x5 filters mapping the 32 feature maps from the
# first layer to 64, followed by ReLU and another 2x2 max-pool (stride 2),
# which halves the spatial size a second time.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
    
In [38]:
    
# Fully connected layer on top of the flattened conv features.
# The flattened width is taken from the static shape of h_pool2 rather than
# hard-coded as 16*16*64: that constant is only right when the two 2x2 pools
# leave a 16x16 map (i.e. 64x64 inputs), and with any other image size the
# reshape below would silently change the batch dimension instead of failing.
# The input placeholder has a fully defined shape, so every entry here is an int.
_, pool_h, pool_w, pool_c = h_pool2.get_shape().as_list()
flat_dim = pool_h * pool_w * pool_c
W_fc1 = weight_variable([flat_dim, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_dim])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    
Dropout layer
In [39]:
    
# keep_prob is a placeholder so it can be fed per run: the training loop feeds
# 0.5 for the optimisation step and 1.0 when evaluating accuracy.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    
Fully connected output layer: several separate softmax heads, one for each digit position and one for the digit count.
In [40]:
    
def _output_head(features, num_classes, in_dim=1024):
    """Build one linear output head on `features`.

    Creates a weight matrix [in_dim, num_classes] and a bias [num_classes]
    (weight first, then bias) and returns (weights, bias, logits). No softmax
    is applied here; the logits are consumed by the loss.
    """
    W = weight_variable([in_dim, num_classes])
    b = bias_variable([num_classes])
    return W, b, tf.matmul(features, W) + b


# One 11-way head per digit position (first ... fifth digit), all sharing the
# same dropout-regularised features.
W_fc2_d1, b_fc2_d1, y_conv_d1 = _output_head(h_fc1_drop, 11)
W_fc2_d2, b_fc2_d2, y_conv_d2 = _output_head(h_fc1_drop, 11)
W_fc2_d3, b_fc2_d3, y_conv_d3 = _output_head(h_fc1_drop, 11)
W_fc2_d4, b_fc2_d4, y_conv_d4 = _output_head(h_fc1_drop, 11)
W_fc2_d5, b_fc2_d5, y_conv_d5 = _output_head(h_fc1_drop, 11)
# 5-way head for the digit count.
W_fc2_dsize, b_fc2_dsize, y_conv_dsize = _output_head(h_fc1_drop, 5)
    
In [41]:
    
def _head_loss(logits, labels):
    """Batch-mean softmax cross-entropy of one output head.

    The arguments are passed by keyword: TensorFlow >= 1.0 rejects positional
    calls to softmax_cross_entropy_with_logits, while the keyword form is also
    accepted by the older (logits, labels) signature.
    """
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))


# Total loss: unweighted sum of the five digit-head losses and the
# digit-count head loss.
cross_entropy = (_head_loss(y_conv_d1, y_d1)
                 + _head_loss(y_conv_d2, y_d2)
                 + _head_loss(y_conv_d3, y_d3)
                 + _head_loss(y_conv_d4, y_d4)
                 + _head_loss(y_conv_d5, y_d5)
                 + _head_loss(y_conv_dsize, y_dsize)
                 )
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    
In [42]:
    
# Accuracy is measured on the first-digit head only: the fraction of rows in
# the fed batch whose arg-max logit equals the arg-max of the one-hot target.
# The other digit heads and the digit-count head are not evaluated here.
correct_prediction = tf.equal(tf.argmax(y_conv_d1,1), tf.argmax(y_d1,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
In [43]:
    
def _feed(batch, keep):
    """Map one next_batch() 7-tuple onto the training placeholders.

    `keep` is the value fed for keep_prob: 0.5 for the optimisation step,
    1.0 when only evaluating.
    """
    images, sizes, d1, d2, d3, d4, d5 = batch
    return {
        x_image: images, y_dsize: sizes,
        y_d1: d1, y_d2: d2, y_d3: d3,
        y_d4: d4, y_d5: d5,
        keep_prob: keep,
    }


sess.run(tf.initialize_all_variables())
for i in range(1000):
    train_batch = next_batch(train_X, train_digit_size, train_digits, batch_size)
    train_step.run(feed_dict=_feed(train_batch, 0.5))

    if i%10 == 0:
        # Evaluated on the batch that was just trained on, with dropout disabled.
        train_accuracy = accuracy.eval(feed_dict=_feed(train_batch, 1.0))
        print("step %d, training accuracy %g"%(i, train_accuracy))

    if i%40 ==0:
        # The graph inputs are fixed to batch_size rows, so validation accuracy
        # is estimated on one random batch_size-sized draw from the validation
        # split, not on the whole split.
        val_batch = next_batch(val_X, val_digit_size, val_digits, batch_size)
        val_accuracy = accuracy.eval(feed_dict=_feed(val_batch, 1.0))
        print("step %d, val accuracy %g"%(i, val_accuracy))
    
    
In [ ]: