In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys
from six.moves import cPickle as pickle
%matplotlib inline

Read in the small dataset


In [2]:
pickle_file = 'mini_train.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    mini_X_0 = save['data']
    mini_outcome = save['outcome']
    del save  # hint to help gc free up memory
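
A quick look at what was loaded; the box-related keys below are inferred from how mini_outcome is used later in this notebook.

In [ ]:
print(mini_X_0.shape)               # the image array
print(sorted(mini_outcome.keys()))  # expect 'label' plus 'left', 'top', 'width', 'height'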

In [3]:
# reformat the labels:
# each label is padded to max_size digits with an 'end_digit' class (10)
# each label also gets a digit-size target (how many digits it contains)
# every target is one-hot encoded

def label_reformat(label, max_size = 5):
    digit_size = np.asarray([len(x) for x in label])
    digit_size[digit_size > max_size] = max_size
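    # one-hot encode the length by broadcasting: row i is 1.0 at column (size_i - 1)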
    digit_size = ((np.arange(max_size)+1) == digit_size[:,None]).astype(np.float32)
    
    digits = {}
    end_digit = 10.0
    for i in range(max_size):
        digit_coding = np.asarray( [x[i] if len(x)>i else end_digit for x in label])
        digit_coding = (np.arange(end_digit+1) == digit_coding[:,None]).astype(np.float32)
        digits['digit_'+ str(i)] = digit_coding
        
    return digit_size, digits
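
A small sanity check of label_reformat on a toy label list (hypothetical values, chosen only to show the shapes):

In [ ]:
toy_labels = [[1, 9], [3], [2, 4, 0]]
toy_size, toy_digits = label_reformat(toy_labels)
print(toy_size.shape)         # (3, 5): one-hot over lengths 1..5
print(toy_digits['digit_2'])  # third digit; shorter labels get the end_digit class (10)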

Extract the localization box


In [4]:
def one_boxes(metadata):
    # merge the per-digit boxes of each image into one enclosing box
    box = {}
    box['left'] = []
    box['right'] = []
    box['top'] = []
    box['bottom'] = []
    
    left = metadata['left']
    top = metadata['top']
    width = metadata['width']
    height = metadata['height']
    
    for i in range(len(left)):
        tmp_left = np.asarray(left[i])
        tmp_top = np.asarray(top[i])
        tmp_width = np.asarray(width[i])
        tmp_height = np.asarray(height[i])
        tmp_right = tmp_left + tmp_width
        tmp_bottom = tmp_top + tmp_height
        box['left'].append(np.min(tmp_left))
        box['right'].append(np.max(tmp_right))
        box['top'].append(np.min(tmp_top))
        box['bottom'].append(np.max(tmp_bottom))
        
    box['left'] = np.asarray(box['left'])
    box['right'] = np.asarray(box['right'])
    box['top'] = np.asarray(box['top'])
    box['bottom'] = np.asarray(box['bottom'])

    return np.stack((box['left'], box['right'], box['top'], box['bottom']),
                    axis=-1)
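
The same kind of toy check for one_boxes (hypothetical metadata; each field holds one list of per-digit values per image):

In [ ]:
toy_meta = {'left':   [[10, 22], [5]],
            'top':    [[8, 9],   [4]],
            'width':  [[12, 11], [20]],
            'height': [[20, 20], [30]]}
print(one_boxes(toy_meta))  # [[10 33  8 29]
                            #  [ 5 25  4 34]]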

Sample a smaller dataset


In [5]:
label = mini_outcome['label'][:100]
digit_size, digits = label_reformat(label)
mini_X = mini_X_0[:100]

# scale the box coordinates to [0, 1] by dividing by the image size (64)
digits_box = one_boxes(mini_outcome)/mini_X.shape[1]
digits_box = digits_box[:100]
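
A quick check that the scaling worked; the coordinates should fall roughly in [0, 1] (boxes that poke slightly past the image frame would exceed it a little):

In [ ]:
print(digits_box.min(), digits_box.max())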

In [6]:
print(digit_size.shape)
print(digits['digit_0'].shape)
print(mini_X.shape)
print(digits_box.shape)


(100, 5)
(100, 11)
(100, 64, 64, 3)
(100, 4)

Start a TensorFlow session


In [7]:
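# InteractiveSession installs itself as the default session, so the
# Tensor.eval() and Operation.run() calls below need no explicit session argument.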
sess = tf.InteractiveSession()

In [8]:
def weight_variable(shape):
    # small truncated-normal init to break symmetry
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    # slightly positive bias so ReLU units start active
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    # stride-1 convolution with SAME padding: spatial size is preserved
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max-pooling with stride 2: halves each spatial dimension
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [9]:
image_size = mini_X.shape[1]
num_channels = mini_X.shape[3]
batch_size = 20

x_image = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))

y_d1 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d2 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d3 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d4 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d5 = tf.placeholder(tf.float32, shape=(batch_size, 11))

y_dsize = tf.placeholder(tf.float32, shape=(batch_size, 5))

y_box = tf.placeholder(tf.float32, shape=(batch_size, 4))

In [10]:
def next_batch(X, y_dsize, y_ds, y_box, batch_size=50):
    idx = np.random.choice(X.shape[0],batch_size)
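    # np.random.choice samples with replacement, so a batch may repeat images;
    # that is harmless for this overfitting sanity check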
    batch_x = X[idx,:,:,:]
    batch_y_dsize = y_dsize[idx,:]
    batch_y_d1 = y_ds['digit_0'][idx,:]
    batch_y_d2 = y_ds['digit_1'][idx,:]
    batch_y_d3 = y_ds['digit_2'][idx,:]
    batch_y_d4 = y_ds['digit_3'][idx,:]
    batch_y_d5 = y_ds['digit_4'][idx,:]
    
    batch_y_box = y_box[idx,:]
    
    
    return ( batch_x, batch_y_dsize,
            batch_y_d1, batch_y_d2, 
            batch_y_d3, batch_y_d4, batch_y_d5,
            batch_y_box)

Construct the CNN


In [11]:
W_conv1 = weight_variable([5, 5, num_channels, 32])
b_conv1 = bias_variable([32])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

In [12]:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
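
A sanity check on the flatten size used in the next cell: SAME convolutions preserve the spatial size and each 2x2 max-pool halves it, so 64 -> 32 -> 16, leaving 16*16*64 features per image.

In [ ]:
print(h_pool2.get_shape())  # (20, 16, 16, 64)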

In [13]:
W_fc1 = weight_variable([16 * 16 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 16*16*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

In [14]:
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

In [15]:
#first digit
W_fc2_d1 = weight_variable([1024, 11])
b_fc2_d1 = bias_variable([11])

y_conv_d1 = tf.matmul(h_fc1_drop, W_fc2_d1) + b_fc2_d1

#second digit
W_fc2_d2 = weight_variable([1024, 11])
b_fc2_d2 = bias_variable([11])

y_conv_d2 = tf.matmul(h_fc1_drop, W_fc2_d2) + b_fc2_d2

#third digit
W_fc2_d3 = weight_variable([1024, 11])
b_fc2_d3 = bias_variable([11])

y_conv_d3 = tf.matmul(h_fc1_drop, W_fc2_d3) + b_fc2_d3

#fourth digit
W_fc2_d4 = weight_variable([1024, 11])
b_fc2_d4 = bias_variable([11])

y_conv_d4 = tf.matmul(h_fc1_drop, W_fc2_d4) + b_fc2_d4

#fifth digit
W_fc2_d5 = weight_variable([1024, 11])
b_fc2_d5 = bias_variable([11])

y_conv_d5 = tf.matmul(h_fc1_drop, W_fc2_d5) + b_fc2_d5

#digit size
W_fc2_dsize = weight_variable([1024, 5])
b_fc2_dsize = bias_variable([5])

y_conv_dsize = tf.matmul(h_fc1_drop, W_fc2_dsize) + b_fc2_dsize

#digit box
W_fc2_dbox = weight_variable([1024, 4])
b_fc2_dbox = bias_variable([4])

y_conv_dbox = tf.matmul(h_fc1_drop, W_fc2_dbox) + b_fc2_dbox
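
The five digit heads are structurally identical, so they could equivalently be built in a loop (a sketch only; the cells above are what this notebook actually runs):

In [ ]:
digit_logits = []
for i in range(5):
    W = weight_variable([1024, 11])
    b = bias_variable([11])
    digit_logits.append(tf.matmul(h_fc1_drop, W) + b)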

In [16]:
cross_entropy_and_l2 = (
    # softmax cross-entropy for the five digit heads and the digit-size head
    tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d1, labels=y_d1))
    + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d2, labels=y_d2))
    + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d3, labels=y_d3))
    + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d4, labels=y_d4))
    + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d5, labels=y_d5))
    + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_dsize, labels=y_dsize))
    # mean-squared-error box regression, with a small L2 penalty on the box weights
    + tf.reduce_mean((y_conv_dbox - y_box)**2) + tf.nn.l2_loss(W_fc2_dbox)*0.0001
)

train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy_and_l2)

In [17]:
# as a quick sanity check, measure accuracy on the first digit only
correct_prediction = tf.equal(tf.argmax(y_conv_d1,1), tf.argmax(y_d1,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
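
A stricter metric would require every digit head to be right at once; a sketch (TF 1.x style, not used in the run below):

In [ ]:
per_digit_correct = [tf.equal(tf.argmax(pred, 1), tf.argmax(truth, 1))
                     for pred, truth in [(y_conv_d1, y_d1), (y_conv_d2, y_d2),
                                         (y_conv_d3, y_d3), (y_conv_d4, y_d4),
                                         (y_conv_d5, y_d5)]]
seq_correct = tf.reduce_all(tf.stack(per_digit_correct, axis=1), axis=1)
seq_accuracy = tf.reduce_mean(tf.cast(seq_correct, tf.float32))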

Train the model on the small dataset to see whether it can overfit.

If it overfits, good; if not, look for bugs.


In [18]:
sess.run(tf.global_variables_initializer())
for i in range(1000):
    (batch_x, batch_y_dsize, batch_y_d1,
     batch_y_d2, batch_y_d3, batch_y_d4, batch_y_d5,
     batch_y_box) = next_batch(mini_X, digit_size,
                               digits, digits_box, batch_size)
    if i%10 == 0:
        train_accuracy = accuracy.eval(feed_dict={
                x_image: batch_x, y_dsize: batch_y_dsize,
                y_d1: batch_y_d1, y_d2: batch_y_d2, y_d3: batch_y_d3,
                y_d4: batch_y_d4, y_d5: batch_y_d5, y_box: batch_y_box,
                keep_prob: 1.0})
        print("step %d, training accuracy %g"%(i, train_accuracy))
    train_step.run(feed_dict={
            x_image: batch_x, y_dsize: batch_y_dsize,
            y_d1: batch_y_d1, y_d2: batch_y_d2, y_d3: batch_y_d3,
            y_d4: batch_y_d4, y_d5: batch_y_d5, y_box: batch_y_box,
            keep_prob: 0.5})


step 0, training accuracy 0.1
step 10, training accuracy 0.2
step 20, training accuracy 0.35
step 30, training accuracy 0.25
step 40, training accuracy 0.45
step 50, training accuracy 0.4
step 60, training accuracy 0.3
step 70, training accuracy 0.25
step 80, training accuracy 0.25
step 90, training accuracy 0.35
step 100, training accuracy 0.35
step 110, training accuracy 0.55
step 120, training accuracy 0.4
step 130, training accuracy 0.3
step 140, training accuracy 0.45
step 150, training accuracy 0.5
step 160, training accuracy 0.4
step 170, training accuracy 0.45
step 180, training accuracy 0.65
step 190, training accuracy 0.6
step 200, training accuracy 0.7
step 210, training accuracy 0.5
step 220, training accuracy 0.5
step 230, training accuracy 0.45
step 240, training accuracy 0.7
step 250, training accuracy 0.9
step 260, training accuracy 0.8
step 270, training accuracy 0.4
step 280, training accuracy 0.7
step 290, training accuracy 0.85
step 300, training accuracy 0.9
step 310, training accuracy 0.85
step 320, training accuracy 0.75
step 330, training accuracy 0.75
step 340, training accuracy 0.9
step 350, training accuracy 0.9
step 360, training accuracy 0.85
step 370, training accuracy 0.8
step 380, training accuracy 0.65
step 390, training accuracy 0.8
step 400, training accuracy 0.85
step 410, training accuracy 0.7
step 420, training accuracy 1
step 430, training accuracy 0.8
step 440, training accuracy 0.8
step 450, training accuracy 1
step 460, training accuracy 0.7
step 470, training accuracy 1
step 480, training accuracy 0.85
step 490, training accuracy 0.95
step 500, training accuracy 0.9
step 510, training accuracy 0.9
step 520, training accuracy 0.9
step 530, training accuracy 0.9
step 540, training accuracy 0.95
step 550, training accuracy 1
step 560, training accuracy 1
step 570, training accuracy 0.95
step 580, training accuracy 0.95
step 590, training accuracy 0.95
step 600, training accuracy 0.85
step 610, training accuracy 1
step 620, training accuracy 0.95
step 630, training accuracy 1
step 640, training accuracy 0.95
step 650, training accuracy 0.85
step 660, training accuracy 0.9
step 670, training accuracy 0.8
step 680, training accuracy 1
step 690, training accuracy 0.95
step 700, training accuracy 1
step 710, training accuracy 1
step 720, training accuracy 1
step 730, training accuracy 1
step 740, training accuracy 1
step 750, training accuracy 1
step 760, training accuracy 0.95
step 770, training accuracy 1
step 780, training accuracy 1
step 790, training accuracy 1
step 800, training accuracy 0.95
step 810, training accuracy 0.95
step 820, training accuracy 0.95
step 830, training accuracy 1
step 840, training accuracy 1
step 850, training accuracy 1
step 860, training accuracy 1
step 870, training accuracy 1
step 880, training accuracy 1
step 890, training accuracy 1
step 900, training accuracy 1
step 910, training accuracy 1
step 920, training accuracy 1
step 930, training accuracy 1
step 940, training accuracy 0.95
step 950, training accuracy 1
step 960, training accuracy 1
step 970, training accuracy 1
step 980, training accuracy 1
step 990, training accuracy 1

In [ ]: