In [18]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys
from six.moves import cPickle as pickle
%matplotlib inline

Read the training data


In [19]:
def load_pickle_split(pickle_file):
    """Load one pickled dataset split and return its (data, outcome) pair.

    The pickle is expected to hold a dict with keys 'data' (image tensor)
    and 'outcome' (label metadata), as produced by the preprocessing step.

    SECURITY NOTE: pickle.load can execute arbitrary code from the file;
    only use this on trusted, locally generated pickles.
    """
    with open(pickle_file, 'rb') as f:
        save = pickle.load(f)
        data = save['data']
        outcome = save['outcome']
        del save  # hint to help gc free up memory
    return data, outcome


# Training split. (The earlier train.pickle / test.pickle variants were
# commented-out dead code and have been removed.)
train_X_0, train_outcome_0 = load_pickle_split('train2.pickle')

# Held-out split; used below as the validation set.
test_X_0, test_outcome_0 = load_pickle_split('test2.pickle')

In [20]:
def label_reformat(label, max_size=5):
    """One-hot encode digit-sequence labels.

    For each label (a sequence of digits) this produces:
      - a one-hot encoding of its length, clamped to ``max_size`` classes;
      - per position 0..max_size-1, a one-hot encoding over 11 classes
        (digits 0-9 plus an 'end digit' class 10 used to pad short labels).

    Returns (digit_size, digits) where ``digit_size`` has shape
    (n, max_size) and ``digits`` maps 'digit_i' to an (n, 11) array.
    """
    lengths = np.asarray([min(len(seq), max_size) for seq in label])
    digit_size = (lengths[:, None] == np.arange(1, max_size + 1)).astype(np.float32)

    end_digit = 10.0
    digits = {}
    for pos in range(max_size):
        raw = np.asarray([seq[pos] if len(seq) > pos else end_digit for seq in label])
        digits['digit_' + str(pos)] = (raw[:, None] == np.arange(end_digit + 1)).astype(np.float32)

    return digit_size, digits

Sample a smaller subset of the data


In [21]:
#train_X_0 = np.vstack((train_X_1 ,train_X_2 ))

In [22]:
# Inspect the raw training tensor shape: (num_images, height, width, channels).
train_X_0.shape


Out[22]:
(33402, 64, 64, 3)

In [23]:
#train_X_0 = np.vstack((train_X_1 ,train_X_2 ))

# Dataset geometry, read off the loaded training tensor (N, H, W, C).
image_size = train_X_0.shape[1]
num_channels = train_X_0.shape[3]
batch_size = 100
val_size = 50  # provisional; overwritten below with the actual validation size
test_size = 50


#train_label = train_outcome_1['label'] +  train_outcome_2['label']
# Keep only the first 400 samples so the model can be deliberately overfit
# as a sanity check (see the markdown note further down).
train_label = train_outcome_0['label'][:400]
train_digit_size, train_digits = label_reformat(train_label)
train_X = train_X_0[:400]


# NOTE(review): the test pickle is reused here as the validation set —
# confirm a separate held-out test split exists elsewhere.
val_label = test_outcome_0['label']
val_digit_size, val_digits = label_reformat(val_label)
val_X = test_X_0

val_size = val_X.shape[0]

In [24]:
# Sanity-check the reformatted training arrays. Parenthesized print() is
# behavior-identical for a single argument in Python 2 and also valid in
# Python 3, consistent with the print("...") calls used in the training loop.
print(train_digit_size.shape)
print(train_digits['digit_0'].shape)
print(train_X.shape)


(400, 5)
(400, 11)
(400, 64, 64, 3)

In [25]:
# Visually spot-check one training image against its encoded first two digits.
# print() form is Python 2/3 compatible for a single argument.
plt.imshow(train_X[0, :, :, :])
plt.show()
print(train_digits['digit_0'][0])
print(train_digits['digit_1'][0])


[ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]

In [26]:
# Visually spot-check one validation image against its encoded first two digits.
# print() form is Python 2/3 compatible for a single argument.
plt.imshow(val_X[1, :, :, :])
plt.show()
print(val_digits['digit_0'][1])
print(val_digits['digit_1'][1])


[ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]

Start a TensorFlow session


In [27]:
def next_batch(X, y_dsize, y_ds, batch_size=50, replace=True):
    """Sample a random mini-batch of images plus their label encodings.

    Returns a 7-tuple: (images, digit-count one-hots, then one (batch, 11)
    one-hot array for each of the five digit positions).
    """
    idx = np.random.choice(X.shape[0], batch_size, replace=replace)
    per_digit = tuple(y_ds['digit_' + str(pos)][idx, :] for pos in range(5))
    return (X[idx, :, :, :], y_dsize[idx, :]) + per_digit

In [28]:
reg = 1e-4  # L2 regularization strength applied to the weights listed in the loss

graph = tf.Graph()
with graph.as_default():
    
    def weight_variable(shape):
        """Weight Variable drawn from a truncated normal, std 0.01."""
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        """Bias Variable initialized to constant 1.0."""
        initial = tf.constant(1.0, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        """Stride-1, SAME-padded 2-D convolution: spatial size preserved."""
        conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
        return conv

    def max_pool_2x2(x):
        """2x2 max pool with stride 2: halves height and width."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    
    def max_pool_2x2_same(x):
        """2x2 max pool with stride 1 and SAME padding: spatial size unchanged."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME')
    
    # Training-batch placeholders. Each digit head has 11 classes
    # (digits 0-9 plus class 10 = 'end digit' padding, see label_reformat);
    # the size head has 5 classes (number of digits, clamped to 5).
    x_image = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))

    y_d1 = tf.placeholder(tf.float32, shape=(batch_size, 11))
    y_d2 = tf.placeholder(tf.float32, shape=(batch_size, 11))
    y_d3 = tf.placeholder(tf.float32, shape=(batch_size, 11))
    y_d4 = tf.placeholder(tf.float32, shape=(batch_size, 11))
    y_d5 = tf.placeholder(tf.float32, shape=(batch_size, 11))

    y_dsize = tf.placeholder(tf.float32, shape=(batch_size, 5))
    
    # NOTE(review): the val_* and test_* placeholders below are declared but
    # no ops are built on them in this cell — the training loop feeds
    # validation batches through the training placeholders instead.
    # They appear to be dead code.
    val_x_image = tf.placeholder(tf.float32, shape=(val_size, image_size, image_size, num_channels))

    val_y_d1 = tf.placeholder(tf.float32, shape=(val_size, 11))
    val_y_d2 = tf.placeholder(tf.float32, shape=(val_size, 11))
    val_y_d3 = tf.placeholder(tf.float32, shape=(val_size, 11))
    val_y_d4 = tf.placeholder(tf.float32, shape=(val_size, 11))
    val_y_d5 = tf.placeholder(tf.float32, shape=(val_size, 11))

    val_y_dsize = tf.placeholder(tf.float32, shape=(val_size, 5))

    test_x_image = tf.placeholder(tf.float32, shape=(test_size, image_size, image_size, num_channels))

    test_y_d1 = tf.placeholder(tf.float32, shape=(test_size, 11))
    test_y_d2 = tf.placeholder(tf.float32, shape=(test_size, 11))
    test_y_d3 = tf.placeholder(tf.float32, shape=(test_size, 11))
    test_y_d4 = tf.placeholder(tf.float32, shape=(test_size, 11))
    test_y_d5 = tf.placeholder(tf.float32, shape=(test_size, 11))

    test_y_dsize = tf.placeholder(tf.float32, shape=(test_size, 5))
    
    
    # Conv stack. Spatial sizes for 64x64 input:
    # conv1+pool1 (stride 2): 64 -> 32
    W_conv1 = weight_variable([5, 5, num_channels, 32])
    b_conv1 = bias_variable([32])

    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    
    
    # conv2+pool2 (stride 1): 32 -> 32
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2_same(h_conv2)
    
    #W_fc1 = weight_variable([16 * 16 * 64, 1024])
    #b_fc1 = bias_variable([1024])

    #h_pool2_flat = tf.reshape(h_pool2, [-1, 16*16*64])
    #h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    
    # conv3+pool3 (stride 2): 32 -> 16
    W_conv3 = weight_variable([5, 5, 64, 128])
    b_conv3 = bias_variable([128])

    h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
    h_pool3 = max_pool_2x2(h_conv3)
    
        
    # conv4-conv6 all use stride-1 pooling: spatial size stays at 16.
    W_conv4 = weight_variable([5, 5, 128, 160])
    b_conv4 = bias_variable([160])

    h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4)
    h_pool4 = max_pool_2x2_same(h_conv4)
    
    
    W_conv5 = weight_variable([5, 5, 160, 180])
    b_conv5 = bias_variable([180])

    h_conv5 = tf.nn.relu(conv2d(h_pool4, W_conv5) + b_conv5)
    h_pool5 = max_pool_2x2_same(h_conv5)
    
    
    W_conv6 = weight_variable([5, 5, 180, 180])
    b_conv6 = bias_variable([180])

    h_conv6 = tf.nn.relu(conv2d(h_pool5, W_conv6) + b_conv6)
    h_pool6 = max_pool_2x2_same(h_conv6)
    
    

    # Shared fully-connected layer; 16*16*180 matches the 16x16x180 output
    # of the conv stack above. z_fc1 (pre-activation) is also fetched by the
    # training loop for activation monitoring.
    W_fc1 = weight_variable([16 * 16 * 180, 1024])
    b_fc1 = bias_variable([1024])

    h_pool6_flat = tf.reshape(h_pool6, [-1, 16*16*180])
    z_fc1 = tf.matmul(h_pool6_flat, W_fc1) + b_fc1
    h_fc1 = tf.nn.relu(z_fc1)
    
    # Dropout on the shared representation; keep_prob is fed per run
    # (0.5 for training, 1.0 for evaluation).
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    
    
    # Six independent softmax heads over the shared dropout layer.
    #first digit
    W_fc2_d1 = weight_variable([1024, 11])
    b_fc2_d1 = bias_variable([11])

    y_conv_d1 = tf.matmul(h_fc1_drop, W_fc2_d1) + b_fc2_d1

    #second digit
    W_fc2_d2 = weight_variable([1024, 11])
    b_fc2_d2 = bias_variable([11])

    y_conv_d2 = tf.matmul(h_fc1_drop, W_fc2_d2) + b_fc2_d2

    #third digit
    W_fc2_d3 = weight_variable([1024, 11])
    b_fc2_d3 = bias_variable([11])

    y_conv_d3 = tf.matmul(h_fc1_drop, W_fc2_d3) + b_fc2_d3

    #fourth digit
    W_fc2_d4 = weight_variable([1024, 11])
    b_fc2_d4 = bias_variable([11])

    y_conv_d4 = tf.matmul(h_fc1_drop, W_fc2_d4) + b_fc2_d4

    #fifth digit
    W_fc2_d5 = weight_variable([1024, 11])
    b_fc2_d5 = bias_variable([11])

    y_conv_d5 = tf.matmul(h_fc1_drop, W_fc2_d5) + b_fc2_d5

    #digit size
    W_fc2_dsize = weight_variable([1024, 5])
    b_fc2_dsize = bias_variable([5])

    y_conv_dsize = tf.matmul(h_fc1_drop, W_fc2_dsize) + b_fc2_dsize

    
    # Total loss = sum of the six per-head cross-entropies + L2 penalty.
    # NOTE: positional (logits, labels) argument order is the pre-TF-1.0
    # signature; later TF versions require keyword arguments.
    # NOTE(review): W_conv5 and W_conv6 are absent from the L2 term —
    # this looks like an oversight; confirm whether it is intentional.
    cross_entropy = ( tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv_d1, y_d1))
                     + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv_d2, y_d2))
                     + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv_d3, y_d3))
                     + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv_d4, y_d4))
                     + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv_d5, y_d5))
                     + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv_dsize, y_dsize))
                     ) + reg *(tf.nn.l2_loss(W_conv1) + tf.nn.l2_loss(W_conv2)
                               + tf.nn.l2_loss(W_conv3) + tf.nn.l2_loss(W_conv4)
                               + tf.nn.l2_loss(W_fc1)
                               + tf.nn.l2_loss(W_fc2_d1) + tf.nn.l2_loss(W_fc2_d2)
                               + tf.nn.l2_loss(W_fc2_d3) + tf.nn.l2_loss(W_fc2_d4)
                               + tf.nn.l2_loss(W_fc2_d5) + tf.nn.l2_loss(W_fc2_dsize)
                              )

    # NOTE(review): learning rate 1e-2 with epsilon=0.1 is unusually
    # aggressive for Adam — a candidate explanation for the flat accuracy
    # curves in the output below; worth tuning.
    train_step = tf.train.AdamOptimizer(1e-2,epsilon=0.1).minimize(cross_entropy)
    #train_step = tf.train.tf.train.RMSPropOptimizer(1e-4).minimize(cross_entropy)
    
    # Accuracy is measured on the first-digit head only.
    #let's just check the first digit
    correct_prediction = tf.equal(tf.argmax(y_conv_d1,1), tf.argmax(y_d1,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Train the model on a small dataset to see whether it overfits.

If it overfits, that is a good sign. If not, check for bugs.


In [36]:
num_steps = 5000
summary_frequency = 20  # evaluate and record metrics every 20 steps

# Histories collected at each summary step: mean z_fc1 pre-activations
# (first 10 units) and first-digit accuracies, for train and validation.
BNs_train, BNs_test, acc_train, acc_test = [], [], [], []

with tf.Session(graph=graph) as session:

    # Pre-TF-1.0 initializer (later renamed tf.global_variables_initializer).
    tf.initialize_all_variables().run()
    print('Initialized')

    for i in range(num_steps):
        # Draw a random training mini-batch (sampled with replacement).
        (batch_x, batch_y_dsize, 
         batch_y_d1, batch_y_d2, 
         batch_y_d3, batch_y_d4, batch_y_d5) = next_batch(train_X, 
                                                          train_digit_size, 
                                                          train_digits, batch_size)
        feed_dict={
                x_image: batch_x, y_dsize: batch_y_dsize,
                y_d1: batch_y_d1, y_d2: batch_y_d2, y_d3: batch_y_d3,
                y_d4: batch_y_d4, y_d5: batch_y_d5,
                keep_prob: 0.5}
        
        session.run(train_step,feed_dict=feed_dict)
    
        if i%summary_frequency == 0:
            # Training metrics on the current batch. NOTE(review): keep_prob
            # is still 0.5 in this feed_dict, so dropout is active during the
            # evaluation and training accuracy is slightly pessimistic.
            res_train = session.run([accuracy,z_fc1],feed_dict=feed_dict)
            print("step %d, training accuracy %g"%(i, res_train[0]))
            
            acc_train.append(res_train[0])
            BNs_train.append(np.mean(res_train[1],axis=0).flatten()[:10])
            
            # Validation: a batch_size-sized sample (without replacement) from
            # the validation set, fed through the training placeholders with
            # dropout disabled (keep_prob = 1).
            (batch_x, batch_y_dsize, batch_y_d1,
             batch_y_d2, batch_y_d3, batch_y_d4, batch_y_d5) = next_batch(val_X, 
                                                                          val_digit_size, 
                                                                          val_digits, 
                                                                          batch_size, replace = False)
            feed_dict={x_image: batch_x, 
                       y_dsize: batch_y_dsize,y_d1: batch_y_d1, 
                       y_d2: batch_y_d2, y_d3: batch_y_d3,y_d4: batch_y_d4, 
                       y_d5: batch_y_d5, keep_prob: 1}
        
            res = session.run([accuracy,z_fc1],feed_dict=feed_dict)

            acc_test.append(res[0])

            # Record the mean of the first 10 z_fc1 units over this validation batch.
            BNs_test.append(np.mean(res[1],axis=0).flatten()[:10])
            print("step %d, val accuracy %g"%(i, res[0]))
        
# Convert histories to arrays for the plotting cells below.
BNs_train, BNs_test, acc_train, acc_test = ( np.array(BNs_train), 
                                            np.array(BNs_test), 
                                            np.array(acc_train), 
                                            np.array(acc_test) )


Initialized
step 0, training accuracy 0.25
step 0, val accuracy 0.3
step 20, training accuracy 0.35
step 20, val accuracy 0.24
step 40, training accuracy 0.25
step 40, val accuracy 0.31
step 60, training accuracy 0.33
step 60, val accuracy 0.35
step 80, training accuracy 0.3
step 80, val accuracy 0.28
step 100, training accuracy 0.33
step 100, val accuracy 0.26
step 120, training accuracy 0.32
step 120, val accuracy 0.25
step 140, training accuracy 0.24
step 140, val accuracy 0.2
step 160, training accuracy 0.25
step 160, val accuracy 0.31
step 180, training accuracy 0.25
step 180, val accuracy 0.26
step 200, training accuracy 0.26
step 200, val accuracy 0.35
step 220, training accuracy 0.29
step 220, val accuracy 0.31
step 240, training accuracy 0.29
step 240, val accuracy 0.21
step 260, training accuracy 0.41
step 260, val accuracy 0.3
step 280, training accuracy 0.32
step 280, val accuracy 0.31
step 300, training accuracy 0.32
step 300, val accuracy 0.32
step 320, training accuracy 0.28
step 320, val accuracy 0.25
step 340, training accuracy 0.26
step 340, val accuracy 0.24
step 360, training accuracy 0.35
step 360, val accuracy 0.38
step 380, training accuracy 0.27
step 380, val accuracy 0.31
step 400, training accuracy 0.26
step 400, val accuracy 0.35
step 420, training accuracy 0.25
step 420, val accuracy 0.25
step 440, training accuracy 0.32
step 440, val accuracy 0.22
step 460, training accuracy 0.38
step 460, val accuracy 0.28
step 480, training accuracy 0.38
step 480, val accuracy 0.23
step 500, training accuracy 0.3
step 500, val accuracy 0.32
step 520, training accuracy 0.28
step 520, val accuracy 0.29
step 540, training accuracy 0.3
step 540, val accuracy 0.33
step 560, training accuracy 0.27
step 560, val accuracy 0.33
step 580, training accuracy 0.3
step 580, val accuracy 0.26
step 600, training accuracy 0.35
step 600, val accuracy 0.27
step 620, training accuracy 0.22
step 620, val accuracy 0.22
step 640, training accuracy 0.25
step 640, val accuracy 0.35
step 660, training accuracy 0.29
step 660, val accuracy 0.28
step 680, training accuracy 0.26
step 680, val accuracy 0.29
step 700, training accuracy 0.22
step 700, val accuracy 0.23
step 720, training accuracy 0.3
step 720, val accuracy 0.28
step 740, training accuracy 0.3
step 740, val accuracy 0.29
step 760, training accuracy 0.29
step 760, val accuracy 0.29
step 780, training accuracy 0.34
step 780, val accuracy 0.32
step 800, training accuracy 0.32
step 800, val accuracy 0.36
step 820, training accuracy 0.24
step 820, val accuracy 0.25
step 840, training accuracy 0.37
step 840, val accuracy 0.31
step 860, training accuracy 0.18
step 860, val accuracy 0.31
step 880, training accuracy 0.27
step 880, val accuracy 0.33
step 900, training accuracy 0.34
step 900, val accuracy 0.32
step 920, training accuracy 0.28
step 920, val accuracy 0.29
step 940, training accuracy 0.33
step 940, val accuracy 0.29
step 960, training accuracy 0.32
step 960, val accuracy 0.34
step 980, training accuracy 0.38
step 980, val accuracy 0.25
step 1000, training accuracy 0.31
step 1000, val accuracy 0.2
step 1020, training accuracy 0.23
step 1020, val accuracy 0.25
step 1040, training accuracy 0.32
step 1040, val accuracy 0.32
step 1060, training accuracy 0.28
step 1060, val accuracy 0.18
step 1080, training accuracy 0.28
step 1080, val accuracy 0.28
step 1100, training accuracy 0.27
step 1100, val accuracy 0.34
step 1120, training accuracy 0.13
step 1120, val accuracy 0.17
step 1140, training accuracy 0.31
step 1140, val accuracy 0.19
step 1160, training accuracy 0.26
step 1160, val accuracy 0.25
step 1180, training accuracy 0.42
step 1180, val accuracy 0.26
step 1200, training accuracy 0.27
step 1200, val accuracy 0.23
step 1220, training accuracy 0.18
step 1220, val accuracy 0.1
step 1240, training accuracy 0.24
step 1240, val accuracy 0.21
step 1260, training accuracy 0.25
step 1260, val accuracy 0.3
step 1280, training accuracy 0.29
step 1280, val accuracy 0.24
step 1300, training accuracy 0.31
step 1300, val accuracy 0.33
step 1320, training accuracy 0.34
step 1320, val accuracy 0.25
step 1340, training accuracy 0.28
step 1340, val accuracy 0.2
step 1360, training accuracy 0.31
step 1360, val accuracy 0.31
step 1380, training accuracy 0.27
step 1380, val accuracy 0.34
step 1400, training accuracy 0.19
step 1400, val accuracy 0.31
step 1420, training accuracy 0.27
step 1420, val accuracy 0.29
step 1440, training accuracy 0.3
step 1440, val accuracy 0.23
step 1460, training accuracy 0.37
step 1460, val accuracy 0.25
step 1480, training accuracy 0.31
step 1480, val accuracy 0.34
step 1500, training accuracy 0.33
step 1500, val accuracy 0.32
step 1520, training accuracy 0.37
step 1520, val accuracy 0.24
step 1540, training accuracy 0.35
step 1540, val accuracy 0.31
step 1560, training accuracy 0.3
step 1560, val accuracy 0.22
step 1580, training accuracy 0.32
step 1580, val accuracy 0.23
step 1600, training accuracy 0.19
step 1600, val accuracy 0.32
step 1620, training accuracy 0.27
step 1620, val accuracy 0.35
step 1640, training accuracy 0.31
step 1640, val accuracy 0.26
step 1660, training accuracy 0.31
step 1660, val accuracy 0.27
step 1680, training accuracy 0.28
step 1680, val accuracy 0.33
step 1700, training accuracy 0.3
step 1700, val accuracy 0.28
step 1720, training accuracy 0.28
step 1720, val accuracy 0.33
step 1740, training accuracy 0.25
step 1740, val accuracy 0.32
step 1760, training accuracy 0.28
step 1760, val accuracy 0.41
step 1780, training accuracy 0.33
step 1780, val accuracy 0.2
step 1800, training accuracy 0.29
step 1800, val accuracy 0.29
step 1820, training accuracy 0.33
step 1820, val accuracy 0.27
step 1840, training accuracy 0.29
step 1840, val accuracy 0.31
step 1860, training accuracy 0.32
step 1860, val accuracy 0.29
step 1880, training accuracy 0.34
step 1880, val accuracy 0.3
step 1900, training accuracy 0.23
step 1900, val accuracy 0.33
step 1920, training accuracy 0.29
step 1920, val accuracy 0.29
step 1940, training accuracy 0.3
step 1940, val accuracy 0.26
step 1960, training accuracy 0.29
step 1960, val accuracy 0.31
step 1980, training accuracy 0.32
step 1980, val accuracy 0.31
step 2000, training accuracy 0.31
step 2000, val accuracy 0.31
step 2020, training accuracy 0.31
step 2020, val accuracy 0.29
step 2040, training accuracy 0.33
step 2040, val accuracy 0.23
step 2060, training accuracy 0.38
step 2060, val accuracy 0.25
step 2080, training accuracy 0.26
step 2080, val accuracy 0.27
step 2100, training accuracy 0.26
step 2100, val accuracy 0.23
step 2120, training accuracy 0.31
step 2120, val accuracy 0.29
step 2140, training accuracy 0.26
step 2140, val accuracy 0.27
step 2160, training accuracy 0.35
step 2160, val accuracy 0.24
step 2180, training accuracy 0.27
step 2180, val accuracy 0.28
step 2200, training accuracy 0.29
step 2200, val accuracy 0.25
step 2220, training accuracy 0.23
step 2220, val accuracy 0.28
step 2240, training accuracy 0.32
step 2240, val accuracy 0.24
step 2260, training accuracy 0.35
step 2260, val accuracy 0.29
step 2280, training accuracy 0.28
step 2280, val accuracy 0.37
step 2300, training accuracy 0.3
step 2300, val accuracy 0.3
step 2320, training accuracy 0.31
step 2320, val accuracy 0.26
step 2340, training accuracy 0.3
step 2340, val accuracy 0.29
step 2360, training accuracy 0.33
step 2360, val accuracy 0.23
step 2380, training accuracy 0.36
step 2380, val accuracy 0.35
step 2400, training accuracy 0.35
step 2400, val accuracy 0.34
step 2420, training accuracy 0.35
step 2420, val accuracy 0.3
step 2440, training accuracy 0.22
step 2440, val accuracy 0.28
step 2460, training accuracy 0.37
step 2460, val accuracy 0.32
step 2480, training accuracy 0.29
step 2480, val accuracy 0.29
step 2500, training accuracy 0.3
step 2500, val accuracy 0.18
step 2520, training accuracy 0.36
step 2520, val accuracy 0.35
step 2540, training accuracy 0.29
step 2540, val accuracy 0.24
step 2560, training accuracy 0.3
step 2560, val accuracy 0.27
step 2580, training accuracy 0.31
step 2580, val accuracy 0.25
step 2600, training accuracy 0.31
step 2600, val accuracy 0.32
step 2620, training accuracy 0.28
step 2620, val accuracy 0.31
step 2640, training accuracy 0.31
step 2640, val accuracy 0.32
step 2660, training accuracy 0.29
step 2660, val accuracy 0.26
step 2680, training accuracy 0.29
step 2680, val accuracy 0.29
step 2700, training accuracy 0.32
step 2700, val accuracy 0.29
step 2720, training accuracy 0.25
step 2720, val accuracy 0.21
step 2740, training accuracy 0.29
step 2740, val accuracy 0.29
step 2760, training accuracy 0.34
step 2760, val accuracy 0.32
step 2780, training accuracy 0.35
step 2780, val accuracy 0.28
step 2800, training accuracy 0.22
step 2800, val accuracy 0.34
step 2820, training accuracy 0.36
step 2820, val accuracy 0.25
step 2840, training accuracy 0.27
step 2840, val accuracy 0.18
step 2860, training accuracy 0.31
step 2860, val accuracy 0.3
step 2880, training accuracy 0.29
step 2880, val accuracy 0.3
step 2900, training accuracy 0.3
step 2900, val accuracy 0.24
step 2920, training accuracy 0.27
step 2920, val accuracy 0.28
step 2940, training accuracy 0.22
step 2940, val accuracy 0.26
step 2960, training accuracy 0.28
step 2960, val accuracy 0.32
step 2980, training accuracy 0.34
step 2980, val accuracy 0.26
step 3000, training accuracy 0.31
step 3000, val accuracy 0.3
step 3020, training accuracy 0.24
step 3020, val accuracy 0.33
step 3040, training accuracy 0.33
step 3040, val accuracy 0.23
step 3060, training accuracy 0.33
step 3060, val accuracy 0.33
step 3080, training accuracy 0.29
step 3080, val accuracy 0.3
step 3100, training accuracy 0.27
step 3100, val accuracy 0.25
step 3120, training accuracy 0.26
step 3120, val accuracy 0.23
step 3140, training accuracy 0.27
step 3140, val accuracy 0.31
step 3160, training accuracy 0.34
step 3160, val accuracy 0.25
step 3180, training accuracy 0.32
step 3180, val accuracy 0.26
step 3200, training accuracy 0.26
step 3200, val accuracy 0.29
step 3220, training accuracy 0.3
step 3220, val accuracy 0.27
step 3240, training accuracy 0.32
step 3240, val accuracy 0.36
step 3260, training accuracy 0.36
step 3260, val accuracy 0.25
step 3280, training accuracy 0.26
step 3280, val accuracy 0.28
step 3300, training accuracy 0.28
step 3300, val accuracy 0.25
step 3320, training accuracy 0.24
step 3320, val accuracy 0.31
step 3340, training accuracy 0.23
step 3340, val accuracy 0.28
step 3360, training accuracy 0.42
step 3360, val accuracy 0.27
step 3380, training accuracy 0.25
step 3380, val accuracy 0.35
step 3400, training accuracy 0.19
step 3400, val accuracy 0.28
step 3420, training accuracy 0.31
step 3420, val accuracy 0.33
step 3440, training accuracy 0.31
step 3440, val accuracy 0.36
step 3460, training accuracy 0.33
step 3460, val accuracy 0.24
step 3480, training accuracy 0.39
step 3480, val accuracy 0.28
step 3500, training accuracy 0.24
step 3500, val accuracy 0.27
step 3520, training accuracy 0.32
step 3520, val accuracy 0.27
step 3540, training accuracy 0.35
step 3540, val accuracy 0.26
step 3560, training accuracy 0.27
step 3560, val accuracy 0.34
step 3580, training accuracy 0.27
step 3580, val accuracy 0.22
step 3600, training accuracy 0.31
step 3600, val accuracy 0.28
step 3620, training accuracy 0.35
step 3620, val accuracy 0.33
step 3640, training accuracy 0.3
step 3640, val accuracy 0.25
step 3660, training accuracy 0.3
step 3660, val accuracy 0.25
step 3680, training accuracy 0.29
step 3680, val accuracy 0.36
step 3700, training accuracy 0.23
step 3700, val accuracy 0.21
step 3720, training accuracy 0.29
step 3720, val accuracy 0.34
step 3740, training accuracy 0.27
step 3740, val accuracy 0.26
step 3760, training accuracy 0.25
step 3760, val accuracy 0.31
step 3780, training accuracy 0.23
step 3780, val accuracy 0.3
step 3800, training accuracy 0.31
step 3800, val accuracy 0.28
step 3820, training accuracy 0.31
step 3820, val accuracy 0.3
step 3840, training accuracy 0.29
step 3840, val accuracy 0.29
step 3860, training accuracy 0.29
step 3860, val accuracy 0.3
step 3880, training accuracy 0.31
step 3880, val accuracy 0.28
step 3900, training accuracy 0.28
step 3900, val accuracy 0.29
step 3920, training accuracy 0.35
step 3920, val accuracy 0.26
step 3940, training accuracy 0.22
step 3940, val accuracy 0.3
step 3960, training accuracy 0.29
step 3960, val accuracy 0.26
step 3980, training accuracy 0.3
step 3980, val accuracy 0.25
step 4000, training accuracy 0.4
step 4000, val accuracy 0.25
step 4020, training accuracy 0.29
step 4020, val accuracy 0.31
step 4040, training accuracy 0.28
step 4040, val accuracy 0.29
step 4060, training accuracy 0.37
step 4060, val accuracy 0.33
step 4080, training accuracy 0.26
step 4080, val accuracy 0.22
step 4100, training accuracy 0.35
step 4100, val accuracy 0.32
step 4120, training accuracy 0.35
step 4120, val accuracy 0.27
step 4140, training accuracy 0.22
step 4140, val accuracy 0.28
step 4160, training accuracy 0.31
step 4160, val accuracy 0.18
step 4180, training accuracy 0.32
step 4180, val accuracy 0.33
step 4200, training accuracy 0.41
step 4200, val accuracy 0.38
step 4220, training accuracy 0.35
step 4220, val accuracy 0.33
step 4240, training accuracy 0.29
step 4240, val accuracy 0.26
step 4260, training accuracy 0.31
step 4260, val accuracy 0.32
step 4280, training accuracy 0.31
step 4280, val accuracy 0.31
step 4300, training accuracy 0.29
step 4300, val accuracy 0.21
step 4320, training accuracy 0.3
step 4320, val accuracy 0.24
step 4340, training accuracy 0.34
step 4340, val accuracy 0.23
step 4360, training accuracy 0.25
step 4360, val accuracy 0.27
step 4380, training accuracy 0.35
step 4380, val accuracy 0.34
step 4400, training accuracy 0.33
step 4400, val accuracy 0.26
step 4420, training accuracy 0.32
step 4420, val accuracy 0.19
step 4440, training accuracy 0.35
step 4440, val accuracy 0.25
step 4460, training accuracy 0.38
step 4460, val accuracy 0.25
step 4480, training accuracy 0.26
step 4480, val accuracy 0.34
step 4500, training accuracy 0.29
step 4500, val accuracy 0.27
step 4520, training accuracy 0.28
step 4520, val accuracy 0.21
step 4540, training accuracy 0.3
step 4540, val accuracy 0.24
step 4560, training accuracy 0.29
step 4560, val accuracy 0.3
step 4580, training accuracy 0.23
step 4580, val accuracy 0.28
step 4600, training accuracy 0.39
step 4600, val accuracy 0.23
step 4620, training accuracy 0.36
step 4620, val accuracy 0.33
step 4640, training accuracy 0.29
step 4640, val accuracy 0.3
step 4660, training accuracy 0.39
step 4660, val accuracy 0.29
step 4680, training accuracy 0.29
step 4680, val accuracy 0.23
step 4700, training accuracy 0.34
step 4700, val accuracy 0.25
step 4720, training accuracy 0.31
step 4720, val accuracy 0.29
step 4740, training accuracy 0.37
step 4740, val accuracy 0.22
step 4760, training accuracy 0.26
step 4760, val accuracy 0.25
step 4780, training accuracy 0.39
step 4780, val accuracy 0.27
step 4800, training accuracy 0.2
step 4800, val accuracy 0.33
step 4820, training accuracy 0.38
step 4820, val accuracy 0.29
step 4840, training accuracy 0.28
step 4840, val accuracy 0.2
step 4860, training accuracy 0.32
step 4860, val accuracy 0.28
step 4880, training accuracy 0.35
step 4880, val accuracy 0.32
step 4900, training accuracy 0.26
step 4900, val accuracy 0.31
step 4920, training accuracy 0.29
step 4920, val accuracy 0.25
step 4940, training accuracy 0.33
step 4940, val accuracy 0.26
step 4960, training accuracy 0.29
step 4960, val accuracy 0.27
step 4980, training accuracy 0.29
step 4980, val accuracy 0.21

In [39]:
# Plot training and validation accuracy against the training-step axis.
fig, ax = plt.subplots()

for series, name in ((acc_train, 'Training'), (acc_test, 'Validation')):
    steps = range(0, len(series) * summary_frequency, summary_frequency)
    ax.plot(steps, series, label=name)

ax.set_xlabel('Training steps')
ax.set_ylabel('Accuracy')
ax.set_ylim([0, 1])
ax.set_title('No Batch normalization Accuracy')
ax.legend(loc=4)
plt.show()



In [40]:
# One row per tracked z_fc1 unit: left panel = training, right = validation.
fig, axes = plt.subplots(5, 2, figsize=(6, 12))
fig.tight_layout()

for i, (ax_train, ax_val) in enumerate(axes):
    ax_train.set_title("training BN")
    ax_val.set_title("validation BN")
    ax_train.plot(BNs_train[:, i])
    ax_val.plot(BNs_test[:, i])



In [ ]: