Deep Learning

Assignment 3

Previously in 2_fullyconnected.ipynb, you trained a logistic regression and a neural network model.

The goal of this assignment is to explore regularization techniques.


In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import cPickle as pickle
import numpy as np
import tensorflow as tf

First reload the data we generated in 1_notmnist.ipynb.


In [2]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print 'Training set', train_dataset.shape, train_labels.shape
    print 'Validation set', valid_dataset.shape, valid_labels.shape
    print 'Test set', test_dataset.shape, test_labels.shape


Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (18724, 28, 28) (18724,)

Reformat into a shape that's more adapted to the models we're going to train:

  • data as a flat matrix,
  • labels as float 1-hot encodings.

In [3]:
image_size = 28   # images are image_size x image_size pixels
num_labels = 10   # number of classes

def reformat(dataset, labels):
    """Flatten the images and one-hot encode the labels.

    The function is idempotent: labels that are already a
    (n_examples, num_labels) matrix are passed through unchanged, so the cell
    that rebinds the dataset names to the reformatted arrays can be re-run
    safely.

    Args:
        dataset: array holding image_size * image_size values per example
            (for instance shape (n_examples, image_size, image_size)).
        labels: 1-D array of integer class ids, or a 2-D
            (n_examples, num_labels) array that is already one-hot encoded.

    Returns:
        A (dataset, labels) tuple: dataset as a float32 matrix of shape
        (n_examples, image_size * image_size) and labels as a float32 matrix
        of shape (n_examples, num_labels).
    """
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[1] == num_labels:
        # Already one-hot encoded (e.g. the cell was executed twice).
        return dataset, labels.astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
    labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print 'Training set', train_dataset.shape, train_labels.shape
print 'Validation set', valid_dataset.shape, valid_labels.shape
print 'Test set', test_dataset.shape, test_labels.shape


Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (18724, 784) (18724, 10)

In [4]:
def accuracy(predictions, labels):
  """Return the percentage of rows whose top-scoring class matches the label.

  Args:
    predictions: 2-D array of per-class scores, one row per example.
    labels: 2-D array with one row per example whose largest entry marks the
      true class (e.g. a one-hot encoding).

  Returns:
    100 * (number of rows where the argmax of predictions equals the argmax
    of labels) / (number of rows in predictions).
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  n_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * n_correct / predictions.shape[0]

Problem 1

Introduce and tune L2 regularization for both logistic and neural network models. Remember that L2 amounts to adding a penalty on the norm of the weights to the loss. In TensorFlow, you can compute the L2 loss for a tensor t using nn.l2_loss(t). The right amount of regularization should improve your validation / test accuracy.



In [95]:
n_hidden = 1024      # width of the hidden layer
L2_weight = 0.5e-3   # multiplier applied to the L2 penalty in get_loss


def forward(tf_X):
    """Build a one-hidden-layer ReLU network together with its L2 weight penalty.

    Every call creates fresh variables (weights drawn with
    tf.truncated_normal, biases set to zero) in the current default graph.

    Args:
        tf_X: input tensor that is matrix-multiplied with a
            (image_size*image_size, n_hidden) weight matrix.

    Returns:
        A (logits, l2_penalty) tuple: the output-layer pre-activations and the
        sum of tf.nn.l2_loss over both weight matrices (biases are not
        penalised).
    """
    n_inputs = image_size*image_size
    with tf.name_scope('hidden1'):
        w_hidden = tf.Variable(tf.truncated_normal([n_inputs, n_hidden]), name="weights")
        b_hidden = tf.Variable(tf.zeros([n_hidden]), name="biases")
        hidden1 = tf.nn.relu(tf.matmul(tf_X, w_hidden) + b_hidden)
        l2_hidden = tf.nn.l2_loss(w_hidden)
    with tf.name_scope('z12'):
        w_out = tf.Variable(tf.truncated_normal([n_hidden, num_labels]), name="weights")
        b_out = tf.Variable(tf.zeros([num_labels]), name="biases")
        logits = tf.matmul(hidden1, w_out) + b_out
        l2_out = tf.nn.l2_loss(w_out)
    return logits, l2_hidden + l2_out

# Define loss
def get_loss(z12, l2_loss, tf_Y):
    """Build the training objective: mean softmax cross-entropy plus L2 penalty.

    Args:
        z12: logits tensor, as returned by forward().
        l2_loss: L2 penalty tensor, as returned by forward().
        tf_Y: tensor with the target labels for the logits (in this notebook a
            float placeholder fed with one-hot rows).

    Returns:
        Tensor equal to the cross-entropy averaged over all examples plus
        L2_weight * l2_loss.  L2_weight is read from the notebook globals when
        the graph is built.
    """
    # Use the labels handed in by the caller rather than reaching for the
    # notebook-global placeholder; keyword arguments keep logits and labels
    # from being swapped.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=z12, labels=tf_Y))
    total_loss = loss + L2_weight*l2_loss
    return total_loss

# Define the network graph
graph = tf.Graph()
with graph.as_default():
    # Inputs.  The feature dimension is declared so malformed feeds are
    # rejected; the leading dimension is left as None so the same
    # placeholders accept training minibatches of any size as well as the
    # full validation / test sets.
    tf_training_dataset = tf.placeholder(tf.float32, shape=(None, image_size*image_size))
    tf_training_labels = tf.placeholder(tf.float32, shape=(None, num_labels))

    # Logits and L2 penalty of the one-hidden-layer network.
    z12, l2_loss = forward(tf_training_dataset)
    # Regularised cross-entropy objective.
    total_loss = get_loss(z12, l2_loss, tf_training_labels)
    # Plain SGD with a fixed learning rate of 0.5.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(total_loss)

In [96]:
# train the model
num_steps = 3001
batch_size = 128
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print "Initialized, using batch size: %s" % batch_size
    for step in xrange(num_steps):
        idx = np.random.randint(train_dataset.shape[0], size=batch_size)
        #offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[idx]
        batch_labels = train_labels[idx]
        #batch_data = train_dataset[offset:(offset + batch_size), :]
        #batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_training_dataset : batch_data, tf_training_labels : batch_labels}
        _, l, predictions = session.run([optimizer, total_loss, z12], feed_dict=feed_dict)
        if (step % 500 == 0):
            #batch_size += 100
            print "Updated batch size: %s" % batch_size
            print "Minibatch loss at step", step, ":", l
            print "Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels)
            predictions = session.run(z12, feed_dict={tf_training_dataset: valid_dataset})
            print "Validation accuracy: %.1f%%" % accuracy(predictions, valid_labels)
    predictions = session.run(z12, feed_dict={tf_training_dataset: test_dataset})
    print "Test accuracy: %.1f%%" % accuracy(predictions, test_labels)


Initialized, using batch size: 128
Updated batch size: 128
Minibatch loss at step 0 : 433.335
Minibatch accuracy: 16.4%
Validation accuracy: 19.5%
Updated batch size: 128
Minibatch loss at step 500 : 154.113
Minibatch accuracy: 71.1%
Validation accuracy: 78.1%
Updated batch size: 128
Minibatch loss at step 1000 : 99.6778
Minibatch accuracy: 78.1%
Validation accuracy: 81.9%
Updated batch size: 128
Minibatch loss at step 1500 : 78.0688
Minibatch accuracy: 78.9%
Validation accuracy: 80.7%
Updated batch size: 128
Minibatch loss at step 2000 : 56.6891
Minibatch accuracy: 85.9%
Validation accuracy: 82.3%
Updated batch size: 128
Minibatch loss at step 2500 : 43.8642
Minibatch accuracy: 82.8%
Validation accuracy: 83.4%
Updated batch size: 128
Minibatch loss at step 3000 : 33.9656
Minibatch accuracy: 89.1%
Validation accuracy: 84.5%
Test accuracy: 91.1%

In [58]:
# Inspect the shape of the (flattened) training matrix.
train_dataset.shape


Out[58]:
(200000, 784)

Problem 2

Let's demonstrate an extreme case of overfitting. Restrict your training data to just a few batches. What happens?



In [97]:
# Overfitting using very small subset of data
num_steps = 3001
batch_size = 100
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print "Initialized, using batch size: %s" % batch_size
    for step in xrange(num_steps):
        idx = np.random.randint(train_dataset.shape[0]/100, size=batch_size)
        #offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[idx]
        batch_labels = train_labels[idx]
        #batch_data = train_dataset[offset:(offset + batch_size), :]
        #batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_training_dataset : batch_data, tf_training_labels : batch_labels}
        _, l, predictions = session.run([optimizer, total_loss, z12], feed_dict=feed_dict)
        if (step % 500 == 0):
            #batch_size += 100
            print "Updated batch size: %s" % batch_size
            print "Minibatch loss at step", step, ":", l
            print "Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels)
            predictions = session.run(z12, feed_dict={tf_training_dataset: valid_dataset})
            print "Validation accuracy: %.1f%%" % accuracy(predictions, valid_labels)
    predictions = session.run(z12, feed_dict={tf_training_dataset: test_dataset})
    print "Test accuracy: %.1f%%" % accuracy(predictions, test_labels)


Initialized, using batch size: 100
Updated batch size: 100
Minibatch loss at step 0 : 510.032
Minibatch accuracy: 7.0%
Validation accuracy: 24.8%
Updated batch size: 100
Minibatch loss at step 500 : 122.496
Minibatch accuracy: 99.0%
Validation accuracy: 78.4%
Updated batch size: 100
Minibatch loss at step 1000 : 95.3546
Minibatch accuracy: 100.0%
Validation accuracy: 78.9%
Updated batch size: 100
Minibatch loss at step 1500 : 74.2451
Minibatch accuracy: 100.0%
Validation accuracy: 78.8%
Updated batch size: 100
Minibatch loss at step 2000 : 57.8141
Minibatch accuracy: 100.0%
Validation accuracy: 79.0%
Updated batch size: 100
Minibatch loss at step 2500 : 45.0234
Minibatch accuracy: 100.0%
Validation accuracy: 78.5%
Updated batch size: 100
Minibatch loss at step 3000 : 35.0617
Minibatch accuracy: 100.0%
Validation accuracy: 78.7%
Test accuracy: 86.4%

Problem 3

Introduce Dropout on the hidden layer of the neural network. Remember: Dropout should only be introduced during training, not evaluation, otherwise your evaluation results would be stochastic as well. TensorFlow provides nn.dropout() for that, but you have to make sure it's only inserted during training.

What happens to our extreme overfitting case?



In [105]:
batch_size = 128
n_hidden = 1024      # width of the hidden layer
L2_weight = 0.5e-3   # multiplier applied to the L2 penalty in get_loss

def forward(tf_X, dropout_p):
    """Build a one-hidden-layer ReLU network with dropout on the hidden layer.

    Every call creates fresh variables (weights drawn with
    tf.truncated_normal, biases set to zero) in the current default graph.

    Args:
        tf_X: input tensor that is matrix-multiplied with a
            (image_size*image_size, n_hidden) weight matrix.
        dropout_p: value handed to tf.nn.dropout as its second argument for
            the hidden activations.  The training cells feed 0.5 while
            training and 1 when evaluating.

    Returns:
        A (logits, l2_penalty) tuple: the output-layer pre-activations and the
        sum of tf.nn.l2_loss over both weight matrices (biases are not
        penalised).
    """
    n_inputs = image_size*image_size
    with tf.name_scope('hidden1'):
        w_hidden = tf.Variable(tf.truncated_normal([n_inputs, n_hidden]), name="weights")
        b_hidden = tf.Variable(tf.zeros([n_hidden]), name="biases")
        activations = tf.nn.relu(tf.matmul(tf_X, w_hidden) + b_hidden)
        hidden1 = tf.nn.dropout(activations, dropout_p)
        l2_hidden = tf.nn.l2_loss(w_hidden)
    with tf.name_scope('z12'):
        w_out = tf.Variable(tf.truncated_normal([n_hidden, num_labels]), name="weights")
        b_out = tf.Variable(tf.zeros([num_labels]), name="biases")
        logits = tf.matmul(hidden1, w_out) + b_out
        l2_out = tf.nn.l2_loss(w_out)
    return logits, l2_hidden + l2_out

# Define loss
def get_loss(z12, l2_loss, tf_Y):
    """Build the training objective: mean softmax cross-entropy plus L2 penalty.

    Args:
        z12: logits tensor, as returned by forward().
        l2_loss: L2 penalty tensor, as returned by forward().
        tf_Y: tensor with the target labels for the logits (in this notebook a
            float placeholder fed with one-hot rows).

    Returns:
        Tensor equal to the cross-entropy averaged over all examples plus
        L2_weight * l2_loss.  L2_weight is read from the notebook globals when
        the graph is built.
    """
    # Use the labels handed in by the caller rather than reaching for the
    # notebook-global placeholder; keyword arguments keep logits and labels
    # from being swapped.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=z12, labels=tf_Y))
    total_loss = loss + L2_weight*l2_loss
    return total_loss

# Define the network graph
# NOTE(review): this reaches into a non-public module path; the model below is
# built in its own explicit tf.Graph(), so the reset looks redundant - confirm
# before removing.
tf.python.framework.ops.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    # Inputs.  The feature dimension is declared so malformed feeds are
    # rejected; the leading dimension is left as None so the same
    # placeholders accept training minibatches of any size as well as the
    # full validation / test sets.
    tf_training_dataset = tf.placeholder(tf.float32, shape=(None, image_size*image_size))
    tf_training_labels = tf.placeholder(tf.float32, shape=(None, num_labels))
    # Fed at run time: 0.5 while training, 1 while evaluating.
    dropout_p = tf.placeholder(tf.float32)

    # Logits and L2 penalty of the dropout network.
    z12, l2_loss = forward(tf_training_dataset, dropout_p)
    # Regularised cross-entropy objective.
    total_loss = get_loss(z12, l2_loss, tf_training_labels)
    # Plain SGD with a fixed learning rate of 0.5.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(total_loss)

In [109]:
# train the model
num_steps = 3001
batch_size = 128
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print "Initialized, using batch size: %s" % batch_size
    for step in xrange(num_steps):
        idx = np.random.randint(train_dataset.shape[0], size=batch_size)
        #offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[idx]
        batch_labels = train_labels[idx]
        #batch_data = train_dataset[offset:(offset + batch_size), :]
        #batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_training_dataset : batch_data, tf_training_labels : batch_labels, dropout_p: 0.5}
        _, l = session.run([optimizer, total_loss], feed_dict=feed_dict)
        predictions = session.run(z12, feed_dict={tf_training_dataset: batch_data, dropout_p: 1})
        if (step % 500 == 0):
            #batch_size += 100
            print "Updated batch size: %s" % batch_size
            print "Minibatch loss at step", step, ":", l
            print "Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels)
            predictions = session.run(z12, feed_dict={tf_training_dataset: valid_dataset, dropout_p: 1})
            print "Validation accuracy: %.1f%%" % accuracy(predictions, valid_labels)
    predictions = session.run(z12, feed_dict={tf_training_dataset: test_dataset, dropout_p: 1})
    print "Test accuracy: %.1f%%" % accuracy(predictions, test_labels)


Initialized, using batch size: 128
Updated batch size: 128
Minibatch loss at step 0 : 594.441
Minibatch accuracy: 45.3%
Validation accuracy: 30.6%
Updated batch size: 128
Minibatch loss at step 500 : 135.303
Minibatch accuracy: 82.8%
Validation accuracy: 77.0%
Updated batch size: 128
Minibatch loss at step 1000 : 106.291
Minibatch accuracy: 83.6%
Validation accuracy: 76.9%
Updated batch size: 128
Minibatch loss at step 1500 : 78.9362
Minibatch accuracy: 83.6%
Validation accuracy: 79.4%
Updated batch size: 128
Minibatch loss at step 2000 : 59.3434
Minibatch accuracy: 83.6%
Validation accuracy: 80.6%
Updated batch size: 128
Minibatch loss at step 2500 : 44.3513
Minibatch accuracy: 87.5%
Validation accuracy: 82.0%
Updated batch size: 128
Minibatch loss at step 3000 : 34.0546
Minibatch accuracy: 93.0%
Validation accuracy: 82.7%
Test accuracy: 89.7%

In [108]:
# train the model using smaller sample resulting in overfitting
num_steps = 3001
batch_size = 100
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print "Initialized, using batch size: %s" % batch_size
    for step in xrange(num_steps):
        idx = np.random.randint(train_dataset[:2000].shape[0], size=batch_size)
        #offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[:2000][idx]
        batch_labels = train_labels[:2000][idx]
        #batch_data = train_dataset[offset:(offset + batch_size), :]
        #batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_training_dataset : batch_data, tf_training_labels : batch_labels, dropout_p: 0.5}
        _, l = session.run([optimizer, total_loss], feed_dict=feed_dict)
        predictions = session.run(z12, feed_dict={tf_training_dataset: batch_data, dropout_p: 1})
        if (step % 500 == 0):
            #batch_size += 100
            print "Updated batch size: %s" % batch_size
            print "Minibatch loss at step", step, ":", l
            print "Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels)
            predictions = session.run(z12, feed_dict={tf_training_dataset: valid_dataset, dropout_p: 1})
            print "Validation accuracy: %.1f%%" % accuracy(predictions, valid_labels)
    predictions = session.run(z12, feed_dict={tf_training_dataset: test_dataset, dropout_p: 1})
    print "Test accuracy: %.1f%%" % accuracy(predictions, test_labels)


Initialized, using batch size: 100
Updated batch size: 100
Minibatch loss at step 0 : 600.411
Minibatch accuracy: 53.0%
Validation accuracy: 34.7%
Updated batch size: 100
Minibatch loss at step 500 : 132.637
Minibatch accuracy: 100.0%
Validation accuracy: 80.2%
Updated batch size: 100
Minibatch loss at step 1000 : 100.23
Minibatch accuracy: 100.0%
Validation accuracy: 80.0%
Updated batch size: 100
Minibatch loss at step 1500 : 80.0319
Minibatch accuracy: 100.0%
Validation accuracy: 79.9%
Updated batch size: 100
Minibatch loss at step 2000 : 60.3647
Minibatch accuracy: 100.0%
Validation accuracy: 80.7%
Updated batch size: 100
Minibatch loss at step 2500 : 47.079
Minibatch accuracy: 100.0%
Validation accuracy: 80.8%
Updated batch size: 100
Minibatch loss at step 3000 : 36.7001
Minibatch accuracy: 100.0%
Validation accuracy: 80.2%
Test accuracy: 88.1%

Problem 4

Try to get the best performance you can using a multi-layer model! The best reported test accuracy using a deep network is 97.1%.

One avenue you can explore is to add multiple layers.

Another one is to use learning rate decay:

global_step = tf.Variable(0)  # count the number of steps taken.
learning_rate = tf.train.exponential_decay(0.5, global_step, ...)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)



In [186]:
## BEST MODEL
"""
[Step: 5000] Minibatch loss 12.6376, accuracy: 89.5%
[Step: 5000] Validation loss 12.6891, accuracy: 86.9%
Test loss 12.4793, accuracy: 93.0%
"""

batch_size = 128
n_hidden = 1024      # width of the hidden layer
L2_weight = 0.5e-3   # multiplier applied to the L2 penalty in get_loss

def forward(tf_X, dropout_p, input_keep_prob=0.9):
    """Build a one-hidden-layer ReLU network with input and hidden dropout.

    Every call creates fresh variables (weights drawn with
    tf.truncated_normal, biases set to zero) in the current default graph.

    Args:
        tf_X: input tensor that is matrix-multiplied with a
            (image_size*image_size, n_hidden) weight matrix.
        dropout_p: value handed to tf.nn.dropout as its second argument for
            the hidden activations.
        input_keep_prob: value handed to tf.nn.dropout as its second argument
            for the raw inputs.  The default 0.9 reproduces the value that
            used to be hard-coded; as a Python constant it is baked into the
            graph and therefore also applied whenever the graph is run for
            evaluation.  Pass a placeholder instead to be able to feed 1.0
            when evaluating.

    Returns:
        A (outputs, l2_penalty) tuple: the output-layer pre-activations
        (logits) and the sum of tf.nn.l2_loss over both weight matrices
        (biases are not penalised).
    """
    l2_weight_loss = []
    with tf.name_scope('hidden1'):
        weights = tf.Variable(tf.truncated_normal([image_size*image_size, n_hidden]), name="weights")
        biases = tf.Variable(tf.zeros([n_hidden]), name="biases")
        # Dropout on the raw inputs, then the affine transform.
        z01 = tf.matmul(tf.nn.dropout(tf_X, input_keep_prob), weights) + biases
        hidden1 = tf.nn.dropout(tf.nn.relu(z01), dropout_p) # Added dropout
        l2_weight_loss.append(tf.nn.l2_loss(weights))
    # A second n_hidden x n_hidden tanh layer was tried at this point and left
    # disabled; the output layer reads hidden1 directly.
    with tf.name_scope('outputs'):
        weights = tf.Variable(tf.truncated_normal([n_hidden, num_labels]), name="weights")
        biases = tf.Variable(tf.zeros([num_labels]), name="biases")
        # Raw logits; the softmax is applied inside the loss.
        outputs = tf.matmul(hidden1, weights) + biases
        l2_weight_loss.append(tf.nn.l2_loss(weights))
    # sum() starts from 0, exactly like the previous reduce over a list seeded with 0.
    return outputs, sum(l2_weight_loss)

# Define loss
def get_loss(outputs, l2_loss, tf_Y):
    """Build the training objective: mean softmax cross-entropy plus L2 penalty.

    Args:
        outputs: logits tensor, as returned by forward().
        l2_loss: L2 penalty tensor, as returned by forward().
        tf_Y: tensor with the target labels for the logits (in this notebook a
            float placeholder fed with one-hot rows).

    Returns:
        Tensor equal to the cross-entropy averaged over all examples plus
        L2_weight * l2_loss.  L2_weight is read from the notebook globals when
        the graph is built.
    """
    # Use the labels handed in by the caller rather than reaching for the
    # notebook-global placeholder; keyword arguments keep logits and labels
    # from being swapped.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=tf_Y))
    total_loss = loss + L2_weight*l2_loss
    return total_loss

# Define the network graph
# NOTE(review): this reaches into a non-public module path; the model below is
# built in its own explicit tf.Graph(), so the reset looks redundant - confirm
# before removing.
tf.python.framework.ops.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    # Inputs.  The feature dimension is declared so malformed feeds are
    # rejected; the leading dimension is left as None so the same
    # placeholders accept training minibatches of any size as well as the
    # full validation / test sets.
    tf_training_dataset = tf.placeholder(tf.float32, shape=(None, image_size*image_size))
    tf_training_labels = tf.placeholder(tf.float32, shape=(None, num_labels))
    # Fed at run time and forwarded to the hidden-layer dropout.
    dropout_p = tf.placeholder(tf.float32)

    # Logits and L2 penalty of the network.
    outputs, l2_loss = forward(tf_training_dataset, dropout_p)
    # Regularised cross-entropy objective.
    total_loss = get_loss(outputs, l2_loss, tf_training_labels)

    global_step = tf.Variable(0, trainable=False)  # count the number of steps taken.
    # Learning-rate decay is currently disabled; a constant rate is used.
    #learning_rate = tf.train.exponential_decay(0.5, global_step, 10000, 0.96)
    learning_rate = 0.5
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss, global_step=global_step)

In [188]:
# train the model
num_steps = 5001
batch_size = 128
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print "Initialized, using batch size: %s" % batch_size
    for step in xrange(num_steps):
        idx = np.random.randint(train_dataset.shape[0], size=batch_size)
        #offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[idx]
        batch_labels = train_labels[idx]
        #batch_data = train_dataset[offset:(offset + batch_size), :]
        #batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_training_dataset : batch_data, tf_training_labels : batch_labels, dropout_p: 1}
        _, l = session.run([optimizer, total_loss], feed_dict=feed_dict)
        predictions = session.run(outputs, feed_dict={tf_training_dataset: batch_data, dropout_p: 1})
        if (step % 500 == 0):
            batch_size += 100
            print "Updated batch size: %s" % batch_size
            print "[Step: %s] Minibatch loss %s, accuracy: %.1f%%" % (step, l, accuracy(predictions, batch_labels))
            predictions, l = session.run([outputs, total_loss], 
                                      feed_dict={tf_training_dataset: valid_dataset, tf_training_labels : valid_labels, dropout_p: 1})
            print "[Step: %s] Validation loss %s, accuracy: %.1f%%" % (step, l, accuracy(predictions, valid_labels))
    predictions, l = session.run([outputs, total_loss], 
                                      feed_dict={tf_training_dataset: test_dataset, tf_training_labels : test_labels, dropout_p: 1})
    print "Test loss %s, accuracy: %.1f%%" % (l, accuracy(predictions, test_labels))


Initialized, using batch size: 128
Updated batch size: 228
[Step: 0] Minibatch loss 594.078, accuracy: 50.0%
[Step: 0] Validation loss 1413.83, accuracy: 33.0%
Updated batch size: 328
[Step: 500] Minibatch loss 130.521, accuracy: 83.3%
[Step: 500] Validation loss 137.902, accuracy: 76.8%
Updated batch size: 428
[Step: 1000] Minibatch loss 95.7518, accuracy: 85.1%
[Step: 1000] Validation loss 96.2667, accuracy: 81.0%
Updated batch size: 528
[Step: 1500] Minibatch loss 72.8652, accuracy: 84.1%
[Step: 1500] Validation loss 73.9434, accuracy: 80.1%
Updated batch size: 628
[Step: 2000] Minibatch loss 56.29, accuracy: 85.0%
[Step: 2000] Validation loss 56.8085, accuracy: 82.1%
Updated batch size: 728
[Step: 2500] Minibatch loss 43.5756, accuracy: 85.2%
[Step: 2500] Validation loss 43.8024, accuracy: 82.5%
Updated batch size: 828
[Step: 3000] Minibatch loss 33.9084, accuracy: 88.0%
[Step: 3000] Validation loss 33.9835, accuracy: 84.0%
Updated batch size: 928
[Step: 3500] Minibatch loss 26.4319, accuracy: 86.7%
[Step: 3500] Validation loss 26.7211, accuracy: 81.8%
Updated batch size: 1028
[Step: 4000] Minibatch loss 20.5894, accuracy: 88.9%
[Step: 4000] Validation loss 20.6519, accuracy: 86.1%
Updated batch size: 1128
[Step: 4500] Minibatch loss 16.1211, accuracy: 89.2%
[Step: 4500] Validation loss 16.1746, accuracy: 86.3%
Updated batch size: 1228
[Step: 5000] Minibatch loss 12.6376, accuracy: 89.5%
[Step: 5000] Validation loss 12.6891, accuracy: 86.9%
Test loss 12.4793, accuracy: 93.0%

In [ ]: