Deep Learning

Assignment 4

Previously in 2_fullyconnected.ipynb and 3_regularization.ipynb, we trained fully connected networks to classify notMNIST characters.

The goal of this assignment is to make the neural network convolutional.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers
from six.moves import cPickle as pickle
from six.moves import range
from tensorbuilder.api import *
from plotly import offline as py, graph_objs as go, tools
import random


In [2]:
pickle_file = 'notMNIST.pickle'

# Read the pickled container first so the file handle is released before the
# arrays are unpacked. Unpickling runs arbitrary code, so only load a file
# you produced yourself.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Pull the six arrays out of the container under their own names.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

# Sanity check: report the shape of every split.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

('Training set', (200000, 28, 28), (200000,))
('Validation set', (10000, 28, 28), (10000,))
('Test set', (10000, 28, 28), (10000,))

Reformat into a TensorFlow-friendly shape:

  • convolutions need the image data formatted as a cube (width by height by #channels)
  • labels as float 1-hot encodings.

In [3]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  """Convert images to float32 cubes and class indices to float32 1-hot rows.

  Args:
    dataset: array that can be reshaped to
      (-1, image_size, image_size, num_channels).
    labels: 1-D array of class indices.

  Returns:
    (images, one_hot): the reshaped float32 images and a float32 matrix with
    one row per label and num_labels columns, holding 1.0 in the column that
    equals the label and 0.0 elsewhere.
  """
  cube_shape = (-1, image_size, image_size, num_channels)
  images = dataset.reshape(cube_shape).astype(np.float32)
  # Broadcasting a column of labels against the row of class ids gives a
  # boolean mask that is True exactly at each example's class.
  class_ids = np.arange(num_labels)
  one_hot = np.equal(class_ids, labels[:, None]).astype(np.float32)
  return images, one_hot
# Rebind every split to its reformatted version (image cubes + 1-hot labels).
# This rebinds the names in place, so the cell is meant to run once per kernel.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the new shapes; only the shape tuples are captured by the loop.
for split_name, data_shape, label_shape in (
    ('Training set', train_dataset.shape, train_labels.shape),
    ('Validation set', valid_dataset.shape, valid_labels.shape),
    ('Test set', test_dataset.shape, test_labels.shape),
):
  print(split_name, data_shape, label_shape)

('Training set', (200000, 28, 28, 1), (200000, 10))
('Validation set', (10000, 28, 28, 1), (10000, 10))
('Test set', (10000, 28, 28, 1), (10000, 10))

In [4]:
# NOTE(review): the right-hand sides of the next three assignments are cut off
# in this copy of the notebook (only the trailing `y=...` keyword argument
# survives), so these lines are not valid Python as shown. Later cells call
# `.split(...)` and `.epochs(...).batches_of(...).enumerated()` on these
# objects and read `.x` / `.y` from the pieces they produce. Restore the
# original constructor call from the source notebook. TODO confirm which
# class it was.
training_data =, y=train_labels)
test_data =, y=test_labels)
validation_data =, y=valid_labels)

# Pool every split into one array for the visualisation cells. Images and
# labels are stacked in the same order (train, test, validation).
x_data = np.vstack((train_dataset, test_dataset, valid_dataset))
y_data = np.vstack((train_labels, test_labels, valid_labels))

In [5]:
def crange(initial, final='z'):
    """Yield every character from `initial` through `final`, inclusive.

    Characters are enumerated by code point, so `crange('a', 'e')` produces
    'a', 'b', 'c', 'd', 'e'. Nothing is yielded when `initial` sorts after
    `final`.

    Args:
        initial: single character to start from.
        final: single character to stop at (included). Defaults to 'z'.

    Yields:
        One-character strings in ascending code-point order.
    """
    # `range` rather than the Python-2-only `xrange`: the notebook imports the
    # lazy `range` from six.moves, so this behaves the same on Python 2 and 3.
    for code_point in range(ord(initial), ord(final) + 1):
        yield chr(code_point)

# Class index (0-9) -> lowercase letter ('a'-'j').
char_dict = dict(enumerate(crange('a', 'j')))

# NOTE(review): the argument list of the `dict(` call below is missing in this
# copy of the notebook, so the body of axis_ops is incomplete as shown.
def axis_ops(): 
    return dict(
# Draw 10 distinct row indices from the pooled data. No seed is set in the
# visible cells, so the selection presumably differs between runs.
rand = random.sample(range(len(x_data)), 10)
# One subplot title per drawn index: argmax of the 1-hot label row, mapped to
# its letter through char_dict and upper-cased.
titles = [ "Letter {0}".format(char_dict[np.argmax(y_data[i])].upper()) for i in rand ]
# Both axes share whatever options axis_ops() returns.
layout = go.Layout(xaxis=axis_ops(), yaxis=axis_ops())
# 2 x 5 grid, one cell per drawn example.
fig = tools.make_subplots(rows=2, cols=5, print_grid=False, subplot_titles=titles)
# NOTE(review): the arguments of the `go.Heatmap(` call below are missing in
# this copy of the notebook, so the body of heatmap is incomplete as shown.
def heatmap(z, smooth='best', reversescale=True, title=None):
    return go.Heatmap(
def printmat(x):
    """Print the absolute values of a 2-D array with one decimal per entry.

    Args:
        x: array indexable as `x[:, :]` (at least two dimensions).
    """
    one_decimal = '{0:.1f}'.format
    magnitudes = np.abs(x[:, :])
    print(np.array2string(magnitudes, formatter={'float_kind': one_decimal}))

# Fill the 2 x 5 subplot grid row by row with the drawn examples.
n_rows, n_cols = 2, 5
for cell in range(n_rows * n_cols):
    row, col = divmod(cell, n_cols)
    i = rand[cell]
    # Rows are reversed before plotting; channel 0 is the only channel used.
    trace = heatmap(x_data[i][::-1, :, 0])
    # Subplot coordinates passed to append_trace are 1-based.
    fig.append_trace(trace, row + 1, col + 1)

# NOTE(review): the body of this loop is missing in this copy of the notebook;
# restore it from the source notebook before running the cell.
for i in range(10):


Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [10]:
# Reuse the ten indices drawn for the subplot grid and inspect one of them.
sample_indexes = rand
sample_index = 6  # position within sample_indexes, not a row of x_data

sample_features = x_data[sample_indexes]
sample_labels = y_data[sample_indexes]

# Keep channel 0 only, which leaves a 2-D image.
sample = sample_features[sample_index][:, :, 0]

hmap = heatmap(sample)
hlayout = layout.copy()

# `titles` was built from the same indices, so this position names the sample.
# The single-argument call form prints the same text on Python 2 and Python 3
# and matches the print(...) calls used in the earlier cells.
print(titles[sample_index])
py.iplot(go.Figure(data=[hmap], layout=hlayout))
# Dump the pixel magnitudes with five columns trimmed from each side.
printmat(sample[:, 5:-5])

Letter E
[[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.3 0.1 0.4 0.5 0.5]
 [0.1 0.3 0.3 0.2 0.1 0.5 0.5 0.5 0.5 0.5 0.5 0.2 0.2 0.5 0.5 0.4 0.0 0.2]
 [0.5 0.5 0.3 0.1 0.1 0.3 0.5 0.5 0.5 0.3 0.2 0.5 0.5 0.5 0.0 0.5 0.5 0.5]
 [0.5 0.0 0.5 0.5 0.5 0.5 0.5 0.5 0.0 0.5 0.5 0.5 0.5 0.2 0.5 0.5 0.5 0.5]
 [0.4 0.4 0.5 0.5 0.5 0.5 0.5 0.1 0.5 0.5 0.5 0.5 0.2 0.5 0.5 0.5 0.5 0.5]
 [0.4 0.5 0.5 0.5 0.5 0.5 0.0 0.5 0.5 0.5 0.5 0.2 0.5 0.5 0.5 0.5 0.5 0.5]
 [0.5 0.0 0.5 0.5 0.5 0.4 0.4 0.5 0.5 0.5 0.0 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 [0.2 0.3 0.1 0.5 0.5 0.1 0.5 0.5 0.5 0.3 0.5 0.5 0.5 0.5 0.5 0.5 0.3 0.3]
 [0.5 0.5 0.2 0.1 0.3 0.2 0.5 0.5 0.5 0.0 0.5 0.3 0.3 0.1 0.1 0.2 0.4 0.2]
 [0.5 0.5 0.5 0.5 0.4 0.1 0.5 0.5 0.5 0.1 0.2 0.1 0.1 0.1 0.2 0.3 0.5 0.5]
 [0.5 0.5 0.5 0.5 0.5 0.4 0.4 0.5 0.5 0.0 0.4 0.3 0.3 0.3 0.4 0.5 0.5 0.5]
 [0.5 0.5 0.5 0.3 0.0 0.2 0.4 0.4 0.5 0.4 0.2 0.4 0.0 0.5 0.5 0.2 0.5 0.5]
 [0.5 0.2 0.3 0.5 0.5 0.4 0.1 0.4 0.2 0.2 0.2 0.1 0.2 0.4 0.2 0.1 0.5 0.5]
 [0.3 0.5 0.5 0.5 0.2 0.4 0.5 0.5 0.5 0.5 0.5 0.4 0.4 0.5 0.5 0.5 0.5 0.5]
 [0.5 0.5 0.5 0.0 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 [0.5 0.5 0.1 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 [0.5 0.4 0.4 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 [0.5 0.1 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 [0.5 0.1 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
 [0.5 0.2 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.2]
 [0.5 0.2 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.4 0.2]
 [0.5 0.0 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.3 0.2]
 [0.5 0.4 0.4 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.4 0.3 0.0 0.5]
 [0.5 0.5 0.1 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.3 0.5 0.0 0.5 0.5]
 [0.5 0.5 0.5 0.2 0.4 0.5 0.5 0.5 0.5 0.5 0.5 0.4 0.1 0.5 0.0 0.5 0.5 0.5]
 [0.5 0.5 0.5 0.5 0.4 0.1 0.1 0.2 0.2 0.0 0.2 0.5 0.4 0.2 0.5 0.5 0.5 0.5]
 [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.1 0.4 0.5 0.5 0.5 0.5 0.5]
 [0.2 0.3 0.4 0.5 0.5 0.5 0.4 0.4 0.2 0.1 0.4 0.5 0.5 0.5 0.5 0.5 0.5 0.5]]

In [24]:
import tflearn

# Build the model in its own graph; later cells bind their sessions to it.
graph = tf.Graph()
with graph.as_default():
    # Placeholders fed on every run: an image batch, its 1-hot labels, and two
    # values with no declared shape (learning rate and keep probability).
    with tf.name_scope('inputs'):
        x = tf.placeholder('float', shape=[None, 28, 28, 1], name='x')
        y = tf.placeholder('float', shape=[None, 10], name='y')
        learning_rate = tf.placeholder('float', name='learning_rate')
        keep_prob = tf.placeholder('float', name='keep_prob')
    # NOTE(review): the T.Pipe(...) expression below is cut off in this copy of
    # the notebook. Everything after `With( tf.name_scope('loss'),` is missing,
    # including the closing parentheses, so the definitions of `trainer` and
    # `loss` are not visible. Restore the expression from the source notebook.
    [h, trainer, loss] = T.Pipe(
            x, T
            # Two inception layers followed by five 3x3 ELU conv layers, all
            # with batch normalisation; the last three convs use VALID padding.
            .inception_layer(4, activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm)
            .inception_layer(8, activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm)
            .elu_conv2d_layer(64, [3, 3], normalizer_fn=layers.batch_norm)
            .elu_conv2d_layer(128, [3, 3], normalizer_fn=layers.batch_norm)
            .elu_conv2d_layer(128, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
            .elu_conv2d_layer(128, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
            .elu_conv2d_layer(256, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
            # Final linear layer with 10 outputs.
            .linear_layer(10) #, scope='logits'),
                With( tf.name_scope('loss'),
    # Accuracy: fraction of rows where the argmax of the label matches the
    # argmax of the network output `h`; also recorded as a scalar summary.
    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(h, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
    summaries = tf.summary.merge_all()
    saver = tf.train.Saver()
    model_name = "not-mnist-inception-batch-norm.model"
    # NOTE(review): absolute path under the filesystem root; the checkpoint
    # path and the summary directory are the same string.
    logs_dir = model_dir = "/logs/{0}".format(model_name)
    # `writter` (sic) is the name the training cell uses; do not rename it here alone.
    writter = tf.summary.FileWriter(logs_dir, graph=graph)

def feed_dict(data, rate, prob):
    """Build the placeholder -> value mapping for one session run.

    Args:
        data: object exposing `.x` (fed to the `x` placeholder) and `.y`
            (fed to the `y` placeholder).
        rate: value fed to the `learning_rate` placeholder.
        prob: value fed to the `keep_prob` placeholder.

    Returns:
        dict keyed by the four module-level placeholders `x`, `y`,
        `learning_rate` and `keep_prob`.
    """
    # The dict literal was left unterminated in the original block; it is
    # closed here so the function is valid Python.
    return {
        x: data.x,
        y: data.y,
        learning_rate: rate,
        keep_prob: prob,
    }

def count_parameters(graph):
    """Return the total number of scalar entries in the graph's global variables.

    Each variable contributes the product of its shape dimensions; a variable
    with an empty shape contributes 1.

    Args:
        graph: graph whose `as_default()` context is entered before
            `tf.global_variables()` is queried.

    Returns:
        int: sum over all global variables of the product of their dimensions.
    """
    import operator
    # `reduce` is a builtin only on Python 2; functools provides it on 2 and 3.
    from functools import reduce

    with graph.as_default():
        # Every dimension is converted with int(), as in the original code.
        shapes = ([int(d) for d in v.get_shape()] for v in tf.global_variables())
        # The generator is consumed here, while the graph is still the default.
        return sum(reduce(operator.mul, shape, 1) for shape in shapes)

# Report the total returned by count_parameters for the model graph.
# (Python 2 print statement; the label text is kept as recorded in the output.)
print "Number of paramters:", count_parameters(graph)

Number of paramters: 2067286

In [25]:
# Fixed evaluation subsets: only the first piece of each split is kept.
# NOTE(review): split() is defined by the data helper, not shown here.
# Presumably the arguments are fractions; confirm.
[sample_test, _] = test_data.split(0.3, 0.7)
[sample_train, _] = training_data.split(0.02, 0.98)
[sample_validation, _] = validation_data.split(0.3, 0.7)

with tf.Session(graph=graph, config=tf.ConfigProto(log_device_placement=True)) as sess:
# NOTE(review): the next line is a comment, so `best` is not assigned on any
# visible line before it is compared in `if test_accuracy > best`. Restore the
# initialisation from the source notebook.
#; best = 0.0
    rate = 0.01
    # Load variable values from the checkpoint at model_dir before training.
    saver.restore(sess, model_dir)
    for step, datum in training_data.epochs(20000).batches_of(500).enumerated():
        # NOTE(review): the call on the right-hand side (presumably `sess.run(`)
        # and its closing parenthesis are missing in this copy of the notebook.
        # The same applies to the three evaluation assignments below.
        # Training step: feeds the batch with keep_prob 0.4.
        [_, _summaries] =
            [trainer, summaries], 
            feed_dict=feed_dict(datum, rate, 0.4)
        # Write summaries every 5th step.
        if step % 5 == 0:
            writter.add_summary(_summaries, step)
        # Every 200th step: evaluate loss/accuracy on the three fixed subsets
        # with keep_prob 1.0.
        if step % 200 == 0:
            print "Step ", step
            [test_loss, test_accuracy] =
                [loss, accuracy],
                feed_dict=feed_dict(sample_test, rate, 1.0)
            print "[Test]  loss: {0}, accuracy: {1}".format(test_loss, test_accuracy)
            [train_loss, train_accuracy] =
                [loss, accuracy],
                feed_dict=feed_dict(sample_train, rate, 1.0)
            print "[Train] loss: {0}, accuracy: {1}".format(train_loss, train_accuracy)
            [validation_loss, validation_accuracy] =
                [loss, accuracy],
                feed_dict=feed_dict(sample_validation, rate, 1.0)
            print "[Validation] loss: {0}, accuracy: {1}".format(validation_loss, validation_accuracy)
            # NOTE(review): the checkpoint is kept whenever accuracy on the
            # test subset improves, so the reported test accuracy is also the
            # model-selection criterion. Consider selecting on the validation
            # subset instead.
            if test_accuracy > best:
                print "Saving"
                # NOTE(review): the start of the save call is missing in this
                # copy (only `, model_dir)` survives).
      , model_dir)
                best = test_accuracy
            print ""

Step  0
[Test]  loss: 2.4315841198, accuracy: 0.158999994397
[Train] loss: 2.5180439949, accuracy: 0.155000001192
[Validation] loss: 2.50712108612, accuracy: 0.153999999166

Step  200
[Test]  loss: 0.280500650406, accuracy: 0.919000089169
[Train] loss: 0.509522616863, accuracy: 0.853250086308
[Validation] loss: 0.505547821522, accuracy: 0.861000061035

Step  400
[Test]  loss: 0.223610147834, accuracy: 0.937666714191
[Train] loss: 0.441778838634, accuracy: 0.873750090599
[Validation] loss: 0.444794863462, accuracy: 0.875666797161

In [27]:
# Draw 30 random rows from the pooled data and keep their true class indices.
# NOTE(review): this rebinds `rand`, which earlier held the 10 indices used for
# the subplot grid and for `sample_indexes`.
rand = random.sample(range(len(x_data)), 30)
answers = np.argmax(y_data[rand], 1)
samples = x_data[rand]

with tf.Session(graph=graph) as sess:
    # Load variable values from the checkpoint at model_dir.
    saver.restore(sess, model_dir)
    # NOTE(review): the call producing `predictions` is cut off in this copy of
    # the notebook (only its feed_dict argument remains).
    predictions =, feed_dict={x: samples, keep_prob: 1.0})
    # Predicted class = index of the largest entry in each prediction row.
    prediction_vals = np.argmax(predictions, 1)
# One two-panel figure per example: the image on the left, a bar trace on the
# right.
# NOTE(review): several lines of this loop are missing in this copy: the keyword
# that receives the two titles, the closing brackets of make_subplots and
# go.Bar, the remaining go.Bar arguments, and the body of the final `for` loop.
for answer, sample, prediction, prediction_val in zip(answers, samples, predictions, prediction_vals):
    fig = tools.make_subplots(
        rows=1, cols=2, print_grid=False,
            'Answer {0}'.format(char_dict[answer].upper()),
            'Prediction {0} - {1}'.format(char_dict[prediction_val].upper(), prediction_val == answer)

    # Rows are reversed before plotting, as in the earlier subplot grid.
    heatmap_trace = heatmap(sample[::-1, :, 0])
    bar_trace = go.Bar(
        x=list(crange('a', 'j')),

    fig.append_trace(heatmap_trace, 1, 1)
    fig.append_trace(bar_trace, 1, 2)

    for i in range(2):
