Accompanying code examples for the book "Introduction to Artificial Neural Networks and Deep Learning: A Practical Guide with Applications in Python" by Sebastian Raschka. All code examples are released under the MIT license. If you find this content useful, please consider supporting the work by buying a copy of the book.

Other code examples and content are available on GitHub. The PDF and ebook versions of the book are available through Leanpub.


In [1]:
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p tensorflow


Sebastian Raschka 

CPython 3.6.1
IPython 6.0.0

tensorflow 1.2.0

Model Zoo -- Convolutional Neural Network

Low-level Implementation
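
This notebook implements a simple convolutional neural network from low-level TensorFlow primitives (tf.nn.conv2d, tf.nn.max_pool, and hand-rolled variable creation instead of tf.layers): two convolutional layers with ReLU activations and max-pooling, a fully connected layer with dropout, and a softmax output, trained on MNIST with plain gradient descent.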


In [2]:
import tensorflow as tf
from functools import reduce
from tensorflow.examples.tutorials.mnist import input_data


##########################
### DATASET
##########################

mnist = input_data.read_data_sets("./", one_hot=True)


##########################
### SETTINGS
##########################

# Hyperparameters
learning_rate = 0.1
dropout_keep_proba = 0.5
epochs = 3
batch_size = 32

# Architecture
input_size = 784
image_width, image_height = 28, 28
n_classes = 10

# Other
print_interval = 500
random_seed = 123


##########################
### WRAPPER FUNCTIONS
##########################

def conv2d(input_tensor, output_channels,
           kernel_size=(5, 5), strides=(1, 1, 1, 1),
           padding='SAME', activation=None, seed=None,
           name='conv2d'):

    with tf.name_scope(name):
        input_channels = input_tensor.get_shape().as_list()[-1]
        weights_shape = (kernel_size[0], kernel_size[1],
                         input_channels, output_channels)

        weights = tf.Variable(tf.truncated_normal(shape=weights_shape,
                                                  mean=0.0,
                                                  stddev=0.01,
                                                  dtype=tf.float32,
                                                  seed=seed),
                              name='weights')
        biases = tf.Variable(tf.zeros(shape=(output_channels,)), name='biases')
        conv = tf.nn.conv2d(input=input_tensor,
                            filter=weights,
                            strides=strides,
                            padding=padding)

        act = conv + biases
        if activation is not None:
            act = activation(act)
        return act
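
# Shape example: conv2d(x, output_channels=8, kernel_size=(3, 3)) applied to
# a 28x28x1 input creates a (3, 3, 1, 8) weight tensor; with 'SAME' padding
# and stride 1, the output spatial size stays at 28x28.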


def fully_connected(input_tensor, output_nodes,
                    activation=None, seed=None,
                    name='fully_connected'):

    with tf.name_scope(name):
        input_nodes = input_tensor.get_shape().as_list()[1]
        weights = tf.Variable(tf.truncated_normal(shape=(input_nodes,
                                                         output_nodes),
                                                  mean=0.0,
                                                  stddev=0.01,
                                                  dtype=tf.float32,
                                                  seed=seed),
                              name='weights')
        biases = tf.Variable(tf.zeros(shape=[output_nodes]), name='biases')

        act = tf.matmul(input_tensor, weights) + biases
        if activation is not None:
            act = activation(act)
        return act
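
# Shape example: fully_connected(flat, output_nodes=64) on the 12544-dim
# flattened input below creates a (12544, 64) weight matrix and a
# 64-dim bias vector.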

    
##########################
### GRAPH DEFINITION
##########################

g = tf.Graph()
with g.as_default():
    
    tf.set_random_seed(random_seed)

    # Input data
    tf_x = tf.placeholder(tf.float32, [None, input_size, 1], name='inputs')
    tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')
    
    keep_proba = tf.placeholder(tf.float32, shape=None, name='keep_proba')

    # Convolutional Neural Network:
    # 2 convolutional layers with maxpool and ReLU activation
    input_layer = tf.reshape(tf_x, shape=[-1, image_width, image_height, 1])
    
    conv1 = conv2d(input_tensor=input_layer,
                   output_channels=8,
                   kernel_size=(3, 3),
                   strides=(1, 1, 1, 1),
                   activation=tf.nn.relu,
                   name='conv1')
                              
    # ksize=(1, 2, 2, 1) pools over 2x2 windows, but strides=(1, 1, 1, 1)
    # means the pooling does not downsample -- the feature maps stay 28x28
    pool1 = tf.nn.max_pool(conv1,
                           ksize=(1, 2, 2, 1),
                           strides=(1, 1, 1, 1),
                           padding='SAME',
                           name='maxpool1')
    
    conv2 = conv2d(input_tensor=pool1,
                   output_channels=16,
                   kernel_size=(3, 3),
                   strides=(1, 1, 1, 1),
                   activation=tf.nn.relu,
                   name='conv2')
    
    pool2 = tf.nn.max_pool(conv2,
                           ksize=(1, 2, 2, 1), 
                           strides=(1, 1, 1, 1),
                           padding='SAME',
                           name='maxpool2')
    
    # flatten the feature maps for the fully connected layers; with stride-1
    # pooling, pool2 has shape 28x28x16, so dims = 28*28*16 = 12544
    dims = pool2.get_shape().as_list()[1:]
    dims = reduce(lambda x, y: x * y, dims, 1)
    flat = tf.reshape(pool2, shape=(-1, dims))
    
    fc = fully_connected(flat, output_nodes=64, 
                         activation=tf.nn.relu)
    
    # dropout regularization; keep_proba is fed as 1.0 during evaluation
    fc = tf.nn.dropout(fc, keep_prob=keep_proba)
    out_layer = fully_connected(fc, n_classes, activation=None, 
                                name='logits')

    # Loss and optimizer
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)
    cost = tf.reduce_mean(loss, name='cost')
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train = optimizer.minimize(cost, name='train')

    # Prediction
    correct_prediction = tf.equal(tf.argmax(tf_y, 1),
                                  tf.argmax(out_layer, 1),
                                  name='correct_prediction')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 
                                      tf.float32), 
                              name='accuracy')


Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz
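
Because both pooling layers use stride 1, the convolutional stack preserves the 28x28 spatial resolution, so the flattened dimension is 28*28*16 = 12544. A quick, optional sanity check (a minimal sketch; the tensor name follows from the name='maxpool2' argument above):

In [ ]:
# static shape of the second pooling layer's output -- expect (?, 28, 28, 16)
print(g.get_tensor_by_name('maxpool2:0').get_shape())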

In [3]:
import numpy as np

##########################
### TRAINING & EVALUATION
##########################

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())

    np.random.seed(random_seed) # random seed for mnist iterator
    for epoch in range(1, epochs + 1):
        avg_cost = 0.
        total_batch = mnist.train.num_examples // batch_size

        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_x = batch_x[:, :, None] # add "missing" color channel
            
            _, c = sess.run(['train', 'cost:0'], 
                            feed_dict={'inputs:0': batch_x,
                                       'targets:0': batch_y,
                                       'keep_proba:0': dropout_keep_proba})
            avg_cost += c
            if i % print_interval == 0:
                print("Minibatch: %03d | Cost: %.3f" % (i + 1, c))
        
        train_acc = sess.run('accuracy:0', 
                             feed_dict={'inputs:0': mnist.train.images[:, :, None],
                                        'targets:0': mnist.train.labels,
                                        'keep_proba:0': 1.0})
        valid_acc = sess.run('accuracy:0', 
                             feed_dict={'inputs:0': mnist.validation.images[:, :, None],
                                        'targets:0': mnist.validation.labels,
                                        'keep_proba:0': 1.0})
        
        print("Epoch: %03d | AvgCost: %.3f" % (epoch, avg_cost / (i + 1)), end="")
        print(" | Train/Valid ACC: %.3f/%.3f" % (train_acc, valid_acc))
        
    test_acc = sess.run('accuracy:0', 
                        feed_dict={'inputs:0': mnist.test.images[:, :, None],
                                   'targets:0': mnist.test.labels,
                                   'keep_proba:0': 1.0})
        
    print('Test ACC: %.3f' % test_acc)


Minibatch: 001 | Cost: 2.303
Minibatch: 501 | Cost: 2.325
Minibatch: 1001 | Cost: 0.833
Minibatch: 1501 | Cost: 0.181
Epoch: 001 | AvgCost: 1.281 | Train/Valid ACC: 0.943/0.951
Minibatch: 001 | Cost: 0.472
Minibatch: 501 | Cost: 0.147
Minibatch: 1001 | Cost: 0.090
Minibatch: 1501 | Cost: 0.127
Epoch: 002 | AvgCost: 0.239 | Train/Valid ACC: 0.976/0.978
Minibatch: 001 | Cost: 0.055
Minibatch: 501 | Cost: 0.101
Minibatch: 1001 | Cost: 0.483
Minibatch: 1501 | Cost: 0.069
Epoch: 003 | AvgCost: 0.150 | Train/Valid ACC: 0.981/0.978
Test ACC: 0.980
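
The trained variables live only inside the session above, so predictions have to be computed before the with block closes. A minimal sketch (assumes the snippet is appended inside the session block of cell In [3], after the test-accuracy evaluation, so the learned weights are still available; out_layer is the logits tensor defined in the graph cell):

    # predict labels for the first five test images via the live session
    logits = sess.run(out_layer,
                      feed_dict={'inputs:0': mnist.test.images[:5, :, None],
                                 'keep_proba:0': 1.0})
    print('Predicted labels:', logits.argmax(axis=1))
    print('True labels:     ', mnist.test.labels[:5].argmax(axis=1))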