      Xavier Bresson, Sept. 2016

Lecture 11 : Deep Learning 3 - Convolutional Neural Networks

Code 1 : LeNet5

Implementation of the original LeNet5 Convolutional Neural Network, described in:
Gradient-based learning applied to document recognition
Y LeCun, L Bottou, Y Bengio, P Haffner
Proceedings of the IEEE 86 (11), 2278-2324

In [1]:
import tensorflow as tf
import numpy as np
import time
import collections


In [2]:
# Command-line style configuration for the notebook.
flags = tf.app.flags  # thin wrapper around python-gflags
FLAGS = flags.FLAGS   # flag values are read below as FLAGS.<name>

# Data folder
flags.DEFINE_string('dir_data', 'datasets', 'Directory to store data')
# Hyper-parameters; the model-selection cells further down overwrite
# learning_rate, regularization and dropout per architecture.
flags.DEFINE_float('learning_rate', 0.2, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_float('regularization', 0.0, 'L2 regularizations of weights and biases.')
flags.DEFINE_float('dropout', 1.0, 'Dropout')

In [3]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST splits from the folder given by FLAGS.dir_data; labels are
# kept as integer class ids (one_hot=False).
mnist = input_data.read_data_sets(
    FLAGS.dir_data,
    one_hot=False,
)

Extracting datasets/train-images-idx3-ubyte.gz
Extracting datasets/train-labels-idx1-ubyte.gz
Extracting datasets/t10k-images-idx3-ubyte.gz
Extracting datasets/t10k-labels-idx1-ubyte.gz

In [4]:
# Pull the image matrices (cast to float32) and the label vectors out of the
# three MNIST splits: train, validation, test.
train_data, val_data, test_data = (
    subset.images.astype(np.float32)
    for subset in (mnist.train, mnist.validation, mnist.test)
)
train_labels, val_labels, test_labels = (
    subset.labels
    for subset in (mnist.train, mnist.validation, mnist.test)
)

(55000, 784)
(5000, 784)
(10000, 784)

Neural Networks

In [5]:
# Define generic class of Neural Networks
# Number of training samples = number of rows of the training matrix.
train_size = len(train_data)

class base_model(object):
    """Shared building blocks for the CNN models defined in this notebook.

    Provides variable creation (with optional L2 regularization bookkeeping),
    convolution / max-pooling helpers, and the loss, training, evaluation and
    prediction graph nodes. Subclasses are expected to supply an
    ``inference(x, d)`` method returning the logits.
    """

    # Class-level default so that `self.regularizers += ...` also works for a
    # subclass whose __init__ does not chain to base_model.__init__.
    regularizers = 0

    # Constructor
    def __init__(self):
        self.regularizers = 0  # running sum of L2 penalties (tf.nn.l2_loss)

    # Private methods
    def _weight_variable(self, shape, regularization=False):
        """Create a 'weights' variable drawn from a truncated normal (stddev 0.1).

        When `regularization` is true, the variable's L2 loss is added to
        `self.regularizers`.
        """
        initial = tf.truncated_normal(shape, stddev=0.1)
        var = tf.Variable(initial, name='weights')
        if regularization:
            self.regularizers += tf.nn.l2_loss(var)
        return var

    def _bias_variable(self, shape, regularization=False):
        """Create a 'bias' variable filled with the constant 0.1.

        When `regularization` is true, the variable's L2 loss is added to
        `self.regularizers`.
        """
        initial = tf.constant(0.1, shape=shape)
        var = tf.Variable(initial, name='bias')
        if regularization:
            self.regularizers += tf.nn.l2_loss(var)
        return var

    def _conv2d(self, x, W):
        """Convolve `x` with filters `W` using stride 1 and 'SAME' padding."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def _max_pool_2x2(self, x):
        """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Public methods
    def loss(self, logits, labels, regularization):
        """Return mean sparse softmax cross-entropy plus the weighted L2 term.

        `labels` are integer class ids (cast to int64 here); `regularization`
        is the scalar weight applied to `self.regularizers`.
        """
        labels = tf.to_int64(labels)
        # Keyword arguments: the positional (logits, labels) order is not
        # accepted by every TensorFlow release.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy')
        loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        loss += regularization * self.regularizers
        #tf.scalar_summary('loss', loss) # Tensorboard
        return loss

    # Optimization
    def training(self, loss, learning_rate, train_size, batch_size):
        """Return the op that performs one momentum-SGD step on `loss`.

        NOTE(review): the `learning_rate` argument is currently ignored; the
        schedule below always starts from a hard-coded 0.01. Kept as-is so the
        recorded results stay reproducible -- confirm whether the argument
        should be honoured instead.
        """
        # Optimizer: set up a variable that's incremented once per batch and
        # controls the learning rate decay.
        batch = tf.Variable(0)
        # Decay once per epoch, using an exponential schedule starting at 0.01.
        learning_rate = tf.train.exponential_decay(
                0.01,                # Base learning rate.
                batch * batch_size,  # Current index into the dataset.
                train_size,          # Decay step.
                0.95,                # Decay rate.
                staircase=True)      # Step-wise decay: one drop per epoch.
        # Use simple momentum for the optimization.
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        # `global_step=batch` makes minimize() increment the batch counter.
        train_op = optimizer.minimize(loss, global_step=batch)
        return train_op

    def evaluation(self, logits, labels):
        """Return the accuracy in percent: share of argmax predictions equal to `labels`."""
        output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
        acc = 100.* tf.reduce_sum(tf.cast(tf.equal(output_classes,labels), tf.float32))/ tf.cast(tf.shape(logits)[0], tf.float32)
        return acc

    def prediction(self, logits):
        """Return the predicted classes."""
        output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
        return output_classes
# TensorBoard
def variable_summaries(var, name):
    """Attach mean, stddev, max, min and histogram summaries of `var` for TensorBoard.

    `name` is appended to each summary tag (e.g. 'mean/' + name).
    """
    with tf.name_scope("summaries"):
        mean = tf.reduce_mean(var)
        tf.scalar_summary('mean/' + name, mean)
        with tf.name_scope('stddev'):
            # Standard deviation is the root of the MEAN squared deviation;
            # summing instead would scale the value with the tensor size.
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        # NOTE: the 'sttdev/' tag spelling is kept unchanged so existing logs
        # and dashboards keep matching.
        tf.scalar_summary('sttdev/' + name, stddev)
        tf.scalar_summary('max/' + name, tf.reduce_max(var))
        tf.scalar_summary('min/' + name, tf.reduce_min(var))
        tf.histogram_summary(name, var)

Simple Test: CL32-MP4-FC10

In [6]:
FEAT1 = 14*14 # Number of features per filter after 2x2 pooling of a 28x28 map
NCLASSES = 10 # Number of classes

class CNN_1CL_1MP_1FC(base_model):
    """One convolutional layer, one 2x2 max-pooling, one fully connected layer.

    Args:
        K: side length of the square convolution patch.
        F: number of convolution filters.
    """

    def __init__(self, K, F):
        print('CNN Architecture: 1CL+1MP+1FC')
        # Initialise self.regularizers before the regularized variables below
        # accumulate into it.
        super().__init__()
        self.K = K  # Patch size
        self.F = F  # Number of filters
        self.W1 = self._weight_variable([self.K, self.K, 1, self.F], regularization=False)
        self.b1 = self._bias_variable([self.F], regularization=False)
        self.W2 = self._weight_variable([FEAT1*self.F, NCLASSES], regularization=True)
        self.b2 = self._bias_variable([NCLASSES], regularization=True)

    def inference(self, x, d):
        """Build the forward pass and return the class logits.

        Args:
            x: input batch; reshaped here to [-1, 28, 28, 1].
            d: value passed as second argument to tf.nn.dropout (the training
               script feeds 1.0 to disable dropout).
        """
        layer_name = 'CL32'
        with tf.name_scope(layer_name):
            # Grid filtering
            x_2d = tf.reshape(x, [-1,28,28,1])
            y_2d = self._conv2d(x_2d, self.W1) + self.b1
            # Non-linear activation
            y_2d = tf.nn.relu(y_2d)
            # Tensorboard
            #variable_summaries(W, layer_name + '/W')
            #variable_summaries(b, layer_name + '/bias')
            #variable_summaries(x_2d, layer_name + '/x_2d')
            #variable_summaries(y_2d, layer_name + '/y_2d')
        layer_name = 'MP4'
        with tf.name_scope(layer_name):
            # Max pooling
            y_mp = self._max_pool_2x2(y_2d)
            #variable_summaries(y_mp, layer_name + '/y_mp')
            # Dropout
            y_mp = tf.nn.dropout(y_mp, d)
        layer_name = 'FC10'
        with tf.name_scope(layer_name):
            # Flatten the pooled maps into one feature vector per sample.
            y = tf.reshape(y_mp, [-1, FEAT1*self.F])
            y = tf.matmul(y, self.W2) + self.b2
            #variable_summaries(W, layer_name + '/W')
            #variable_summaries(b, layer_name + '/b')
            #variable_summaries(y, layer_name + '/y')
        return y

LeNet5: CL32-MP4-CL64-MP4-FC512-FC10

In [7]:
F1=32 # Number of features of 1st CL layer
F2=64 # Number of features of 2nd CL layer
FEAT2 = 7*7* F2 # Flattened size after two 2x2 poolings of a 28x28 map
NFC1=512 # Number of nodes of 1st FC layer
NCLASSES = 10 # Number of classes

class CNN_LeNet5(base_model):
    """LeNet5-style network: two conv + max-pool stages, then two FC layers.

    Args:
        K: side length of the square convolution patch (used by both
           convolutional layers).
    """

    def __init__(self, K):
        print('CNN Architecture: LeNet5')
        # Initialise self.regularizers before the regularized variables below
        # accumulate into it.
        super().__init__()
        self.K = K  # Patch size
        self.W1 = self._weight_variable([self.K, self.K, 1, F1], regularization=False)
        self.b1 = self._bias_variable([F1], regularization=False)
        self.W2 = self._weight_variable([self.K, self.K, F1, F2], regularization=False)
        self.b2 = self._bias_variable([F2], regularization=False)
        self.W3 = self._weight_variable([FEAT2, NFC1], regularization=True)
        self.b3 = self._bias_variable([NFC1], regularization=True)
        self.W4 = self._weight_variable([NFC1, NCLASSES], regularization=True)
        self.b4 = self._bias_variable([NCLASSES], regularization=True)

    def inference(self, x, d):
        """Build the forward pass and return the class logits.

        Args:
            x: input batch; reshaped here to [-1, 28, 28, 1].
            d: value passed as second argument to tf.nn.dropout, applied after
               the first fully connected layer (the training script feeds 1.0
               to disable dropout).
        """
        with tf.name_scope('CN32'):
            # Grid filtering
            x_2d = tf.reshape(x, [-1,28,28,1])
            y_2d = self._conv2d(x_2d, self.W1) + self.b1
            # Non-linear activation
            y_2d = tf.nn.relu(y_2d)
        with tf.name_scope('MP4'):
            # Max pooling
            y_mp = self._max_pool_2x2(y_2d)
        with tf.name_scope('CN64'):
            # Grid filtering
            y_2d = self._conv2d(y_mp, self.W2) + self.b2
            # Non-linear activation
            y_2d = tf.nn.relu(y_2d)
        with tf.name_scope('MP4'):
            # Max pooling
            y_mp = self._max_pool_2x2(y_2d)
        with tf.name_scope('FC512'):
            # Flatten the pooled maps into one feature vector per sample.
            y = tf.reshape(y_mp, [-1, FEAT2])
            y = tf.matmul(y, self.W3) + self.b3
            # Non-linear activation
            y = tf.nn.relu(y)
            # Dropout
            y = tf.nn.dropout(y, d)
        with tf.name_scope('FC10'):
            y = tf.matmul(y, self.W4) + self.b4
        return y

Select NN model

In [8]:
# Comment/uncomment
# Switch for the single-layer CNN. Defined here because the name is not set
# anywhere else; True matches the recorded run of this notebook.
NN_1Layer = True
if NN_1Layer:
    model = CNN_1CL_1MP_1FC(K=5, F=10)
    # Per-architecture hyper-parameters (override the flag defaults).
    FLAGS.learning_rate = 0.05
    FLAGS.regularization = 5e-4
    FLAGS.dropout = 0.75

CNN Architecture: 1CL+1MP+1FC

In [9]:
# Comment/uncomment
# Switch for the LeNet5 model. Defined here because the name is not set
# anywhere else; False matches the recorded run (this cell printed nothing).
# Set to True to train LeNet5 instead.
NN_LeNet5 = False
if NN_LeNet5:
    model = CNN_LeNet5(K=5)
    # Per-architecture hyper-parameters (override the flag defaults).
    FLAGS.learning_rate = 0.05
    FLAGS.regularization = 5e-4
    FLAGS.dropout = 0.5

In [10]:
# Parameters
num_epochs = 10
num_epochs = 2 # Early stop
train_size = train_data.shape[0]
# Total number of mini-batch updates over all epochs.
nb_iter = int(num_epochs * train_size) // FLAGS.batch_size
print('num_epochs=',num_epochs,', train_size=',train_size,', nb_iter=',nb_iter)

# Construct computational graph
x = tf.placeholder(tf.float32, (None, 784))  # flattened 28x28 images
y = tf.placeholder(tf.int32, (None,))        # integer class labels, one per sample
d = tf.placeholder(tf.float32)               # dropout argument fed at run time
logits = model.inference(x,d) # dropout activate
loss = model.loss(logits, y, FLAGS.regularization)
train_op = model.training(loss, FLAGS.learning_rate, train_size, FLAGS.batch_size)
evaluation = model.evaluation(logits, y)

num_epochs= 2 , train_size= 55000 , nb_iter= 1100

In [14]:
# Train
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)  # variables must be initialized before the first training step

# TensorBoard
# Merge all the summaries and write them out to tmp/mnist_logs/run1
writer = tf.train.SummaryWriter('tmp/mnist_logs' + '/run1', sess.graph)
op_summary = tf.merge_all_summaries()

# Start
indices = collections.deque()  # queue of shuffled sample indices
tab_results = []
tab_last_epoch = []
start_last_epoch = nb_iter - train_size // FLAGS.batch_size
nb_samples_last_epoch = 25
freq_save_last_epoch = int(train_size // FLAGS.batch_size // (nb_samples_last_epoch-1))
acc_train = -1.0   # placeholder values shown by the very first progress line
loss_train = -1.0
print('num_epochs=',num_epochs,', nb_iter=',nb_iter)
t_start = time.process_time()
freq_iter = 10
for i in range(nb_iter):
    # Computational time (reported only at iterations 0 and freq_iter)
    if i % freq_iter == 0 and i <= freq_iter:
        print('iter={:d}, freq_iter={:d}, training time: {:.2f}s, acc_train={:2.2f}, loss_train={:2.2f}'
              .format(i,freq_iter,time.process_time() - t_start,acc_train,loss_train))
        t_start = time.process_time()
    # Generic batch extraction
    if len(indices) < FLAGS.batch_size:
        # Refill the queue with a fresh random permutation of all samples.
        indices.extend(np.random.permutation(train_data.shape[0])) # rand permutation
    idx = [indices.popleft() for _ in range(FLAGS.batch_size)] # extract batch_size data
    batch_xs, batch_ys = train_data[idx,:], train_labels[idx]
    if not isinstance(batch_xs, np.ndarray):
        batch_xs = batch_xs.toarray()  # convert to full matrices if sparse

    # Run computational graph for weight learning
    _, acc_train, loss_train = sess.run(
        [train_op, evaluation, loss],
        feed_dict={x: batch_xs, y: batch_ys, d: FLAGS.dropout})
    # Display, save results
    if (i+1)%100==0:
        # Compute test accuracy, once with the training dropout value and
        # once with dropout disabled (d = 1.0).
        t_start_testset = time.process_time()
        acc_test = sess.run(
            evaluation,
            feed_dict={x: mnist.test.images, y: mnist.test.labels, d: FLAGS.dropout})
        acc_test_nodropout = sess.run(
            evaluation,
            feed_dict={x: mnist.test.images, y: mnist.test.labels, d: 1.0})
        t_testset = time.process_time() - t_start_testset
        print('iter={:d}, acc_train={:2.2f}, loss_train={:2.2f}, acc_test={:2.2f}, acc_test_nodropout={:2.2f}, test time={:.2f}s'
              .format(i+1, acc_train, loss_train, acc_test, acc_test_nodropout, t_testset))
        # Summaries for TensorBoard (simple_value expects a plain float).
        summary = tf.Summary()
        summary.value.add(tag='acc_train', simple_value=float(acc_train))
        summary.value.add(tag='acc_test', simple_value=float(acc_test))
        summary.value.add(tag='acc_test_nodropout', simple_value=float(acc_test_nodropout))
        writer.add_summary(summary, i+1)
# Final test accuracy with dropout disabled
acc_test_nodropout = sess.run(
    evaluation,
    feed_dict={x: mnist.test.images, y: mnist.test.labels, d: 1.0})
print('final accuracy=',acc_test_nodropout)
# t_start was last reset at iteration freq_iter, so this is the time since then.
print('Training time: {:.2f}s'.format(time.process_time() - t_start))

num_epochs= 2 , nb_iter= 1100
iter=0, freq_iter=10, training time: 0.00s, acc_train=-1.00, loss_train=-1.00
iter=10, freq_iter=10, training time: 1.77s, acc_train=25.00, loss_train=2.26
iter=100, acc_train=85.00, loss_train=0.57, acc_test=87.13, acc_test_nodropout=90.24, test time=14.87s
iter=200, acc_train=86.00, loss_train=0.47, acc_test=91.29, acc_test_nodropout=93.61, test time=14.89s
iter=300, acc_train=94.00, loss_train=0.30, acc_test=92.85, acc_test_nodropout=94.51, test time=15.27s
iter=400, acc_train=97.00, loss_train=0.20, acc_test=93.81, acc_test_nodropout=95.12, test time=15.89s
iter=500, acc_train=97.00, loss_train=0.24, acc_test=94.46, acc_test_nodropout=95.64, test time=15.96s
iter=600, acc_train=94.00, loss_train=0.20, acc_test=94.75, acc_test_nodropout=96.05, test time=15.99s
iter=700, acc_train=97.00, loss_train=0.18, acc_test=95.22, acc_test_nodropout=96.39, test time=16.29s
iter=800, acc_train=98.00, loss_train=0.13, acc_test=95.38, acc_test_nodropout=96.56, test time=16.09s
iter=900, acc_train=94.00, loss_train=0.21, acc_test=95.49, acc_test_nodropout=96.69, test time=16.00s
iter=1000, acc_train=98.00, loss_train=0.11, acc_test=95.90, acc_test_nodropout=96.87, test time=16.16s
iter=1100, acc_train=97.00, loss_train=0.14, acc_test=96.20, acc_test_nodropout=97.10, test time=16.24s
final accuracy= 97.1
Training time: 376.83s

Run TensorBoard:

Go to folder of file lecture11_code01.ipynb
Open Terminal and type:
tensorboard --logdir='tmp/mnist_logs' --port 8889

