Implementation of the original LeNet-5 Convolutional Neural Network:
Gradient-Based Learning Applied to Document Recognition
Y. LeCun, L. Bottou, Y. Bengio, P. Haffner
Proceedings of the IEEE 86 (11), 2278-2324
In [1]:
import tensorflow as tf
import numpy as np
import time
import collections
In [2]:
flags = tf.app.flags # tf.app.flags is a thin wrapper around python-gflags
FLAGS = flags.FLAGS
# Data folder
flags.DEFINE_string('dir_data', 'datasets', 'Directory to store data')
flags.DEFINE_float('learning_rate', 0.2, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_float('regularization', 0.0, 'L2 regularizations of weights and biases.')
flags.DEFINE_float('dropout', 1.0, 'Dropout')
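These flags only define defaults; in this notebook no command-line parsing is involved and later cells simply reassign the attributes. A minimal sketch, assuming the same environment as the rest of the notebook:

# A sketch: FLAGS attributes behave like ordinary Python attributes here,
# so the defaults can be inspected and overridden directly in later cells.
print(FLAGS.learning_rate, FLAGS.batch_size, FLAGS.regularization, FLAGS.dropout)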
In [3]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False) # load data in local folder
In [4]:
train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
train_labels = mnist.train.labels
val_labels = mnist.validation.labels
test_labels = mnist.test.labels
print(train_data.shape)
print(train_labels.shape)
print(val_data.shape)
print(val_labels.shape)
print(test_data.shape)
print(test_labels.shape)
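The input_data reader uses the standard MNIST split of 55,000 training, 5,000 validation and 10,000 test images, each flattened to 784 = 28x28 pixels, with integer class labels because one_hot=False. A small sanity check, assuming that standard split:

# Sanity check on the standard MNIST split (55,000 / 5,000 / 10,000 images of 784 pixels).
assert train_data.shape == (55000, 784) and train_labels.shape == (55000,)
assert val_data.shape == (5000, 784) and val_labels.shape == (5000,)
assert test_data.shape == (10000, 784) and test_labels.shape == (10000,)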
In [5]:
# Define generic class of Neural Networks
train_size = train_data.shape[0]

class base_model(object):

    # Constructor
    def __init__(self):
        self.regularizers = 0  # L2 regularizers

    # Private methods
    def _weight_variable(self, shape, regularization=False):
        initial = tf.truncated_normal(shape, stddev=0.1)
        var = tf.Variable(initial, name='weights')
        if regularization:
            self.regularizers += tf.nn.l2_loss(var)
        return var

    def _bias_variable(self, shape, regularization=False):
        initial = tf.constant(0.1, shape=shape)
        var = tf.Variable(initial, name='bias')
        if regularization:
            self.regularizers += tf.nn.l2_loss(var)
        return var

    def _conv2d(self, x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def _max_pool_2x2(self, x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Public methods
    def loss(self, logits, labels, regularization):
        labels = tf.to_int64(labels)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name='xentropy')
        loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        loss += regularization * self.regularizers
        #tf.scalar_summary('loss', loss) # Tensorboard
        return loss
    # Optimization
    def training(self, loss, learning_rate, train_size, batch_size):
        # Optimizer: set up a variable that's incremented once per batch and
        # controls the learning rate decay.
        batch = tf.Variable(0)
        # Decay once per epoch, using an exponential schedule starting at the given learning rate.
        learning_rate = tf.train.exponential_decay(
            learning_rate,        # Base learning rate (from FLAGS).
            batch * batch_size,   # Current index into the dataset.
            train_size,           # Decay step.
            0.95,                 # Decay rate.
            staircase=True)
        # Use simple momentum for the optimization.
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        train_op = optimizer.minimize(loss, global_step=batch)
        return train_op
    def evaluation(self, logits, labels):
        output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits), 1), tf.int32)
        acc = 100. * tf.reduce_sum(tf.cast(tf.equal(output_classes, labels), tf.float32)) / tf.cast(tf.shape(logits)[0], tf.float32)
        return acc

    def prediction(self, logits):
        """Return the predicted classes."""
        output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits), 1), tf.int32)
        return output_classes

# TensorBoard
def variable_summaries(var, name):
    with tf.name_scope("summaries"):
        mean = tf.reduce_mean(var)
        tf.scalar_summary('mean/' + name, mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        tf.scalar_summary('stddev/' + name, stddev)
        tf.scalar_summary('max/' + name, tf.reduce_max(var))
        tf.scalar_summary('min/' + name, tf.reduce_min(var))
        tf.histogram_summary(name, var)
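The FEAT1 and FEAT2 constants used by the two models below follow from the spatial-size arithmetic of these layers: 'SAME' convolutions with stride 1 preserve the 28x28 grid, and each 2x2 max pooling halves it. A small sketch of that arithmetic:

# 'SAME' convolution with stride 1 keeps the spatial size; each 2x2/2 max pool halves it (rounding up).
def pooled_side(side, n_pools):
    for _ in range(n_pools):
        side = (side + 1) // 2
    return side

print(pooled_side(28, 1)**2)       # 196 = 14*14 -> FEAT1 (per filter)
print(pooled_side(28, 2)**2 * 64)  # 3136 = 7*7*64 -> FEAT2 (for F2 = 64 filters)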
In [6]:
FEAT1 = 14*14  # Feature map size per filter after one 2x2 max pooling of a 28x28 image
NCLASSES = 10  # Number of classes
class CNN_1CL_1MP_1FC(base_model):

    def __init__(self, K, F):
        print('CNN Architecture: 1CL+1MP+1FC')
        super().__init__()
        self.K = K  # Patch size
        self.F = F  # Number of filters
        self.W1 = self._weight_variable([self.K, self.K, 1, self.F], regularization=False)
        self.b1 = self._bias_variable([self.F], regularization=False)
        self.W2 = self._weight_variable([FEAT1*self.F, NCLASSES], regularization=True)
        self.b2 = self._bias_variable([NCLASSES], regularization=True)

    def inference(self, x, d):
        layer_name = 'CL32'
        with tf.name_scope(layer_name):
            # Grid filtering
            x_2d = tf.reshape(x, [-1, 28, 28, 1])
            y_2d = self._conv2d(x_2d, self.W1) + self.b1
            # Non-linear activation
            y_2d = tf.nn.relu(y_2d)
            # Tensorboard
            #variable_summaries(W, layer_name + '/W')
            #variable_summaries(b, layer_name + '/bias')
            #variable_summaries(x_2d, layer_name + '/x_2d')
            #variable_summaries(y_2d, layer_name + '/y_2d')
        layer_name = 'MP4'
        with tf.name_scope(layer_name):
            # Max pooling
            y_mp = self._max_pool_2x2(y_2d)
            #variable_summaries(y_mp, layer_name + '/y_mp')
            # Dropout
            y_mp = tf.nn.dropout(y_mp, d)
        layer_name = 'FC10'
        with tf.name_scope(layer_name):
            y = tf.reshape(y_mp, [-1, FEAT1*self.F])
            y = tf.matmul(y, self.W2) + self.b2
            #variable_summaries(W, layer_name + '/W')
            #variable_summaries(b, layer_name + '/b')
            #variable_summaries(y, layer_name + '/y')
        return y
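As a quick check of the model size, the trainable parameters can be counted directly from the variable shapes. A sketch, assuming the model has been instantiated (cell [8] below); for K=5 and F=10 the count is 5*5*1*10 + 10 + (14*14*10)*10 + 10 = 19,870:

# Count trainable parameters from the variable shapes.
def count_params(model_vars):
    return sum(int(np.prod(v.get_shape().as_list())) for v in model_vars)

# Example for CNN_1CL_1MP_1FC(K=5, F=10), once the model exists:
# print(count_params([model.W1, model.b1, model.W2, model.b2]))  # 19870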
In [7]:
F1 = 32          # Number of features of 1st CL layer
F2 = 64          # Number of features of 2nd CL layer
FEAT2 = 7*7*F2   # Feature map size after two 2x2 max poolings: 7x7 per filter, F2 filters
NFC1 = 512       # Number of nodes of 1st FC layer
NCLASSES = 10    # Number of classes
class CNN_LeNet5(base_model):

    def __init__(self, K):
        print('CNN Architecture: LeNet5')
        super().__init__()
        self.K = K  # Patch size
        self.W1 = self._weight_variable([self.K, self.K, 1, F1], regularization=False)
        self.b1 = self._bias_variable([F1], regularization=False)
        self.W2 = self._weight_variable([self.K, self.K, F1, F2], regularization=False)
        self.b2 = self._bias_variable([F2], regularization=False)
        self.W3 = self._weight_variable([FEAT2, NFC1], regularization=True)
        self.b3 = self._bias_variable([NFC1], regularization=True)
        self.W4 = self._weight_variable([NFC1, NCLASSES], regularization=True)
        self.b4 = self._bias_variable([NCLASSES], regularization=True)

    def inference(self, x, d):
        with tf.name_scope('CN32'):
            # Grid filtering
            x_2d = tf.reshape(x, [-1, 28, 28, 1])
            y_2d = self._conv2d(x_2d, self.W1) + self.b1
            # Non-linear activation
            y_2d = tf.nn.relu(y_2d)
        with tf.name_scope('MP4'):
            # Max pooling
            y_mp = self._max_pool_2x2(y_2d)
        with tf.name_scope('CN64'):
            # Grid filtering
            y_2d = self._conv2d(y_mp, self.W2) + self.b2
            # Non-linear activation
            y_2d = tf.nn.relu(y_2d)
        with tf.name_scope('MP4'):
            # Max pooling
            y_mp = self._max_pool_2x2(y_2d)
        with tf.name_scope('FC512'):
            y = tf.reshape(y_mp, [-1, FEAT2])
            y = tf.matmul(y, self.W3) + self.b3
            # Non-linear activation
            y = tf.nn.relu(y)
            # Dropout
            y = tf.nn.dropout(y, d)
        with tf.name_scope('FC10'):
            y = tf.matmul(y, self.W4) + self.b4
        return y
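This variant is far larger than the one-layer model: with K=5 it has roughly 1.66 million trainable parameters, almost all of them in the first fully connected layer. A sketch of the arithmetic, using the constants defined above:

# Parameter count for CNN_LeNet5(K=5), computed from the weight shapes above.
K_ = 5
n_params = (K_*K_*1*F1 + F1            # first convolutional layer
            + K_*K_*F1*F2 + F2         # second convolutional layer
            + FEAT2*NFC1 + NFC1        # first fully connected layer (7*7*64 -> 512)
            + NFC1*NCLASSES + NCLASSES)  # output layer
print(n_params)  # 1663370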
In [8]:
# Comment/uncomment
NN_1Layer = False
NN_1Layer = True
if NN_1Layer == True:
    model = CNN_1CL_1MP_1FC(K=5, F=10)
    FLAGS.learning_rate = 0.05
    FLAGS.regularization = 5e-4
    FLAGS.dropout = 0.75
In [9]:
# Comment/uncomment
NN_LeNet5 = False
#NN_LeNet5 = True
if NN_LeNet5 == True:
    model = CNN_LeNet5(K=5)
    FLAGS.learning_rate = 0.05
    FLAGS.regularization = 5e-4
    FLAGS.dropout = 0.5
In [10]:
# Parameters
num_epochs = 10
num_epochs = 2 # Early stop
train_size = train_data.shape[0]
nb_iter = int(num_epochs * train_size) // FLAGS.batch_size
print('num_epochs=',num_epochs,', train_size=',train_size,', nb_iter=',nb_iter)
# Construct computational graph
x = tf.placeholder(tf.float32, (None, 784))
y = tf.placeholder(tf.int32, (None))
d = tf.placeholder(tf.float32)
logits = model.inference(x, d)  # dropout is active (d < 1) during training
loss = model.loss(logits, y, FLAGS.regularization)
train_op = model.training(loss, FLAGS.learning_rate, train_size, FLAGS.batch_size)
evaluation = model.evaluation(logits, y)
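The schedule built in training() decays the learning rate by a factor of 0.95 once per completed epoch (staircase=True), starting from the rate passed in (FLAGS.learning_rate). A sketch of the per-epoch values for the configured run:

# Per-epoch learning rates produced by the exponential decay in training().
print([round(FLAGS.learning_rate * 0.95**epoch, 5) for epoch in range(num_epochs)])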
In [14]:
# Train
init = tf.initialize_all_variables()
sess = tf.Session()
# TensorFlow
# Merge all the summaries and write them out to /tmp/mnist_logs (by default)
writer = tf.train.SummaryWriter('tmp/mnist_logs' + '/run1', sess.graph)
op_summary = tf.merge_all_summaries()
# Start
sess.run(init)
indices = collections.deque()
tab_results = []
tab_last_epoch = []
start_last_epoch = nb_iter - train_size // FLAGS.batch_size
nb_samples_last_epoch = 25
freq_save_last_epoch = int(train_size // FLAGS.batch_size // (nb_samples_last_epoch-1))
acc_train = -1.0
loss_train = -1.0
print('num_epochs=',num_epochs,', nb_iter=',nb_iter)
t_start = time.process_time()
for i in range(nb_iter):

    # Computational time
    freq_iter = 10
    if (i % freq_iter == 0) & (i <= freq_iter):
        print('iter={:d}, freq_iter={:d}, training time: {:.2f}s, acc_train={:2.2f}, loss_train={:2.2f}'
              .format(i, freq_iter, time.process_time() - t_start, acc_train, loss_train))
        t_start = time.process_time()

    # Generic batch extraction
    if len(indices) < FLAGS.batch_size:
        indices.extend(np.random.permutation(train_data.shape[0]))  # rand permutation
    idx = [indices.popleft() for i in range(FLAGS.batch_size)]  # extract batch_size data
    batch_xs, batch_ys = train_data[idx, :], train_labels[idx]
    if type(batch_xs) is not np.ndarray:
        batch_xs = batch_xs.toarray()  # convert to full matrices if sparse

    # Run computational graph for weight learning
    _, acc_train, loss_train = sess.run([train_op, evaluation, loss], feed_dict={x: batch_xs, y: batch_ys, d: FLAGS.dropout})

    # Display, save results
    if (i+1) % 100 == 0:

        # Compute test accuracy
        t_start_testset = time.process_time()
        acc_test = sess.run(evaluation, feed_dict={x: mnist.test.images, y: mnist.test.labels, d: FLAGS.dropout})
        acc_test_nodropout = sess.run(evaluation, feed_dict={x: mnist.test.images, y: mnist.test.labels, d: 1.0})
        t_testset = time.process_time() - t_start_testset
        print('iter={:d}, acc_train={:2.2f}, loss_train={:2.2f}, acc_test={:2.2f}, acc_test_nodropout={:2.2f}, test time={:.2f}s'
              .format(i+1, acc_train, loss_train, acc_test, acc_test_nodropout, t_testset))

        # Summaries for TensorBoard.
        acc_train *= 1.0
        acc_test *= 1.0
        acc_test_nodropout *= 1.0
        summary = tf.Summary()
        summary.value.add(tag='acc_train', simple_value=acc_train)
        summary.value.add(tag='acc_test', simple_value=acc_test)
        summary.value.add(tag='acc_test_nodropout', simple_value=acc_test_nodropout)
        writer.add_summary(summary, i+1)
# Final test accuracy (dropout disabled)
acc_test_nodropout = sess.run(evaluation, feed_dict={x: mnist.test.images, y: mnist.test.labels, d: 1.0})
print('final accuracy=',acc_test_nodropout)
writer.close()
print('Training time: {:.2f}s'.format(time.process_time() - t_start))
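Once training has finished (the session is still open even though the summary writer is closed), the prediction() helper defined in base_model can be used to label new images. A sketch on a few test images:

# Predicted classes for the first few test images (dropout disabled with d = 1.0).
pred_op = model.prediction(logits)
pred = sess.run(pred_op, feed_dict={x: mnist.test.images[:8], d: 1.0})
print('predicted:', pred)
print('true     :', mnist.test.labels[:8])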
In [ ]: