Accompanying code examples for the book "Introduction to Artificial Neural Networks and Deep Learning: A Practical Guide with Applications in Python" by Sebastian Raschka. All code examples are released under the MIT license. If you find this content useful, please consider supporting the work by buying a copy of the book.
Other code examples and content are available on GitHub. The PDF and ebook versions of the book are available through Leanpub.
In [1]:
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p tensorflow
This notebook demonstrates different strategies for exporting and importing trained TensorFlow models, based on a simple multilayer perceptron with two hidden layers. These include saving and restoring a model via TensorFlow's Saver and checkpoint files, and exporting the model parameters to NumPy .npz archives to instantiate a fixed, non-trainable classifier.
Note that the graph def is going to be set up so that it constructs a "rigid," non-trainable TensorFlow classifier if .npz files are provided. This is on purpose, since it may come in handy in certain use cases, but the code can easily be modified to make the model trainable if NumPy .npz files are provided -- for example, by wrapping the tf.constant calls in fc_layer in a tf.Variable constructor like so:
...
if weight_params is not None:
    weights = tf.Variable(tf.constant(weight_params, name='weights',
                                      dtype=tf.float32))
...

instead of

...
if weight_params is not None:
    weights = tf.constant(weight_params, name='weights',
                          dtype=tf.float32)
...
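The difference matters because a tf.Variable treats the loaded array only as an initial value: it must be initialized in the session and can afterwards be updated by the optimizer. The following minimal standalone sketch illustrates this, with a hypothetical toy array standing in for the loaded parameters:

import numpy as np
import tensorflow as tf

# hypothetical toy array standing in for weights loaded from an .npz file
loaded = np.ones((2, 2), dtype=np.float32)

g_demo = tf.Graph()
with g_demo.as_default():
    # wrapping the tf.constant in a tf.Variable makes the loaded array
    # merely the variable's *initial value*
    w = tf.Variable(tf.constant(loaded, dtype=tf.float32), name='w')
    update = tf.assign(w, w * 2.)  # stands in for an optimizer update

with tf.Session(graph=g_demo) as sess:
    sess.run(tf.global_variables_initializer())  # required before use
    print(sess.run(update))  # the variable's value changed -> trainable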
The following code cell defines wrapper functions for our convenience; they save us some re-typing later when we set up the TensorFlow multilayer perceptron graphs for the trainable and non-trainable models.
In [2]:
import tensorflow as tf


##########################
### WRAPPER FUNCTIONS
##########################

def fc_layer(input_tensor, n_output_units, name,
             activation_fn=None, seed=None,
             weight_params=None, bias_params=None):

    with tf.variable_scope(name):

        # use fixed, non-trainable constants if parameters are provided,
        # otherwise create trainable variables
        if weight_params is not None:
            weights = tf.constant(weight_params, name='weights',
                                  dtype=tf.float32)
        else:
            weights = tf.Variable(tf.truncated_normal(
                    shape=[input_tensor.get_shape().as_list()[-1],
                           n_output_units],
                    mean=0.0,
                    stddev=0.1,
                    dtype=tf.float32,
                    seed=seed),
                name='weights')

        if bias_params is not None:
            biases = tf.constant(bias_params, name='biases',
                                 dtype=tf.float32)
        else:
            biases = tf.Variable(tf.zeros(shape=[n_output_units]),
                                 name='biases',
                                 dtype=tf.float32)

        act = tf.matmul(input_tensor, weights) + biases

        if activation_fn is not None:
            act = activation_fn(act)

    return act


def mlp_graph(n_input=784, n_classes=10, n_hidden_1=128, n_hidden_2=256,
              learning_rate=0.1,
              fixed_params=None):

    # fixed_params to allow loading weights & biases
    # from NumPy npz archives and defining a fixed, non-trainable
    # TensorFlow classifier
    if not fixed_params:
        var_names = ['fc1/weights:0', 'fc1/biases:0',
                     'fc2/weights:0', 'fc2/biases:0',
                     'logits/weights:0', 'logits/biases:0']
        fixed_params = {v: None for v in var_names}
        found_params = False
    else:
        found_params = True

    # Input data
    tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')
    tf_y = tf.placeholder(tf.int32, [None], name='targets')
    tf_y_onehot = tf.one_hot(tf_y, depth=n_classes, name='onehot_targets')

    # Multilayer perceptron
    fc1 = fc_layer(input_tensor=tf_x,
                   n_output_units=n_hidden_1,
                   name='fc1',
                   weight_params=fixed_params['fc1/weights:0'],
                   bias_params=fixed_params['fc1/biases:0'],
                   activation_fn=tf.nn.relu)

    fc2 = fc_layer(input_tensor=fc1,
                   n_output_units=n_hidden_2,
                   name='fc2',
                   weight_params=fixed_params['fc2/weights:0'],
                   bias_params=fixed_params['fc2/biases:0'],
                   activation_fn=tf.nn.relu)

    logits = fc_layer(input_tensor=fc2,
                      n_output_units=n_classes,
                      name='logits',
                      weight_params=fixed_params['logits/weights:0'],
                      bias_params=fixed_params['logits/biases:0'],
                      # no activation on the logits; the softmax is
                      # applied via the loss function below
                      activation_fn=None)

    # Loss and optimizer
    ### Only necessary if no existing params are found
    ### and a trainable graph has to be initialized
    if not found_params:
        loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf_y_onehot)
        cost = tf.reduce_mean(loss, name='cost')
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        train = optimizer.minimize(cost, name='train')

    # Prediction
    probabilities = tf.nn.softmax(logits, name='probabilities')
    labels = tf.cast(tf.argmax(logits, 1), tf.int32, name='labels')

    correct_prediction = tf.equal(labels,
                                  tf_y, name='correct_predictions')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name='accuracy')
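As a quick sanity check, fc_layer can also be used on its own. The following sketch (a hypothetical toy example, not part of the MLP above) builds a single 4-unit layer and runs it on a small random batch:

import numpy as np
import tensorflow as tf

g_test = tf.Graph()
with g_test.as_default():
    x = tf.placeholder(tf.float32, [None, 8], name='x')
    # a single fully connected layer with randomly initialized weights
    out = fc_layer(x, n_output_units=4, name='toy', seed=123)

with tf.Session(graph=g_test) as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randn(3, 8).astype(np.float32)
    print(sess.run(out, feed_dict={x: batch}))  # shape (3, 4)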
In [3]:
from tensorflow.examples.tutorials.mnist import input_data

##########################
### SETTINGS
##########################

# Hyperparameters
learning_rate = 0.1
training_epochs = 10
batch_size = 64

##########################
### GRAPH DEFINITION
##########################

g = tf.Graph()
with g.as_default():
    mlp_graph()

##########################
### DATASET
##########################

mnist = input_data.read_data_sets("./", one_hot=False)

##########################
### TRAINING & EVALUATION
##########################

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    saver0 = tf.train.Saver()

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = mnist.train.num_examples // batch_size

        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            _, c = sess.run(['train', 'cost:0'],
                            feed_dict={'features:0': batch_x,
                                       'targets:0': batch_y})
            avg_cost += c

        train_acc = sess.run('accuracy:0',
                             feed_dict={'features:0': mnist.train.images,
                                        'targets:0': mnist.train.labels})
        valid_acc = sess.run('accuracy:0',
                             feed_dict={'features:0': mnist.validation.images,
                                        'targets:0': mnist.validation.labels})

        print("Epoch: %03d | AvgCost: %.3f" % (epoch + 1, avg_cost / (i + 1)),
              end="")
        print(" | Train/Valid ACC: %.3f/%.3f" % (train_acc, valid_acc))

    test_acc = sess.run('accuracy:0',
                        feed_dict={'features:0': mnist.test.images,
                                   'targets:0': mnist.test.labels})
    print('Test ACC: %.3f' % test_acc)

    ##########################
    ### SAVE TRAINED MODEL
    ##########################
    saver0.save(sess, save_path='./mlp')
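If you want to see what the Saver actually wrote to disk, a quick sketch like the one below lists the checkpoint files next to the './mlp' prefix. Note that the exact file names (e.g., a graph definition in mlp.meta, the parameter shards, and a 'checkpoint' bookkeeping file) can vary with the TensorFlow version:

import os

for f in sorted(os.listdir('./')):
    # checkpoint-related files produced by tf.train.Saver
    if f.startswith('mlp') or f == 'checkpoint':
        print(f)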
You can now restart the notebook, and the following code cells should execute without any additional code dependencies.
In [4]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./", one_hot=False)

with tf.Session() as sess:
    saver1 = tf.train.import_meta_graph('./mlp.meta')
    saver1.restore(sess, save_path='./mlp')
    test_acc = sess.run('accuracy:0',
                        feed_dict={'features:0': mnist.test.images,
                                   'targets:0': mnist.test.labels})
    print('Test ACC: %.3f' % test_acc)
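If you are ever unsure which named tensors a restored graph exposes, one way to check is to list its operations. The following is a minimal sketch, assuming the checkpoint files from the previous section are in the current directory:

import tensorflow as tf

tf.reset_default_graph()

with tf.Session() as sess:
    saver = tf.train.import_meta_graph('./mlp.meta')
    saver.restore(sess, save_path='./mlp')

    # print a few operation names; the corresponding tensors are
    # addressed as '<op_name>:0' in feed_dict and sess.run calls
    for op in tf.get_default_graph().get_operations()[:10]:
        print(op.name)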
In [5]:
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

with tf.Session() as sess:
    saver1 = tf.train.import_meta_graph('./mlp.meta')
    saver1.restore(sess, save_path='./mlp')

    # collect the names of all trainable variables in the restored graph
    var_names = [v.name for v in
                 tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]

    params = {}
    print('Found variables:')
    for v in var_names:
        print(v)
        ary = sess.run(v)
        params[v] = ary

    np.savez('mlp', **params)
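To verify the export, the archive can be reopened and its contents inspected; each key corresponds to one variable name from the graph:

import numpy as np

param_dict = np.load('mlp.npz')

# keys are the original variable names, e.g. 'fc1/weights:0';
# values are the corresponding NumPy arrays
for key in param_dict.files:
    print(key, param_dict[key].shape)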
Note that the graph def was set up so that it constructs a "rigid," non-trainable TensorFlow classifier if .npz files are provided. This is on purpose, since it may come in handy in certain use cases, but the code can easily be modified to make the model trainable if NumPy .npz files are provided (e.g., by wrapping the tf.constant calls in fc_layer in a tf.Variable constructor, as shown earlier).
Note: Assuming you defined the fc_layer and mlp_graph wrapper functions in "Define Multilayer Perceptron Graph" above, the following code cell is otherwise independent and has no other code dependencies.
In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

###########################
### LOAD DATA AND PARAMS
###########################

mnist = input_data.read_data_sets("./", one_hot=False)
param_dict = np.load('mlp.npz')

##########################
### GRAPH DEFINITION
##########################

g = tf.Graph()
with g.as_default():
    # here: constructs a non-trainable graph
    # due to the provided fixed_params argument
    mlp_graph(fixed_params=param_dict)

with tf.Session(graph=g) as sess:
    test_acc = sess.run('accuracy:0',
                        feed_dict={'features:0': mnist.test.images,
                                   'targets:0': mnist.test.labels})
    print('Test ACC: %.3f' % test_acc)
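Since mlp_graph also defines 'probabilities' and 'labels' operations, the restored fixed graph can be used for inference directly. A small usage sketch, assuming the cell above was just executed so that g and mnist are still defined:

with tf.Session(graph=g) as sess:
    # predict class labels for the first five test images
    pred = sess.run('labels:0',
                    feed_dict={'features:0': mnist.test.images[:5]})
    print('Predicted labels:', pred)
    print('True labels:     ', mnist.test.labels[:5])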