This notebook contains code to train a neural network with a single hidden layer on MNIST. At the end is a short exercise to add a second hidden layer, transforming it into a deep neural network.
In [ ]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import math
import os
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
# Root directory for the summary event files: the "train" and "test"
# FileWriters created further down each write to a sub-directory of it.
LOGDIR = './graphs'
In [ ]:
# Reset the default graph before building the model, then open the session
# that the rest of the notebook uses to run ops and to export the graph.
# NOTE(review): the reset presumably exists so that re-running the notebook
# does not stack duplicate ops onto the old graph - confirm.
tf.reset_default_graph()
sess = tf.Session()
In [ ]:
# Load the MNIST dataset, using /tmp/data as the data directory.
# one_hot=True requests the labels as one-hot vectors, which is the form the
# [None, NUM_CLASSES] labels placeholder and the softmax loss below expect.
# NOTE(review): read_data_sets presumably downloads the files when they are
# not already present in that directory - confirm for the TF version in use.
mnist = input_data.read_data_sets('/tmp/data', one_hot=True)
In [ ]:
# Number of output classes (one per digit).
NUM_CLASSES = 10
# Number of input features per image: a 28x28 image flattened into a vector.
NUM_PIXELS = 28 * 28
# Number of mini-batch gradient steps performed by the training loop.
TRAIN_STEPS = 1000
# Number of training examples drawn for each step.
BATCH_SIZE = 100
# Number of units in the first hidden layer.
HIDDEN1_UNITS = 128
# Step size handed to the gradient descent optimizer.
LEARNING_RATE = 0.5
In [ ]:
# Graph inputs; values are supplied through feed_dict on every sess.run call.
# The leading None dimension leaves the number of examples open, so the same
# placeholders accept both a training batch and the whole test set.
# x: one row of NUM_PIXELS values per image.
x = tf.placeholder(tf.float32, [None, NUM_PIXELS], name="pixels")
# y_: the label belonging to each row of x, as a vector of length NUM_CLASSES.
y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="labels")
# helper functions to create weight and bias variables
# with proper initialization
def weight_variable(inputs, outputs, name):
    """Create a weight-matrix variable of shape [inputs, outputs].

    The initial values are drawn from a truncated normal distribution whose
    standard deviation is 1 / sqrt(inputs), i.e. it shrinks as the number of
    incoming connections grows. For the reasoning behind this scheme see
    http://cs231n.github.io/neural-networks-2/.

    Args:
        inputs: Number of rows of the matrix (size of the layer's input).
        outputs: Number of columns of the matrix (size of the layer's output).
        name: Name given to the created variable.

    Returns:
        The tf.Variable wrapping the initial values.
    """
    init_stddev = 1.0 / math.sqrt(float(inputs))
    init_values = tf.truncated_normal(shape=[inputs, outputs],
                                      stddev=init_stddev)
    return tf.Variable(init_values, name=name)
def bias_variable(shape, name):
    """Create a bias-vector variable with every element starting at 0.1.

    Args:
        shape: Length of the bias vector. Despite the parameter name this is
            a single dimension size; it is wrapped into a one-element shape
            list before being handed to tf.constant.
        name: Name given to the created variable.

    Returns:
        The tf.Variable wrapping the constant initial values.
    """
    return tf.Variable(tf.constant(0.1, shape=[shape]), name=name)
# ---- Model: one ReLU hidden layer feeding a linear output layer ----
with tf.name_scope("hidden_layer_1"):
    # weights and biases for the first layer
    weights1 = weight_variable(NUM_PIXELS, HIDDEN1_UNITS, "weights1")
    biases1 = bias_variable(HIDDEN1_UNITS, "biases1")

    # activations for the first hidden layer: ReLU over the affine
    # transform of the input pixels
    hidden1_preactivation = tf.matmul(x, weights1) + biases1
    hidden1 = tf.nn.relu(hidden1_preactivation, name="hidden1")

with tf.name_scope("output_layer"):
    # weights and biases for the second layer
    weights2 = weight_variable(HIDDEN1_UNITS, NUM_CLASSES, "weights2")
    biases2 = bias_variable(NUM_CLASSES, "biases2")

    # logits - you can think of these (roughly) as unnormalized
    # probabilities, or the amount of evidence we have that the input
    # image corresponds to each digit
    y = tf.matmul(hidden1, weights2) + biases2

with tf.name_scope("loss"):
    # softmax cross-entropy between logits and labels, one value per
    # example, averaged into a single scalar
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=y,
                                                               labels=y_)
    loss = tf.reduce_mean(per_example_loss)
    # NOTE(review): the summary is assumed to belong inside the "loss" scope;
    # the exported notebook lost its indentation, so confirm against the
    # original (the enclosing scope affects the tag shown in TensorBoard).
    tf.summary.scalar('loss', loss)

with tf.name_scope("optimizer"):
    # plain gradient descent on the mean loss; running `train` applies one
    # parameter update
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    train = optimizer.minimize(loss)
# ---- Summaries, evaluation ops and the training loop ----
# One summary writer per data split, each with its own sub-directory of LOGDIR.
train_writer = tf.summary.FileWriter(os.path.join(LOGDIR, "train"))
test_writer = tf.summary.FileWriter(os.path.join(LOGDIR, "test"))

# try/finally guarantees both writers are closed even when training raises
# or the cell is interrupted part-way through.
try:
    train_writer.add_graph(sess.graph)

    # a prediction counts as correct when the index of the largest logit
    # equals the index of the 1 in the one-hot label
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # single op that evaluates every summary registered so far
    summary_op = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())

    # the test set never changes, so build its feed dict once
    test_feed = {x: mnist.test.images, y_: mnist.test.labels}

    for step in range(TRAIN_STEPS):
        batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
        # one gradient step; the summaries are computed on the same batch
        summary_result, _ = sess.run([summary_op, train],
                                     feed_dict={x: batch_xs, y_: batch_ys})
        train_writer.add_summary(summary_result, step)
        # NOTE(review): this RunMetadata is freshly constructed and is never
        # passed to sess.run, so it carries no statistics for the step.
        train_writer.add_run_metadata(tf.RunMetadata(), 'step%03d' % step)

        # calculate accuracy on the test set
        if step % 100 == 0:
            summary_result, acc = sess.run([summary_op, accuracy],
                                           feed_dict=test_feed)
            test_writer.add_summary(summary_result, step)
            test_writer.add_run_metadata(tf.RunMetadata(), 'step%03d' % step)
            print("test accuracy: %f at step %d" % (acc, step))

    # final accuracy on the full test set once training has finished
    print("Accuracy %f" % sess.run(accuracy, feed_dict=test_feed))
finally:
    train_writer.close()
    test_writer.close()
In [ ]:
# Put your solution here or modify the above code.