Chapter 10 – Introduction to Artificial Neural Networks
This notebook contains all the sample code in chapter 10.
First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure matplotlib plots figures inline, and prepare a function to save the figures:
In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals
# Common imports
import numpy as np
import os
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ann"
def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
From the original data, we extract the tf-idf vector of each student response as the input features and the binary correctness value as the output label. This turns the problem into a binary classification task: predict whether a student's answer is correct based on the tf-idf vector of the answer.
We randomly select 80% of the samples for training and keep the remaining 20% as the test set.
In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
beetles_full = pd.read_csv('beetleTrainingData.csv')
# Keep the columns whose names do not contain '_RA', then drop the label columns
# (note: .as_matrix() is deprecated in newer pandas; .values is the modern equivalent)
beetles_feature = beetles_full[beetles_full.columns[~beetles_full.columns.str.contains('_RA')]].drop(['accuracy_num', 'accuracy_txt'], axis=1).as_matrix()
# Binary label: 1 if the answer is marked 'correct', 0 otherwise
beetles_label = beetles_full.accuracy_txt.apply(lambda a: int(a == 'correct')).as_matrix()
# Randomly split: 80% training, 20% test
beetles_train_feature, beetles_test_feature, beetles_train_label, beetles_test_label = train_test_split(
    beetles_feature, beetles_label, test_size=0.2, random_state=42)
In [3]:
beetles_train_feature.shape
Out[3]:
In [4]:
beetles_train_label.shape
Out[4]:
In [5]:
beetles_test_feature.shape
Out[5]:
In [6]:
beetles_test_label.shape
Out[6]:
A perceptron is the simplest type of artificial neuron; its input and output satisfy the relationship shown in the image below:
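In case the figure does not display, the standard perceptron decision rule (with weight vector $\mathbf{w}$ and bias $b$, corresponding to per_clf.coef_ and per_clf.intercept_ in the code below) is:

$$\hat{y} = \operatorname{step}\!\left(\mathbf{w}^\top \mathbf{x} + b\right) = \begin{cases} 1 & \text{if } \mathbf{w}^\top \mathbf{x} + b \ge 0 \\ 0 & \text{otherwise.} \end{cases}$$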
In [7]:
import numpy as np
from sklearn.linear_model import Perceptron
per_clf = Perceptron(random_state=42) # The random state is used to shuffle the data
per_clf.fit(beetles_train_feature, beetles_train_label)
training_accuracy = per_clf.score(beetles_train_feature, beetles_train_label)
training_accuracy
Out[7]:
In [8]:
test_accuracy = per_clf.score(beetles_test_feature, beetles_test_label)
test_accuracy
Out[8]:
In [9]:
per_clf.coef_[0].shape # the shape of w
Out[9]:
In [10]:
per_clf.intercept_ # the value of b
Out[10]:
A perceptron uses the step function to determine its output. Other neural networks use more sophisticated activation functions.
In [11]:
def logit(z):  # logistic (sigmoid) function
    return 1 / (1 + np.exp(-z))

def relu(z):  # rectified linear unit
    return np.maximum(0, z)

def derivative(f, z, eps=0.000001):  # numerical derivative by central differences
    return (f(z + eps) - f(z - eps)) / (2 * eps)
In [12]:
z = np.linspace(-5, 5, 200)
plt.figure(figsize=(11,4))
plt.subplot(121)
plt.plot(z, np.sign(z), "r-", linewidth=2, label="Step")
plt.plot(z, logit(z), "g--", linewidth=2, label="Logit")
plt.plot(z, np.tanh(z), "b-", linewidth=2, label="Tanh")
plt.plot(z, relu(z), "m-.", linewidth=2, label="ReLU")
plt.grid(True)
plt.legend(loc="center right", fontsize=14)
plt.title("Activation functions", fontsize=14)
plt.axis([-5, 5, -1.2, 1.2])
plt.subplot(122)
plt.plot(z, derivative(np.sign, z), "r-", linewidth=2, label="Step")
plt.plot(0, 0, "ro", markersize=5)
plt.plot(0, 0, "rx", markersize=10)
plt.plot(z, derivative(logit, z), "g--", linewidth=2, label="Logit")
plt.plot(z, derivative(np.tanh, z), "b-", linewidth=2, label="Tanh")
plt.plot(z, derivative(relu, z), "m-.", linewidth=2, label="ReLU")
plt.grid(True)
#plt.legend(loc="center right", fontsize=14)
plt.title("Derivatives", fontsize=14)
plt.axis([-5, 5, -0.2, 1.2])
# save_fig("activation_functions_plot")
plt.show()
In [13]:
def heaviside(z):
    return (z >= 0).astype(z.dtype)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def mlp_xor(x1, x2, activation=heaviside):
    return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)
In [14]:
x1s = np.linspace(-0.2, 1.2, 100)
x2s = np.linspace(-0.2, 1.2, 100)
x1, x2 = np.meshgrid(x1s, x2s)
z1 = mlp_xor(x1, x2, activation=heaviside)
z2 = mlp_xor(x1, x2, activation=sigmoid)
plt.figure(figsize=(10,4))
plt.subplot(121)
plt.contourf(x1, x2, z1)
plt.plot([0, 1], [0, 1], "gs", markersize=20)
plt.plot([0, 1], [1, 0], "y^", markersize=20)
plt.title("Activation function: heaviside", fontsize=14)
plt.grid(True)
plt.subplot(122)
plt.contourf(x1, x2, z2)
plt.plot([0, 1], [0, 1], "gs", markersize=20)
plt.plot([0, 1], [1, 0], "y^", markersize=20)
plt.title("Activation function: sigmoid", fontsize=14)
plt.grid(True)
A feedforward neural network is an artificial neural network in which the connections between units do not form a cycle.
During training, the connection weights are updated with the back-propagation algorithm.
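Before turning to TensorFlow's high-level estimator, here is a minimal, self-contained sketch (not part of the original chapter code) of what a single back-propagation step looks like for a tiny one-hidden-layer network with sigmoid activations and a squared-error loss. The toy data and variable names are made up purely for illustration; the networks trained later in this notebook use ReLU hidden layers and a softmax cross-entropy loss instead.
In [ ]:
import numpy as np

rng = np.random.RandomState(42)

def sigmoid_act(z):
    return 1.0 / (1.0 + np.exp(-z))

# Toy data: 4 examples, 3 input features, one binary target per example
X_toy = rng.rand(4, 3)
y_toy = np.array([[0.0], [1.0], [1.0], [0.0]])

# Randomly initialized weights and biases for a 3 -> 5 -> 1 network
W1, b1 = rng.randn(3, 5) * 0.1, np.zeros((1, 5))
W2, b2 = rng.randn(5, 1) * 0.1, np.zeros((1, 1))
learning_rate = 0.1

# Forward pass: compute the activations layer by layer
hidden = sigmoid_act(X_toy.dot(W1) + b1)     # hidden-layer activations, shape (4, 5)
output = sigmoid_act(hidden.dot(W2) + b2)    # network outputs, shape (4, 1)

# Backward pass: propagate the error gradient from the output layer back to the hidden layer
output_delta = (output - y_toy) * output * (1 - output)        # gradient at the output pre-activation
hidden_delta = output_delta.dot(W2.T) * hidden * (1 - hidden)  # gradient at the hidden pre-activation

# Gradient-descent update of every weight and bias
W2 -= learning_rate * hidden.T.dot(output_delta)
b2 -= learning_rate * output_delta.sum(axis=0, keepdims=True)
W1 -= learning_rate * X_toy.T.dot(hidden_delta)
b1 -= learning_rate * hidden_delta.sum(axis=0, keepdims=True)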
In [15]:
import tensorflow as tf
config = tf.contrib.learn.RunConfig(tf_random_seed=42)
feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(beetles_train_feature)
# The network has two hidden layers, with 150 and 50 units respectively
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[150, 50], n_classes=2,
                                         feature_columns=feature_cols, config=config)
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)  # if TensorFlow >= 1.1
# Fit the model to the data, i.e. run back-propagation to update the weights
dnn_clf.fit(beetles_train_feature, beetles_train_label, batch_size=50, steps=10000)
Out[15]:
In [16]:
from sklearn.metrics import accuracy_score
pred_label = dnn_clf.predict(beetles_train_feature)
training_accuracy = accuracy_score(beetles_train_label, pred_label['classes'])
training_accuracy
Out[16]:
In [17]:
pred_label = dnn_clf.predict(beetles_test_feature)
test_accuracy = accuracy_score(beetles_test_label, pred_label['classes'])
test_accuracy
Out[17]:
In [18]:
from sklearn.metrics import log_loss
pred_proba = pred_label['probabilities']
test_log_loss = log_loss(beetles_test_label, pred_proba)
test_log_loss
Out[18]:
In [19]:
import tensorflow as tf
n_inputs = len(beetles_train_feature[0])  # number of input features (tf-idf dimensions)
n_hidden1 = 150
n_hidden2 = 50
n_outputs = 2  # two classes: correct / incorrect
In [20]:
reset_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
In [21]:
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z
In [22]:
with tf.name_scope("dnn"):
hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
activation=tf.nn.relu)
hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
activation=tf.nn.relu)
logits = neuron_layer(hidden2, n_outputs, name="outputs")
In [23]:
with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
In [24]:
learning_rate = 0.0003
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
In [25]:
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
In [26]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()
In [27]:
n_epochs = 20
batch_size = 100
In [28]:
from sklearn.utils import shuffle
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # Reshuffle the training set at the start of every epoch
        train_feature, train_label = shuffle(beetles_train_feature, beetles_train_label, random_state=epoch)
        for iteration in range(len(beetles_train_feature) // batch_size):
            begin_index = iteration * batch_size
            end_index = min((iteration + 1) * batch_size, len(beetles_train_feature))
            X_batch = train_feature[begin_index:end_index]
            y_batch = train_label[begin_index:end_index]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: beetles_test_feature,
                                            y: beetles_test_label})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

    save_path = saver.save(sess, "./my_model_plain.ckpt")
In [29]:
with tf.Session() as sess:
    saver.restore(sess, "./my_model_plain.ckpt")  # or better, use save_path
    X_new_scaled = beetles_test_feature[:20]
    Z = logits.eval(feed_dict={X: X_new_scaled})
    y_pred = np.argmax(Z, axis=1)
In [30]:
print("Predicted classes:", y_pred)
print("Actual classes: ", beetles_test_label[:20])
In [31]:
from IPython.display import clear_output, Image, display, HTML
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = b"<stripped %d bytes>" % size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph' + str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
In [32]:
show_graph(tf.get_default_graph())
Note: the book uses tensorflow.contrib.layers.fully_connected() rather than tf.layers.dense() (which did not exist when this chapter was written). It is now preferable to use tf.layers.dense(), because anything in the contrib module may change or be deleted without notice. The dense() function is almost identical to the fully_connected() function, except for a few minor differences:
the scope parameter becomes name, activation_fn becomes activation (and similarly the _fn suffix is removed from other parameters such as normalizer_fn), weights_initializer becomes kernel_initializer, etc. Also, the default activation is now None rather than tf.nn.relu.
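To make the renaming concrete, here is a small sketch comparing the two calls, assuming TensorFlow 1.x where both APIs are available; the demo_X placeholder and the layer names are made up purely for this illustration and are not used elsewhere in the notebook.
In [ ]:
from tensorflow.contrib.layers import fully_connected

# Hypothetical input placeholder just for this comparison
demo_X = tf.placeholder(tf.float32, shape=(None, 20), name="demo_X")

# Old contrib-style call (TensorFlow 1.x only): scope and activation_fn parameters
old_hidden = fully_connected(demo_X, 150, scope="demo_hidden_old",
                             activation_fn=tf.nn.relu)

# Equivalent tf.layers.dense call: scope -> name, activation_fn -> activation,
# and dense() defaults to activation=None, so ReLU must be requested explicitly
new_hidden = tf.layers.dense(demo_X, 150, name="demo_hidden_new",
                             activation=tf.nn.relu)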
In [33]:
n_inputs = len(beetles_train_feature[0])
n_hidden1 = 150
n_hidden2 = 50
n_outputs = 2
In [34]:
reset_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
In [35]:
with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
activation=tf.nn.relu)
hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
activation=tf.nn.relu)
logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
In [36]:
with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
In [37]:
learning_rate = 0.0003
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
In [38]:
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
In [39]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()
In [40]:
n_epochs = 20
batch_size = 100

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # Reshuffle the training set at the start of every epoch
        train_feature, train_label = shuffle(beetles_train_feature, beetles_train_label, random_state=epoch)
        for iteration in range(len(beetles_train_feature) // batch_size):
            begin_index = iteration * batch_size
            end_index = min((iteration + 1) * batch_size, len(beetles_train_feature))
            X_batch = train_feature[begin_index:end_index]
            y_batch = train_label[begin_index:end_index]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: beetles_test_feature,
                                            y: beetles_test_label})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

    save_path = saver.save(sess, "./my_model_dense.ckpt")
In [41]:
show_graph(tf.get_default_graph())