In [1]:
# Introduction to Neural Networks and TensorFlow
import tensorflow as tf
Graphs -- a computational graph in TensorFlow is a series of operations arranged into a graph of nodes.
The development of neural network algorithms in TensorFlow is done in two distinct steps: 1. building the computational graph and 2. running the computational graph. Step 1 focuses on creating and defining the nodes of the graph, while step 2 focuses on evaluating the graph through what is called a session.
Variables -- multiple types of variables exist in TensorFlow -- the most common ones are presented below:
Constant variables: float or int variables that will remain constant and that we wish to declare in step 1
Zeros: tensors of a specified shape initialized with zeros
Placeholders: tensors for which we will pass values in the future; a shape must be specified when declaring a placeholder
Operations -- TensorFlow has built-in functions for basic and complex operations
Addition: tf.add()
Matrix multiplication: tf.matmul()
... but also prebuilt functions, optimization methods, network designs, cells (LSTM), etc.
The TensorFlow documentation can be found at https://www.tensorflow.org/api_docs/python/
Let's look at an example:
In [2]:
# Step 1: building the computational graph
node1 = tf.constant(7.0, dtype=tf.float32) # a constant tensor holding the value 7.0
node2 = tf.zeros((2,3), dtype=tf.int32) # tensor of shape (2,3) initialized with zeros
node3 = tf.placeholder(dtype=tf.float32, shape=(2,3)) # an empty tensor of a given shape and type whose values we will feed when the graph is evaluated
node4 = tf.random_normal([3, 2], seed=1234) # tensor of shape (3,2) filled with draws from a normal distribution
node5 = tf.matmul(node3, node4) # matrix product of node3 (2,3) and node4 (3,2) -- the result has shape (2,2)
print(node1)
print(node2)
print(node3)
print(node4)
print(node5)
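# Note: printing a node displays the Tensor object (its name, shape and dtype), not its value -- values are only produced when the graph is run in a session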
In [3]:
# Step 2: Running the computational graph
sess = tf.Session()
x = [[0,1,2],[3,4,5]] # values of shape (2,3) to feed into placeholder node3
print("node1:\n", sess.run(node1))
print("node2:\n", sess.run(node2))
print("node3:\n", sess.run(node3, feed_dict={node3:x}))
print("node4:\n", sess.run(node4))
print("node5:\n", sess.run(node5, feed_dict={node3:x}))
In [4]:
# Step 0: Let's create some data for our first application
import numpy as np
import pandas as pd
df = pd.DataFrame({'x1': np.array([0,1,0,0,1,1,1]),
                   'x2': np.array([0,0,0,1,1,1,0]),
                   'x3': np.array([1,1,0,0,1,1,1]),
                   'output': np.array([1,1,0,1,0,0,1])})
df
Out[4]:
In [5]:
# Resetting to the default graph -- especially useful when running multiple sessions
tf.reset_default_graph()
# Declaring parameters / architecture of neural network
num_input_features = 3 # represents the number of features in the input data
num_hidden_nodes = 4 # the number of nodes used in the 1st (and only) hidden layer of our network
num_classes = 1 # the number of features in the output data -- we are predicting a number rather than a class (as in a regression problem), so a single output node is enough
learning_rate = 0.01 # parameter used in the optimization process
seed = 7 # to replicate results
# Declaring placeholders for input data and true outputs
inputs = tf.placeholder(tf.float32, shape=[None, num_input_features]) # shape: (number of observations) x num_input_features
true_outputs = tf.placeholder(tf.float32, shape=[None, num_classes]) # shape: (number of observations) x num_classes
# Randomly initializing weights and biases using a normal distribution
weights = {
    'hidden': tf.Variable(tf.random_normal([num_input_features, num_hidden_nodes], seed=seed)),
    'output': tf.Variable(tf.random_normal([num_hidden_nodes, num_classes], seed=seed))}
biases = {
    'hidden': tf.Variable(tf.random_normal([num_hidden_nodes], seed=seed)),
    'output': tf.Variable(tf.random_normal([num_classes], seed=seed))}
# Computing layer_1 and the output layer (this is a feed-forward neural net with a single hidden layer) with a sigmoid activation function
# The introduction of an activation function allows for non-linearity
# Layers are simply equal to activation_function(Wx + biases)
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(inputs,weights['hidden']),biases['hidden']))
output_layer = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,weights['output']),biases['output']))
# Now that the architecture is designed, let's look at the optimization process -- our objective / cost / error function is the mean square error
# We use an iterative optimization process, here the Stochastic Gradient Descent method that learns at a predefined learning_rate
error = tf.subtract(output_layer, true_outputs)
mean_square_error = tf.reduce_mean(tf.square(error)) # mean of the squared errors, matching the name
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(mean_square_error)
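To make the layer formula above concrete, here is a minimal NumPy sketch of what sigmoid(Wx + b) computes for a single observation -- the weights and biases below are made up for illustration, not the ones TensorFlow initializes:
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x_example = np.array([[0., 1., 1.]])  # one observation with 3 features, shape (1, 3)
W_example = np.full((3, 4), 0.5)      # hypothetical hidden weights, shape (3, 4)
b_example = np.zeros(4)               # hypothetical hidden biases, shape (4,)
print(sigmoid(x_example.dot(W_example) + b_example))  # hidden activations, shape (1, 4)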
In [6]:
# Step 2: Running the graph
# Creating a session to run the graph
sess = tf.Session()
# Initializing all variables
init = tf.global_variables_initializer()
sess.run(init)
# Let's limit the number of iterations
iter_ = 0
mse = 10
print("Starting optimization")
while iter_ < 10000 and mse > 0.1: # stop after 10000 iterations or once the error drops below 0.1
    # Here we are running one step of the Stochastic Gradient Descent method
    _ = sess.run(train, feed_dict={inputs:np.array(df[['x1','x2','x3']]),true_outputs:np.array(df[['output']])})
    # Displaying results every 2000 iterations
    if iter_ % 2000 == 0:
        # Evaluating the output layer -- what is predicted for each observation
        out = sess.run(output_layer, feed_dict={inputs:np.array(df[['x1','x2','x3']])})
        # Evaluating the mean square error
        mse = sess.run(mean_square_error, feed_dict={inputs:np.array(df[['x1','x2','x3']]),true_outputs:np.array(df[['output']])})
        print("Iteration:",iter_, "Mean_square_error:",mse, "\nOutput\n",out)
    iter_ += 1
print("Very cool, we are finished with the optimiztion!")
Code taken from: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/neural_network_raw.ipynb
Problem here: we want a neural network to recognize which digit corresponds to each image. The best algorithms nowadays do better than the human eye on this task.
In [7]:
# Don't look at this -- not important
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
In [8]:
# Let's look at our data
print(mnist.train.next_batch(1))
# The first array is a flattened 28*28 = 784 pixel vector with the color intensity of each pixel; the second array is the one-hot encoded class of the image
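To actually see a digit, here is a minimal sketch (assuming matplotlib is installed -- it is not imported above) that reshapes the flattened 784-pixel vector back into a 28x28 image:
import matplotlib.pyplot as plt

image, label = mnist.train.next_batch(1)
plt.imshow(image.reshape(28, 28), cmap='gray')  # reshape the flat vector to 28x28
plt.title("One-hot label: " + str(label[0]))
plt.show()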
In [9]:
# Resetting to the default graph -- especially useful when running multiple sessions
tf.reset_default_graph()
# Parameters
learning_rate = 0.1 # how quickly the model will learn in the optimization method
num_steps = 500 # max number of iterations
batch_size = 128 # the size of the batch fed in a training iteration to the model
display_step = 100 # displaying results of the optimization every 100 iterations
# Network Parameters
n_hidden_1 = 256 # number of neurons in 1st layer
n_hidden_2 = 256 # number of neurons in 2nd layer
num_input = 784 # MNIST data input (img shape: 28*28) -- equivalent to the number of features in the input dataset
num_classes = 10 # MNIST total classes (0-9 digits) -- number of classes in the output data
# tf Graph input
X = tf.placeholder("float", [None, num_input])
Y = tf.placeholder("float", [None, num_classes])
# Store layers weight & bias -- initiating weights and biases using a normal distribution
weights = {
    'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes]))}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([num_classes]))}
# Create model
def neural_net(x):
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer
# Construct model
logits = neural_net(X)
# Define loss and optimizer -- here loss function is the cross entropy to which we apply the softmax function
# The softmax function is a normalized exponential function that transforms the logits into values between 0 and 1 that sum to 1
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) # reduce mean is simply the mean of all losses
# Here using the Adam algorithm for optimization
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Evaluate model
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
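To illustrate what the softmax function does to a vector of logits, here is a small NumPy sketch -- for illustration only, since TensorFlow applies softmax inside the loss op above:
import numpy as np

logits_example = np.array([2.0, 1.0, 0.1])  # hypothetical logits for 3 classes
probs = np.exp(logits_example) / np.sum(np.exp(logits_example))
print(probs, probs.sum())  # values in (0, 1) that sum to 1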
In [10]:
# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)
    for step in range(1, num_steps+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + "{:.4f}".format(loss) + ", Training Accuracy= " + "{:.3f}".format(acc))
    print("Optimization Finished!")
    # Calculate accuracy for MNIST test images
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels}))