In [1]:
import tensorflow as tf

# Create TensorFlow object called hello_constant
hello_constant = tf.constant('Hello World!')

with tf.Session() as sess:
    # Run the tf.constant operation in the session
    output = sess.run(hello_constant)
    print(output)


b'Hello World!'

TensorFlow stores data as tensors, which are objects that hold n-dimensional arrays of values.

Constant data is stored as a tf.constant. The value of a constant tensor never changes:


In [2]:
# A is a 0-dimensional int32 tensor
A = tf.constant(1234) 
# B is a 1-dimensional int32 tensor
B = tf.constant([123,456,789]) 
# C is a 2-dimensional int32 tensor
C = tf.constant([ [123,456,789], [222,333,444] ])

A TensorFlow session is an environment for running a graph.

with tf.Session() as sess:
    output = sess.run(hello_constant)

The sess.run function evaluates the hello_constant tensor created above.

TensorFlow can't just take a dataset x as an input; the data has to be a tensor. tf.placeholder is used for dynamic data, which is fed into the graph a part at a time via feed_dict.


In [3]:
x = tf.placeholder(tf.string)
y = tf.placeholder(tf.int32)
z = tf.placeholder(tf.float32)

with tf.Session() as sess:
    output = sess.run(x, feed_dict={x: 'Test String', y: 123, z: 45.67})
    print(output)


Test String

TensorFlow math


In [4]:
x = tf.add(5, 2)  # 7
y = tf.subtract(10, 4) # 6
z = tf.multiply(2, 5)  # 10
print(x, y, z)


Tensor("Add:0", shape=(), dtype=int32) Tensor("Sub:0", shape=(), dtype=int32) Tensor("Mul:0", shape=(), dtype=int32)

Math operations have to use tensors of the same type, so watch out for mixing floats, ints, etc. The line below fails because it tries to subtract an int from a float:

tf.subtract(tf.constant(2.0), tf.constant(1))

Use tf.cast to convert a tensor to another type:


In [6]:
tf.subtract(tf.cast(tf.constant(2.0), tf.int32), tf.constant(1))   # 1


Out[6]:
<tf.Tensor 'Sub_2:0' shape=() dtype=int32>

To print the value of a TensorFlow tensor, you have to run it in a session:


In [7]:
x = tf.add(5, 2)  # 7
with tf.Session() as s:
    output = s.run(x)
print(x) # prints out a tensor object
output # prints the output of the tensor x


Tensor("Add_1:0", shape=(), dtype=int32)
Out[7]:
7

In [8]:
# TODO: Convert the following to TensorFlow:
x = tf.constant(10)
y = tf.constant(2)
# tf.divide on int32 inputs returns a float64 tensor, hence the cast below
x_over_y = tf.divide(x, y)
z = tf.subtract(x_over_y, tf.cast(1.0, tf.float64))

# TODO: Print z from a session
with tf.Session() as sess:
    output = sess.run(z)
    print(output)


4.0

Classification

Classification is the central building block of ML.

Logistic classifiers use a linear function like Wx + b = y plus a softmax function to assign probabilities to the outputs.

Softmax can take any set of scores and turn them into probabilities that sum to 1.
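
As a minimal sketch of the math (assuming NumPy; softmax_np is an illustrative name, not a library function), softmax exponentiates each score and divides by the sum of the exponentials:

import numpy as np

def softmax_np(scores):
    # shift by the max for numerical stability; the result is unchanged
    exps = np.exp(scores - np.max(scores))
    return exps / exps.sum()

softmax_np(np.array([2.0, 1.0, 0.1]))  # ~[0.659, 0.242, 0.099]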

The most common operation in neural networks is calculating the linear combination of inputs, weights, and biases.

Here, W is a matrix of the weights connecting two layers. The output y, the input x, and the biases b are all vectors.
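
As a hedged one-line sketch (x, W, and b are illustrative names for already-defined tensors, with x holding a batch of input vectors):

y = tf.add(tf.matmul(x, W), b)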

Both weights and biases need to be modified as the NN trains, so they use the tf.Variable class:


In [9]:
x = tf.Variable(5)

The tf.Variable class stores its state in the session, so it must be initialized manually. The tf.global_variables_initializer() function initializes all variable tensors.


In [10]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

We need initial values for weights and biases; normally we just use random values from a normal distribution, for which we can use the tf.truncated_normal() function.


In [11]:
n_features = 120
n_labels = 5
weights = tf.Variable(tf.truncated_normal((n_features, n_labels)))
weights.initial_value


Out[11]:
<tf.Tensor 'truncated_normal:0' shape=(120, 5) dtype=float32>

The weights are already randomized, so there isn't really a need to randomize the bias as well; we can use the tf.zeros function to generate a tensor of zeros.


In [12]:
n_labels = 5
bias = tf.Variable(tf.zeros(n_labels))
bias.initial_value


Out[12]:
<tf.Tensor 'zeros:0' shape=(5,) dtype=float32>

TensorFlow Softmax

softmax returns an array of probability values that sum to 1


In [13]:
# how i used softmax - fed softmax the logit_data directly
def run():
    output = None
    logit_data = [2.0, 1.0, 0.1]

    # Calculate the softmax of the logits (no placeholder needed this way)
    softmax = tf.nn.softmax(logit_data)

    with tf.Session() as sess:
        output = sess.run(softmax)

    return output

run()


Out[13]:
array([ 0.65900117,  0.24243298,  0.09856589], dtype=float32)

In [13]:
# udacity's version - they made a feed_dict
def run():
    output = None
    logit_data = [2.0, 1.0, 0.1]
    logits = tf.placeholder(tf.float32)

    softmax = tf.nn.softmax(logits)

    with tf.Session() as sess:
        output = sess.run(softmax, feed_dict={logits: logit_data})

    return output
run()


Out[13]:
array([ 0.65900117,  0.24243298,  0.09856589], dtype=float32)

One hot encoding

Use LabelBinarizer to turn labels into one-hot encoded vectors:


In [14]:
import numpy as np
from sklearn import preprocessing

# Example labels
labels = np.array([1,5,3,2,1,4,2,1,3])

# Create the encoder
lb = preprocessing.LabelBinarizer()

# Here the encoder finds the classes and assigns one-hot vectors 
lb.fit(labels)

# And finally, transform the labels into one-hot encoded vectors
lb.transform(labels)


Out[14]:
array([[1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0]])

Cross entropy in TensorFlow

We use tf.reduce_sum and tf.log to calculate the cross-entropy D(S, L) = -sum(L * log(S)) between the softmax output S and the one-hot labels L in TensorFlow:


In [15]:
x = tf.reduce_sum([1, 2, 3, 4, 5])  # 15

In [16]:
x = tf.log(100.0)  # 4.60517

In [17]:
softmax_data = [0.7, 0.2, 0.1]
one_hot_data = [1.0, 0.0, 0.0]

softmax = tf.placeholder(tf.float32)
one_hot = tf.placeholder(tf.float32)

# TODO: Print cross entropy from session
cross_entropy = -tf.reduce_sum(tf.multiply(one_hot, tf.log(softmax)))
    
with tf.Session() as sess:
    output = sess.run(cross_entropy, feed_dict={softmax: softmax_data, one_hot: one_hot_data})
    print(output)


0.356675

Normalize data

Data should have zero mean and equal variance, otherwise it can throw off the math. Two main points:

  • datasets can have some big and some small values: say, house price is large (600,000) while number of swimming pools is small (0 or 1), yet both features matter when evaluating a house. Normalizing the data gives every feature a chance to make a difference in the output (see the sketch after this list).
  • math errors can happen when mixing large and small numbers, as the example below demonstrates.
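
As a minimal sketch of the first point (assuming a NumPy feature matrix X with one column per feature; standardize is an illustrative name), scaling each column to zero mean and unit variance looks like:

import numpy as np

def standardize(X):
    # per-column mean and standard deviation
    return (X - X.mean(axis=0)) / X.std(axis=0)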

In the example below, using a big vs. small initial number leads to a different result:


In [18]:
a, b = 1000000000, 1
# add a tiny increment a million times to a huge number and to a small one
for i in range(1000000):
    a = a + 1e-6
    b = b + 1e-6
# both should come out to 1.0, but the big starting value loses precision
a - 1000000000, b - 1


Out[18]:
(0.95367431640625, 0.9999999999177334)

There are many ways to initialize weights, but a good rule of thumb is to start with a random distribution around zero with a small standard deviation. A small sigma implies uncertainty, while a large sigma implies certainty, so it's good to start with a small standard deviation.
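
For example, a hedged sketch of drawing small initial weights (the stddev value is an arbitrary illustrative choice):

weights = tf.Variable(tf.truncated_normal((n_features, n_labels), stddev=0.1))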

Measuring Performance

Neural networks try to memorize the training set. The simplest way to detect this is to separate the data into training and test sets and measure performance on the test data. But since we tune the NN to do well on the test data, indirectly it is learning the test data too.

What this does is make the NN specialized to our existing dataset, so it doesn't work well on real-world data.

So we fix this by taking another chunk of our data and hiding it, only looking at it once we are done optimizing the NN. So we end up with three sets (a split sketch follows the list):

  • Training - what the NN trains on
  • Validation - the NN validates its model against this with every training run
  • Test - this portion of the data represents real-world data. We test the model on this only at the end, thus preventing the NN from learning this data set during the numerous training runs.
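
A minimal sketch of such a split, assuming NumPy arrays and illustrative fractions (sklearn's train_test_split is a common alternative):

import numpy as np

def three_way_split(X, y, valid_frac=0.1, test_frac=0.1, seed=0):
    # shuffle once, then carve off validation and test chunks
    idx = np.random.RandomState(seed).permutation(len(X))
    n_valid = int(len(X) * valid_frac)
    n_test = int(len(X) * test_frac)
    valid = idx[:n_valid]
    test = idx[n_valid:n_valid + n_test]
    train = idx[n_valid + n_test:]
    return (X[train], y[train]), (X[valid], y[valid]), (X[test], y[test])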

Stochastic Gradient Descent

Gradient descent is compute-intensive at scale: we compute the gradient for every single element in the training set, which is a lot of compute for big sets, and since gradient descent is iterative you have to go through your data many times.

Stochastic Gradient Descent is a shortcut that speeds things up tremendously. It takes a small random fraction of the data (say between 1 and 1000 samples) and computes the average loss and gradient on just that sample. This is only an estimate of the actual loss and gradient, and it can be wrong, so we take many small steps instead of one large correct step. However, since each step is computationally so much cheaper, it works out much faster overall.

This scales well with data and hence is at the core of deep learning, but it comes with issues in practice. Some ways we help SGD:

  • normalized data sets with mean 0 and equal variance (small)
  • random initial weights with mean 0 and equal variance (small)
  • Momentum: keep a running average of the gradient and use that instead of the current estimate of the gradient. This technique works very well.
  • Learning rate decay - make the learning rate smaller as we train. There are lots of ways to implement this, but the key is to lower it over time.

Since there are many parameters to tune, SGD is sensitive to hyperparameters. AdaGrad is a version of SGD which handles learning rate decay and momentum for you, which makes learning less sensitive to hyperparameters.
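
As a hedged sketch of the momentum and decay knobs in TensorFlow 1.x (all hyperparameter values are illustrative, and cost is assumed to be a loss tensor like the one defined later in these notes):

global_step = tf.Variable(0, trainable=False)
# shrink the learning rate geometrically as training progresses
decayed_lr = tf.train.exponential_decay(
    learning_rate=0.1, global_step=global_step,
    decay_steps=1000, decay_rate=0.96)
# momentum keeps a running average of past gradients
optimizer = tf.train.MomentumOptimizer(decayed_lr, momentum=0.9)
train_op = optimizer.minimize(cost, global_step=global_step)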

Mini batching

Large datasets don't fit into the memory of a typical home PC, so we get around this limitation by training on batches of the dataset. We do this by:

  • randomly shuffling the dataset at the start of each epoch and creating batches (see the sketch after this list)
  • for each batch, training the network weights
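
The batches() helper implemented further down doesn't shuffle, so as a minimal sketch (assuming NumPy arrays; shuffle_in_unison is an illustrative name), the per-epoch shuffle step could look like:

import numpy as np

def shuffle_in_unison(features, labels):
    # one permutation applied to both arrays keeps each label with its feature row
    idx = np.random.permutation(len(features))
    return features[idx], labels[idx]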

In [19]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

# Import MNIST data
#mnist.SOURCE_URL = 'https://s3.amazonaws.com/lasagne/recipes/datasets/mnist/'
mnist = input_data.read_data_sets('data/mnist', one_hot=True)

# The features are already scaled and the data is shuffled
train_features = mnist.train.images
test_features = mnist.test.images

train_labels = mnist.train.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)

# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))


Extracting data/mnist/train-images-idx3-ubyte.gz
Extracting data/mnist/train-labels-idx1-ubyte.gz
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz

Calculating how much memory in bytes the inputs, weights, and bias use. Note: they all contain float32 values, which are 4 bytes each.


In [20]:
print(train_features.shape)
train_features.shape[0] * train_features.shape[1] * 4


(55000, 784)
Out[20]:
172480000

In [45]:
train_labels.shape[0] * train_labels.shape[1] * 4


Out[45]:
2200000

In [51]:
weights.get_shape()[0] * weights.get_shape()[1] * 4


Out[51]:
Dimension(31360)

In [57]:
bias.get_shape()[0] * 4


Out[57]:
Dimension(40)

The total data is about 175 megabytes, which easily fits into memory, but most other datasets won't, hence the need for breaking up datasets into batches.

Moving on to making batches


In [21]:
def batches(batch_size, features, labels):
    """
    Create batches of features and labels
    :param batch_size: The batch size
    :param features: List of features
    :param labels: List of labels
    :return: Batches of (Features, Labels)
    """
    assert len(features) == len(labels)
    # TODO: Implement batching
    output = []
    # Python slicing handles the final partial batch automatically
    for start in range(0, len(features), batch_size):
        end = start + batch_size
        output.append([features[start:end], labels[start:end]])

    return output

Testing out the batch feature:


In [22]:
import numpy as np

learning_rate = 0.001
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

# Import MNIST data - already imported above

# The features are already scaled and the data is shuffled
train_features = mnist.train.images
test_features = mnist.test.images

train_labels = mnist.train.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)

# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])

# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))

# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# TODO: Set batch size
batch_size = 128
assert batch_size is not None, 'You must set the batch size'

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    # TODO: Train optimizer on all batches
    for batch_features, batch_labels in batches(batch_size, train_features, train_labels):
        sess.run(optimizer, feed_dict={features: batch_features, labels: batch_labels})

    # Calculate accuracy for test dataset
    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: test_features, labels: test_labels})

print('Test Accuracy: {}'.format(test_accuracy))


Test Accuracy: 0.14229999482631683

Epochs

An epoch is a single forward and backward pass of the whole dataset. Running multiple epochs increases the accuracy of the model without requiring more data.


In [28]:
def print_epoch_stats(epoch_i, sess, last_features, last_labels):
    """
    Print cost and validation accuracy of an epoch
    """
    current_cost = sess.run(
        cost,
        feed_dict={features: last_features, labels: last_labels})
    valid_accuracy = sess.run(
        accuracy,
        feed_dict={features: valid_features, labels: valid_labels})
    print('Epoch: {:<4} - Cost: {:<8.3} Valid Accuracy: {:<5.3}'.format(
        epoch_i,
        current_cost,
        valid_accuracy))

n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

# Import MNIST data - already imported above

# The features are already scaled and the data is shuffled
train_features = mnist.train.images
valid_features = mnist.validation.images
test_features = mnist.test.images

train_labels = mnist.train.labels.astype(np.float32)
valid_labels = mnist.validation.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)

# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])

# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))

# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)

# Define loss and optimizer
learning_rate = tf.placeholder(tf.float32)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

batch_size = 128
epochs = 50
learn_rate = 0.01

train_batches = batches(batch_size, train_features, train_labels)

with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch_i in range(epochs):

        # Loop over all batches
        for batch_features, batch_labels in train_batches:
            train_feed_dict = {
                features: batch_features,
                labels: batch_labels,
                learning_rate: learn_rate}
            sess.run(optimizer, feed_dict=train_feed_dict)

        # Print cost and validation accuracy of an epoch
        print_epoch_stats(epoch_i, sess, batch_features, batch_labels)

    # Calculate accuracy for test dataset
    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: test_features, labels: test_labels})

print('Test Accuracy: {}'.format(test_accuracy))


Epoch: 0    - Cost: 8.75     Valid Accuracy: 0.218
Epoch: 1    - Cost: 6.47     Valid Accuracy: 0.344
Epoch: 2    - Cost: 5.09     Valid Accuracy: 0.449
Epoch: 3    - Cost: 4.23     Valid Accuracy: 0.532
Epoch: 4    - Cost: 3.61     Valid Accuracy: 0.588
Epoch: 5    - Cost: 3.15     Valid Accuracy: 0.626
Epoch: 6    - Cost: 2.81     Valid Accuracy: 0.653
Epoch: 7    - Cost: 2.55     Valid Accuracy: 0.673
Epoch: 8    - Cost: 2.34     Valid Accuracy: 0.694
Epoch: 9    - Cost: 2.18     Valid Accuracy: 0.71 
Epoch: 10   - Cost: 2.04     Valid Accuracy: 0.721
Epoch: 11   - Cost: 1.92     Valid Accuracy: 0.733
Epoch: 12   - Cost: 1.82     Valid Accuracy: 0.743
Epoch: 13   - Cost: 1.74     Valid Accuracy: 0.754
Epoch: 14   - Cost: 1.66     Valid Accuracy: 0.762
Epoch: 15   - Cost: 1.59     Valid Accuracy: 0.769
Epoch: 16   - Cost: 1.53     Valid Accuracy: 0.774
Epoch: 17   - Cost: 1.48     Valid Accuracy: 0.779
Epoch: 18   - Cost: 1.43     Valid Accuracy: 0.783
Epoch: 19   - Cost: 1.39     Valid Accuracy: 0.788
Epoch: 20   - Cost: 1.35     Valid Accuracy: 0.794
Epoch: 21   - Cost: 1.31     Valid Accuracy: 0.798
Epoch: 22   - Cost: 1.28     Valid Accuracy: 0.803
Epoch: 23   - Cost: 1.24     Valid Accuracy: 0.807
Epoch: 24   - Cost: 1.22     Valid Accuracy: 0.809
Epoch: 25   - Cost: 1.19     Valid Accuracy: 0.811
Epoch: 26   - Cost: 1.16     Valid Accuracy: 0.814
Epoch: 27   - Cost: 1.14     Valid Accuracy: 0.817
Epoch: 28   - Cost: 1.12     Valid Accuracy: 0.82 
Epoch: 29   - Cost: 1.1      Valid Accuracy: 0.823
Epoch: 30   - Cost: 1.08     Valid Accuracy: 0.826
Epoch: 31   - Cost: 1.06     Valid Accuracy: 0.829
Epoch: 32   - Cost: 1.04     Valid Accuracy: 0.832
Epoch: 33   - Cost: 1.03     Valid Accuracy: 0.834
Epoch: 34   - Cost: 1.01     Valid Accuracy: 0.835
Epoch: 35   - Cost: 0.997    Valid Accuracy: 0.837
Epoch: 36   - Cost: 0.983    Valid Accuracy: 0.839
Epoch: 37   - Cost: 0.97     Valid Accuracy: 0.839
Epoch: 38   - Cost: 0.957    Valid Accuracy: 0.84 
Epoch: 39   - Cost: 0.945    Valid Accuracy: 0.841
Epoch: 40   - Cost: 0.934    Valid Accuracy: 0.843
Epoch: 41   - Cost: 0.922    Valid Accuracy: 0.845
Epoch: 42   - Cost: 0.912    Valid Accuracy: 0.846
Epoch: 43   - Cost: 0.902    Valid Accuracy: 0.847
Epoch: 44   - Cost: 0.892    Valid Accuracy: 0.847
Epoch: 45   - Cost: 0.882    Valid Accuracy: 0.848
Epoch: 46   - Cost: 0.873    Valid Accuracy: 0.85 
Epoch: 47   - Cost: 0.864    Valid Accuracy: 0.851
Epoch: 48   - Cost: 0.856    Valid Accuracy: 0.852
Epoch: 49   - Cost: 0.848    Valid Accuracy: 0.853
Test Accuracy: 0.8504999876022339

Going deeper

TensorFlow can easily deal with more layers: the output of one layer becomes the input of the next.


In [29]:
import tensorflow as tf

output = None
hidden_layer_weights = [
    [0.1, 0.2, 0.4],
    [0.4, 0.6, 0.6],
    [0.5, 0.9, 0.1],
    [0.8, 0.2, 0.8]]
out_weights = [
    [0.1, 0.6],
    [0.2, 0.1],
    [0.7, 0.9]]

# Weights and biases
weights = [
    tf.Variable(hidden_layer_weights),
    tf.Variable(out_weights)]
biases = [
    tf.Variable(tf.zeros(3)),
    tf.Variable(tf.zeros(2))]

# Input
features = tf.Variable([[1.0, 2.0, 3.0, 4.0], [-1.0, -2.0, -3.0, -4.0], [11.0, 12.0, 13.0, 14.0]])

# TODO: Create Model
# hidden layer: linear combination followed by a ReLU activation
hidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])
hidden_layer = tf.nn.relu(hidden_layer)

# output layer: linear combination of the hidden layer's activations
output_layer = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    output = sess.run(output_layer)
    
# TODO: Print session results
print(output)


[[  5.11000013   8.44000053]
 [  0.           0.        ]
 [ 24.01000214  38.23999786]]

Saving variables

To be able to reuse a model after training it, save the weights and biases with tf.train.Saver.


In [62]:
# The file path to save the data
save_file = './model.ckpt'

# Two Tensor Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3]))
bias = tf.Variable(tf.truncated_normal([3]))

# Class used to save and/or restore Tensor Variables
saver = tf.train.Saver()

with tf.Session() as sess:
    # Initialize all the Variables
    sess.run(tf.global_variables_initializer())

    # Show the values of weights and bias
    print('Weights:')
    print(sess.run(weights))
    print('Bias:')
    print(sess.run(bias))

    # Save the model
    saver.save(sess, save_file)


Weights:
[[ 0.32390234  0.54774678  0.91753024]
 [ 0.92633474  0.43032318 -0.44317827]]
Bias:
[ 0.29144895 -1.10052252 -0.61645734]

In [64]:
# Remove the previous weights and bias
tf.reset_default_graph()

# Two Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3]))
bias = tf.Variable(tf.truncated_normal([3]))

# Class used to save and/or restore Tensor Variables
saver = tf.train.Saver()

with tf.Session() as sess:
    # Load the weights and bias
    saver.restore(sess, save_file)

    # Show the values of weights and bias
    print('Weight:')
    print(sess.run(weights))
    print('Bias:')
    print(sess.run(bias))


Weight:
[[ 0.67779112 -0.94861162  1.16522622]
 [-0.89981604  0.48758447  0.4933596 ]]
Bias:
[ 0.8552441   0.92860544  1.94516361]

Saving an entire model after training


In [65]:
# Remove previous Tensors and Operations
tf.reset_default_graph()

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

learning_rate = 0.001
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

# Import MNIST data
mnist = input_data.read_data_sets('data/mnist', one_hot=True)

# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])

# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))

# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)

# Define loss and optimizer
cost = tf.reduce_mean(\
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
    .minimize(cost)

# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


Extracting data/mnist/train-images-idx3-ubyte.gz
Extracting data/mnist/train-labels-idx1-ubyte.gz
Extracting data/mnist/t10k-images-idx3-ubyte.gz
Extracting data/mnist/t10k-labels-idx1-ubyte.gz

In [66]:
import math

save_file = './train_model.ckpt'
batch_size = 128
n_epochs = 100

saver = tf.train.Saver()

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(n_epochs):
        total_batch = math.ceil(mnist.train.num_examples / batch_size)

        # Loop over all batches
        for i in range(total_batch):
            batch_features, batch_labels = mnist.train.next_batch(batch_size)
            sess.run(
                optimizer,
                feed_dict={features: batch_features, labels: batch_labels})

        # Print status for every 10 epochs
        if epoch % 10 == 0:
            valid_accuracy = sess.run(
                accuracy,
                feed_dict={
                    features: mnist.validation.images,
                    labels: mnist.validation.labels})
            print('Epoch {:<3} - Validation Accuracy: {}'.format(
                epoch,
                valid_accuracy))

    # Save the model
    saver.save(sess, save_file)
    print('Trained Model Saved.')


Epoch 0   - Validation Accuracy: 0.08100000023841858
Epoch 10  - Validation Accuracy: 0.2797999978065491
Epoch 20  - Validation Accuracy: 0.42340001463890076
Epoch 30  - Validation Accuracy: 0.5099999904632568
Epoch 40  - Validation Accuracy: 0.5726000070571899
Epoch 50  - Validation Accuracy: 0.6155999898910522
Epoch 60  - Validation Accuracy: 0.6517999768257141
Epoch 70  - Validation Accuracy: 0.6705999970436096
Epoch 80  - Validation Accuracy: 0.6891999840736389
Epoch 90  - Validation Accuracy: 0.7056000232696533
Trained Model Saved.

In [67]:
saver = tf.train.Saver()

# Launch the graph
with tf.Session() as sess:
    saver.restore(sess, save_file)

    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: mnist.test.images, labels: mnist.test.labels})

print('Test Accuracy: {}'.format(test_accuracy))


Test Accuracy: 0.7305999994277954

Simple model with Dropout

tf.nn.dropout zeroes each unit with probability 1 - keep_prob during training and scales the kept units by 1 / keep_prob, so the expected sum of activations stays the same.


In [73]:
import tensorflow as tf

hidden_layer_weights = [
    [0.1, 0.2, 0.4],
    [0.4, 0.6, 0.6],
    [0.5, 0.9, 0.1],
    [0.8, 0.2, 0.8]]
out_weights = [
    [0.1, 0.6],
    [0.2, 0.1],
    [0.7, 0.9]]

# Weights and biases
weights = [
    tf.Variable(hidden_layer_weights),
    tf.Variable(out_weights)]
biases = [
    tf.Variable(tf.zeros(3)),
    tf.Variable(tf.zeros(2))]

# Input
features = tf.Variable([[0.0, 2.0, 3.0, 4.0], [0.1, 0.2, 0.3, 0.4], [11.0, 12.0, 13.0, 14.0]])

# TODO: Create Model with Dropout
keep_prob = tf.placeholder(tf.float32) # probability to keep units

layer_1 = tf.add(tf.matmul(features,weights[0]), biases[0])
layer_1 = tf.nn.relu(layer_1)
layer_1 = tf.nn.dropout(layer_1, keep_prob)

layer_2 = tf.add(tf.matmul(layer_1, weights[1]), biases[1])

# TODO: Print logits from a session

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(layer_2, feed_dict={keep_prob: 0.5})
    print(output)


[[  9.55999947  16.        ]
 [  0.19600002   0.09800001]
 [  0.           0.        ]]
