In [1]:
import tensorflow as tf
# Create TensorFlow object called hello_constant
hello_constant = tf.constant('Hello World!')
with tf.Session() as sess:
    # Run the tf.constant operation in the session
    output = sess.run(hello_constant)
    print(output)
TensorFlow stores data in tensors, which are objects that hold n-dimensional arrays of values.
Constant data is stored with tf.constant; the value of a constant tensor never changes:
In [2]:
# A is a 0-dimensional int32 tensor
A = tf.constant(1234)
# B is a 1-dimensional int32 tensor
B = tf.constant([123,456,789])
# C is a 2-dimensional int32 tensor
C = tf.constant([ [123,456,789], [222,333,444] ])
A TensorFlow session is an environment for running a graph.
with tf.Session() as sess:
    output = sess.run(hello_constant)
The sess.run function evaluates the hello_constant tensor created above and returns its value.
TensorFlow can't just take a dataset x as input; the data has to be in a tensor. tf.placeholder is used for dynamic data, which is fed into the graph a piece at a time through feed_dict.
In [3]:
x = tf.placeholder(tf.string)
y = tf.placeholder(tf.int32)
z = tf.placeholder(tf.float32)
with tf.Session() as sess:
    output = sess.run(x, feed_dict={x: 'Test String', y: 123, z: 45.67})
    print(output)
In [4]:
x = tf.add(5, 2) # 7
y = tf.subtract(10, 4) # 6
z = tf.multiply(2, 5) # 10
print(x, y, z)
Math operations require tensors of the same type, so watch out for mixing floats, ints, etc. The line below fails because it tries to subtract an int from a float:
tf.subtract(tf.constant(2.0), tf.constant(1))
Use tf.cast to convert a tensor to another type:
In [6]:
tf.subtract(tf.cast(tf.constant(2.0), tf.int32), tf.constant(1)) # 1
Out[6]:
To print the value of a TensorFlow tensor, you have to run it in a session:
In [7]:
x = tf.add(5, 2)  # 7
with tf.Session() as s:
    output = s.run(x)
print(x)  # prints a Tensor object, not its value
output    # the evaluated value of x
Out[7]:
In [8]:
# TODO: Convert the following to TensorFlow:
x = tf.constant(10)
y = tf.constant(2)
x_over_y = tf.divide(x, y)
z = tf.subtract(x_over_y, tf.cast(1.0, tf.float64))
# TODO: Print z from a session
with tf.Session() as sess:
    output = sess.run(z)
    print(output)
The logistic classifier is the central building block of machine learning.
Logistic classifiers use a linear function, Wx + b = y, and a softmax function to assign probabilities to the outputs.
Softmax can take any set of scores and turn them into probabilities (values between 0 and 1 that sum to 1).
The most common operation in neural networks is calculating this linear combination of inputs, weights, and biases.
Here, W is the matrix of weights connecting two layers. The output y, the input x, and the biases b are all vectors.
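As a rough sketch (the shapes and values here are arbitrary, chosen only for illustration), the linear combination and the softmax, softmax(y_i) = exp(y_i) / sum_j exp(y_j), look like this in TensorFlow:
import tensorflow as tf

# Toy shapes: 3 input features, 2 output classes
x = tf.constant([[1.0, 2.0, 3.0]])            # input, shape (1, 3)
W = tf.Variable(tf.truncated_normal((3, 2)))  # weights, shape (3, 2)
b = tf.Variable(tf.zeros(2))                  # biases, shape (2,)

logits = tf.add(tf.matmul(x, W), b)  # y = xW + b
probs = tf.nn.softmax(logits)        # turn the scores into probabilities

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(probs))  # each row sums to 1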
Both weights and biases need to be modified as the network trains, so they use the tf.Variable class:
In [9]:
x = tf.Variable(5)
The tf.Variable class stores its state in the session, so it must be initialized manually. The tf.global_variables_initializer() function returns an operation that initializes all variable tensors:
In [10]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
We need initial values for the weights and biases; normally we just use random values from a normal distribution, for which we can use the tf.truncated_normal() function.
In [11]:
n_features = 120
n_labels = 5
weights = tf.Variable(tf.truncated_normal((n_features, n_labels)))
weights.initial_value
Out[11]:
The weights are already randomized, so there isn't really a need to randomize the bias as well; we can use the tf.zeros function to generate a tensor of zeros instead.
In [12]:
n_labels = 5
bias = tf.Variable(tf.zeros(n_labels))
bias.initial_value
Out[12]:
Softmax returns an array of probability values:
In [13]:
# how I used softmax - fed softmax the logit_data directly
def run():
    output = None
    logit_data = [2.0, 1.0, 0.1]
    logits = tf.placeholder(tf.float32)
    # TODO: Calculate the softmax of the logits
    softmax = tf.nn.softmax([2.0, 1.0, 0.1])
    with tf.Session() as sess:
        # TODO: Feed in the logit data
        output = sess.run(softmax)
    return output

run()
Out[13]:
In [13]:
# Udacity's version - they made a feed_dict
def run():
    output = None
    logit_data = [2.0, 1.0, 0.1]
    logits = tf.placeholder(tf.float32)
    softmax = tf.nn.softmax(logits)
    with tf.Session() as sess:
        output = sess.run(softmax, feed_dict={logits: logit_data})
    return output

run()
Out[13]:
Use LabelBinarizer to turn labels into one-hot encoded vectors:
In [14]:
import numpy as np
from sklearn import preprocessing
# Example labels
labels = np.array([1,5,3,2,1,4,2,1,3])
# Create the encoder
lb = preprocessing.LabelBinarizer()
# Here the encoder finds the classes and assigns one-hot vectors
lb.fit(labels)
# And finally, transform the labels into one-hot encoded vectors
lb.transform(labels)
Out[14]:
We use tf.reduce_sum and tf.log to calculate cross-entropy in TensorFlow:
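The quantity computed below is the standard cross-entropy between the softmax probabilities $S$ and the one-hot label vector $L$:
$$D(S, L) = -\sum_i L_i \log(S_i)$$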
In [15]:
x = tf.reduce_sum([1, 2, 3, 4, 5]) # 15
In [16]:
x = tf.log(100.0) # 4.60517
In [17]:
softmax_data = [0.7, 0.2, 0.1]
one_hot_data = [1.0, 0.0, 0.0]
softmax = tf.placeholder(tf.float32)
one_hot = tf.placeholder(tf.float32)
# TODO: Print cross entropy from session
cross_entropy = -tf.reduce_sum(tf.multiply(one_hot, tf.log(softmax)))
with tf.Session() as sess:
    output = sess.run(cross_entropy, feed_dict={softmax: softmax_data, one_hot: one_hot_data})
    print(output)
Data should have zero mean and equal variance, otherwise the maths can run into numerical-precision problems and the optimization becomes badly conditioned.
In the example below, using a big vs. a small initial number leads to a different result for what should be the same computation:
In [18]:
a, b = 1000000000, 1
for i in range(1000000):
    a = a + 1e-6
    b = b + 1e-6
a - 1000000000, b - 1
Out[18]:
There are many ways to initialize weights, but a good rule of thumb is to draw them from a random distribution around zero with a small standard deviation. A small sigma means the initial distribution is uncertain (not opinionated), while a large sigma means it is very opinionated, so it's good to start with a small standard deviation.
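A minimal sketch of that rule of thumb (the stddev value here is an arbitrary illustrative choice):
# Draw initial weights from a narrow truncated normal around zero
n_features = 120
n_labels = 5
weights = tf.Variable(tf.truncated_normal((n_features, n_labels), stddev=0.1))
# Biases can simply start at zero
bias = tf.Variable(tf.zeros(n_labels))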
Neural networks try to memorize the training set. The simplest way to detect this is to separate the data into a training set and a test set and measure performance on the test data. But since we tune the network to do well on the test data, indirectly it learns the test data too.
This makes the network specialized to our existing dataset, so it doesn't work as well in the real world.
We fix this by taking another chunk of the training data and hiding it, only looking at it once we have finished optimizing the network. So we end up with three sets: training, validation, and test. A sketch of such a split follows.
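A minimal sketch of such a three-way split using scikit-learn's train_test_split (the dummy arrays and split fractions are arbitrary; the MNIST loader used below already ships with a validation set):
import numpy as np
from sklearn.model_selection import train_test_split

# Dummy data, just for illustration
X = np.random.rand(1000, 784)
y = np.random.randint(0, 10, size=1000)

# Hold out 20% as the test set, then 25% of the remainder as validation
X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_valid, y_train, y_valid = train_test_split(X_rest, y_rest, test_size=0.25)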
Gradient descent is computationally intensive at scale: we compute the gradient over every single element of the training set, which is a lot of compute for big datasets, and since gradient descent is iterative we go through the data many times.
Stochastic Gradient Descent (SGD) is a shortcut that speeds things up tremendously. It takes a small random fraction of the data (say between 1 and 1000 samples) and computes the average loss and gradient on just that sample. This is only an estimate of the true loss and gradient, so it can be wrong, and we compensate by taking many small steps instead of one large correct step. But because each step is computationally so much cheaper, it works out much faster overall.
This scales well with data and hence is at the core of deep learning, but it comes with issues in practice. Two standard ways to help SGD are momentum and learning rate decay.
Since there are many parameters to tune, SGD is sensitive to hyperparameters. ADAGRAD is a variant of SGD that implicitly does learning rate decay and momentum, which makes training less sensitive to hyperparameters; a sketch follows below.
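A rough sketch of swapping tf.train.AdagradOptimizer in for plain gradient descent, using a toy scalar loss purely for illustration:
import tensorflow as tf

# Toy loss: minimize (w - 3)^2
w = tf.Variable(5.0)
loss = tf.square(w - 3.0)

# Adagrad adapts the effective learning rate for each parameter as it trains
optimizer = tf.train.AdagradOptimizer(learning_rate=0.5).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(200):
        sess.run(optimizer)
    print(sess.run(w))  # approaches 3.0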
In [19]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# Import MNIST data
#mnist.SOURCE_URL = 'https://s3.amazonaws.com/lasagne/recipes/datasets/mnist/'
mnist = input_data.read_data_sets('data/mnist', one_hot=True)
# The features are already scaled and the data is shuffled
train_features = mnist.train.images
test_features = mnist.test.images
train_labels = mnist.train.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)
# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))
Calculating how much memory (in bytes) the inputs, weights, and bias use. Note: they are all float32, which is 4 bytes per value.
In [20]:
print(train_features.shape)
train_features.shape[0] * train_features.shape[1] * 4
Out[20]:
In [45]:
train_labels.shape[0] * train_labels.shape[1] * 4
Out[45]:
In [51]:
weights.get_shape()[0] * weights.get_shape()[1] * 4
Out[51]:
In [57]:
bias.get_shape()[0] * 4
Out[57]:
The total data is about 175 megabytes, which easily fits into memory, but most other datasets won't, hence the need to break datasets up into batches.
Moving on to making batches:
In [21]:
import math
def batches(batch_size, features, labels):
    """
    Create batches of features and labels
    :param batch_size: The batch size
    :param features: List of features
    :param labels: List of labels
    :return: Batches of (Features, Labels)
    """
    assert len(features) == len(labels)
    # TODO: Implement batching
    n_batches = math.ceil(len(labels) / batch_size)
    output = []
    for b in range(n_batches - 1):
        start = b * batch_size
        end = (b + 1) * batch_size
        output.append([features[start:end], labels[start:end]])
    # The last batch may be smaller than batch_size
    last_start = (n_batches - 1) * batch_size
    output.append([features[last_start:], labels[last_start:]])
    return output
Testing out the batch feature:
In [22]:
import numpy as np
learning_rate = 0.001
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# Import MNIST data - already imported above
# The features are already scaled and the data is shuffled
train_features = mnist.train.images
test_features = mnist.test.images
train_labels = mnist.train.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)
# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])
# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))
# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# TODO: Set batch size
batch_size = 128
assert batch_size is not None, 'You must set the batch size'
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # TODO: Train optimizer on all batches
    for batch_features, batch_labels in batches(batch_size, train_features, train_labels):
        sess.run(optimizer, feed_dict={features: batch_features, labels: batch_labels})
    # Calculate accuracy for test dataset
    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: test_features, labels: test_labels})
    print('Test Accuracy: {}'.format(test_accuracy))
In [28]:
def print_epoch_stats(epoch_i, sess, last_features, last_labels):
    """
    Print cost and validation accuracy of an epoch
    """
    current_cost = sess.run(
        cost,
        feed_dict={features: last_features, labels: last_labels})
    valid_accuracy = sess.run(
        accuracy,
        feed_dict={features: valid_features, labels: valid_labels})
    print('Epoch: {:<4} - Cost: {:<8.3} Valid Accuracy: {:<5.3}'.format(
        epoch_i,
        current_cost,
        valid_accuracy))
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# Import MNIST data - already imported above
# The features are already scaled and the data is shuffled
train_features = mnist.train.images
valid_features = mnist.validation.images
test_features = mnist.test.images
train_labels = mnist.train.labels.astype(np.float32)
valid_labels = mnist.validation.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)
# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])
# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))
# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)
# Define loss and optimizer
learning_rate = tf.placeholder(tf.float32)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.global_variables_initializer()
batch_size = 128
epochs = 50
learn_rate = 0.01
train_batches = batches(batch_size, train_features, train_labels)
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch_i in range(epochs):
        # Loop over all batches
        for batch_features, batch_labels in train_batches:
            train_feed_dict = {
                features: batch_features,
                labels: batch_labels,
                learning_rate: learn_rate}
            sess.run(optimizer, feed_dict=train_feed_dict)
        # Print cost and validation accuracy of an epoch
        print_epoch_stats(epoch_i, sess, batch_features, batch_labels)
    # Calculate accuracy for test dataset
    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: test_features, labels: test_labels})
    print('Test Accuracy: {}'.format(test_accuracy))
In [29]:
import tensorflow as tf
output = None
hidden_layer_weights = [
[0.1, 0.2, 0.4],
[0.4, 0.6, 0.6],
[0.5, 0.9, 0.1],
[0.8, 0.2, 0.8]]
out_weights = [
[0.1, 0.6],
[0.2, 0.1],
[0.7, 0.9]]
# Weights and biases
weights = [
tf.Variable(hidden_layer_weights),
tf.Variable(out_weights)]
biases = [
tf.Variable(tf.zeros(3)),
tf.Variable(tf.zeros(2))]
# Input
features = tf.Variable([[1.0, 2.0, 3.0, 4.0], [-1.0, -2.0, -3.0, -4.0], [11.0, 12.0, 13.0, 14.0]])
# TODO: Create Model
hidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])
hidden_layer = tf.nn.relu(hidden_layer)
output_layer = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    output = sess.run(output_layer)
    # TODO: Print session results
    print(output)
In [62]:
# The file path to save the data
save_file = './model.ckpt'
# Two Tensor Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3]))
bias = tf.Variable(tf.truncated_normal([3]))
# Class used to save and/or restore Tensor Variables
saver = tf.train.Saver()
with tf.Session() as sess:
    # Initialize all the Variables
    sess.run(tf.global_variables_initializer())
    # Show the values of weights and bias
    print('Weights:')
    print(sess.run(weights))
    print('Bias:')
    print(sess.run(bias))
    # Save the model
    saver.save(sess, save_file)
In [64]:
# Remove the previous weights and bias
tf.reset_default_graph()
# Two Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3]))
bias = tf.Variable(tf.truncated_normal([3]))
# Class used to save and/or restore Tensor Variables
saver = tf.train.Saver()
with tf.Session() as sess:
    # Load the weights and bias
    saver.restore(sess, save_file)
    # Show the values of weights and bias
    print('Weights:')
    print(sess.run(weights))
    print('Bias:')
    print(sess.run(bias))
In [65]:
# Remove previous Tensors and Operations
tf.reset_default_graph()
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
learning_rate = 0.001
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# Import MNIST data
mnist = input_data.read_data_sets('data/mnist', one_hot=True)
# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])
# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))
# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)
# Define loss and optimizer
cost = tf.reduce_mean(\
tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
.minimize(cost)
# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
In [66]:
import math
save_file = './train_model.ckpt'
batch_size = 128
n_epochs = 100
saver = tf.train.Saver()
# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(n_epochs):
        total_batch = math.ceil(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_features, batch_labels = mnist.train.next_batch(batch_size)
            sess.run(
                optimizer,
                feed_dict={features: batch_features, labels: batch_labels})
        # Print status for every 10 epochs
        if epoch % 10 == 0:
            valid_accuracy = sess.run(
                accuracy,
                feed_dict={
                    features: mnist.validation.images,
                    labels: mnist.validation.labels})
            print('Epoch {:<3} - Validation Accuracy: {}'.format(
                epoch,
                valid_accuracy))
    # Save the model
    saver.save(sess, save_file)
    print('Trained Model Saved.')
In [67]:
saver = tf.train.Saver()
# Launch the graph
with tf.Session() as sess:
    saver.restore(sess, save_file)
    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: mnist.test.images, labels: mnist.test.labels})
    print('Test Accuracy: {}'.format(test_accuracy))
In [73]:
import tensorflow as tf
hidden_layer_weights = [
[0.1, 0.2, 0.4],
[0.4, 0.6, 0.6],
[0.5, 0.9, 0.1],
[0.8, 0.2, 0.8]]
out_weights = [
[0.1, 0.6],
[0.2, 0.1],
[0.7, 0.9]]
# Weights and biases
weights = [
tf.Variable(hidden_layer_weights),
tf.Variable(out_weights)]
biases = [
tf.Variable(tf.zeros(3)),
tf.Variable(tf.zeros(2))]
# Input
features = tf.Variable([[0.0, 2.0, 3.0, 4.0], [0.1, 0.2, 0.3, 0.4], [11.0, 12.0, 13.0, 14.0]])
# TODO: Create Model with Dropout
keep_prob = tf.placeholder(tf.float32) # probability to keep units
layer_1 = tf.add(tf.matmul(features,weights[0]), biases[0])
layer_1 = tf.nn.relu(layer_1)
layer_1 = tf.nn.dropout(layer_1, keep_prob)
layer_2 = tf.add(tf.matmul(layer_1, weights[1]), biases[1])
# TODO: Print logits from a session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(layer_2, feed_dict={keep_prob: 0.5})
    print(output)