In this project, we'll classify images from the CIFAR-10 dataset. The dataset consists of airplanes, dogs, cats, and other objects. We'll preprocess the images, then train a convolutional neural network on all the samples. The images need to be normalized and the labels need to be one-hot encoded. We'll build convolutional, max pooling, dropout, and fully connected layers. At the end, we'll see the neural network's predictions on sample images.
Run the following cell to download the CIFAR-10 dataset for Python.
In [1]:
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
import problem_unittests as tests
import tarfile

cifar10_dataset_folder_path = 'cifar-10-batches-py'

# Use Floyd's CIFAR-10 dataset if present
floyd_cifar10_location = '/input/cifar-10/python.tar.gz'
if isfile(floyd_cifar10_location):
    tar_gz_path = floyd_cifar10_location
else:
    tar_gz_path = 'cifar-10-python.tar.gz'

class DLProgress(tqdm):
    """tqdm progress bar hooked into urlretrieve's reporthook."""
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num

# Download the archive if it isn't already on disk
if not isfile(tar_gz_path):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='CIFAR-10 Dataset') as pbar:
        urlretrieve(
            'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
            tar_gz_path,
            pbar.hook)

# Extract the archive if the dataset folder doesn't exist yet
# (the with block closes the tar file automatically)
if not isdir(cifar10_dataset_folder_path):
    with tarfile.open(tar_gz_path) as tar:
        tar.extractall()

tests.test_folder_path(cifar10_dataset_folder_path)
The dataset is broken into batches to prevent your machine from running out of memory. The CIFAR-10 dataset consists of 5 batches, named data_batch_1, data_batch_2, etc. Each batch contains labels and images belonging to one of the following classes: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck.
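To get a feel for the format, here is a minimal sketch of loading one batch directly with pickle (the project's helper module does the real loading, so this is just an illustration): each data_batch file is a pickled dictionary whose 'data' entry holds 10,000 flattened images and whose 'labels' entry holds the matching class ids.
import pickle

# Illustration only -- assumes the dataset was extracted to cifar10_dataset_folder_path
with open(cifar10_dataset_folder_path + '/data_batch_1', mode='rb') as file:
    batch = pickle.load(file, encoding='latin1')

# 'data' is (10000, 3072) uint8; reshape to (10000, 32, 32, 3) RGB images
features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
labels = batch['labels']
print(features.shape, len(labels))  # (10000, 32, 32, 3) 10000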
Understanding a dataset is part of making predictions on the data. Play around with the code cell below by changing the batch_id and sample_id. The batch_id is the id for a batch (1-5). The sample_id is the id for an image and label pair in the batch.
In [2]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import helper
import numpy as np
# Explore the dataset
batch_id = 3
sample_id = 2
helper.display_stats(cifar10_dataset_folder_path, batch_id, sample_id)
In [3]:
def normalize(x):
    """
    Normalize a list of sample image data in the range of 0 to 1
    : x: List of image data. The image shape is (32, 32, 3)
    : return: Numpy array of normalized data
    """
    # Pixel values are 0-255, so dividing by 255 rescales them to the range 0-1
    return x / 255

tests.test_normalize(normalize)
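As a quick sanity check (illustration only), the darkest pixel value maps to 0, mid-gray to roughly 0.5, and the brightest to 1:
import numpy as np

sample = np.array([[[0, 128, 255]]], dtype=np.uint8)  # a single pixel with three channels
print(normalize(sample))  # [[[0.         0.50196078 1.        ]]]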
Just like the previous code cell, we'll be implementing a function for preprocessing. This time, we'll implement the one_hot_encode function. The input, x, is a list of labels. Implement the function to return the list of labels as a one-hot encoded Numpy array. The possible values for labels are 0 to 9. The one-hot encoding function should return the same encoding for each value between calls to one_hot_encode, so make sure to save the map of encodings outside the function.
In [4]:
def one_hot_encode(x):
    """
    One hot encode a list of sample labels. Return a one-hot encoded vector for each label.
    : x: List of sample Labels
    : return: Numpy array of one-hot encoded labels
    """
    # Row i of the 10x10 identity matrix is the one-hot vector for label i,
    # so indexing np.eye(10) with the label list encodes the whole batch at once
    return np.eye(10)[x]

tests.test_one_hot_encode(one_hot_encode)
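For example (illustration only), encoding the labels 0, 3, and 9 gives one row per label with a single 1 in the matching column:
print(one_hot_encode([0, 3, 9]))
# [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]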
In [5]:
# Preprocess Training, Validation, and Testing Data
helper.preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode)
In [6]:
import pickle
import problem_unittests as tests
import helper
# Load the Preprocessed Validation data
valid_features, valid_labels = pickle.load(open('preprocess_validation.p', mode='rb'))
For the neural network, we'll build each layer into a function. Most of the code you've seen has been outside of functions. To test your code more thoroughly, we require that you put each layer in a function. This allows us to give you better feedback and test for simple mistakes using our unit tests before you submit your project.
Note: None for shapes in TensorFlow allows for a dynamic size.
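A minimal sketch of what that means (assuming TensorFlow 1.x): because the first dimension is None, the same placeholder accepts batches of any size.
import numpy as np
import tensorflow as tf

p = tf.placeholder(tf.float32, [None, 32, 32, 3])  # batch dimension left dynamic
with tf.Session() as sess:
    # A batch of 7 images is accepted even though no batch size was declared
    print(sess.run(tf.shape(p), feed_dict={p: np.zeros((7, 32, 32, 3))}))  # [ 7 32 32  3]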
In [7]:
import tensorflow as tf

def neural_net_image_input(image_shape):
    """
    Return a Tensor for a batch of image input
    : image_shape: Shape of the images
    : return: Tensor for image input.
    """
    # None leaves the batch dimension dynamic
    return tf.placeholder(tf.float32, [None, image_shape[0], image_shape[1], image_shape[2]], name="x")

def neural_net_label_input(n_classes):
    """
    Return a Tensor for a batch of label input
    : n_classes: Number of classes
    : return: Tensor for label input.
    """
    return tf.placeholder(tf.float32, [None, n_classes], name="y")

def neural_net_keep_prob_input():
    """
    Return a Tensor for keep probability
    : return: Tensor for keep probability.
    """
    return tf.placeholder(tf.float32, name="keep_prob")

tf.reset_default_graph()
tests.test_nn_image_inputs(neural_net_image_input)
tests.test_nn_label_inputs(neural_net_label_input)
tests.test_nn_keep_prob_inputs(neural_net_keep_prob_input)
In [8]:
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides):
    """
    Apply convolution then max pooling to x_tensor
    :param x_tensor: TensorFlow Tensor
    :param conv_num_outputs: Number of outputs for the convolutional layer
    :param conv_ksize: kernel size 2-D Tuple for the convolutional layer
    :param conv_strides: Stride 2-D Tuple for convolution
    :param pool_ksize: kernel size 2-D Tuple for pool
    :param pool_strides: Stride 2-D Tuple for pool
    : return: A tensor that represents convolution and max pooling of x_tensor
    """
    # Filter weights: (height, width, input depth, output depth)
    weights = tf.Variable(tf.truncated_normal(
        (conv_ksize[0], conv_ksize[1], int(x_tensor.shape[3]), conv_num_outputs),
        mean=0, stddev=0.1))
    bias = tf.Variable(tf.zeros(conv_num_outputs))

    # Convolution, bias add, and ReLU activation
    conv_layer = tf.nn.conv2d(x_tensor, weights,
                              strides=[1, conv_strides[0], conv_strides[1], 1], padding='SAME')
    conv_layer = tf.nn.bias_add(conv_layer, bias)
    conv_layer = tf.nn.relu(conv_layer)

    # Max pooling
    conv_layer = tf.nn.max_pool(conv_layer,
                                ksize=[1, pool_ksize[0], pool_ksize[1], 1],
                                strides=[1, pool_strides[0], pool_strides[1], 1],
                                padding='SAME')
    return conv_layer

tests.test_con_pool(conv2d_maxpool)
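As a rough shape check (illustration only): with 'SAME' padding, a stride-1 convolution keeps the 32x32 spatial size and a stride-2 pool halves it, so a 32x32x3 input becomes 16x16 with 64 feature maps here.
tf.reset_default_graph()
x_check = tf.placeholder(tf.float32, [None, 32, 32, 3])
print(conv2d_maxpool(x_check, 64, (5, 5), (1, 1), (3, 3), (2, 2)).shape)  # (?, 16, 16, 64)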
In [9]:
def flatten(x_tensor):
    """
    Flatten x_tensor to (Batch Size, Flattened Image Size)
    : x_tensor: A tensor of size (Batch Size, ...), where ... are the image dimensions.
    : return: A tensor of size (Batch Size, Flattened Image Size).
    """
    # Collapse height * width * depth into one dimension; -1 keeps the batch size dynamic
    flat_size = int(x_tensor.shape[1]) * int(x_tensor.shape[2]) * int(x_tensor.shape[3])
    return tf.reshape(x_tensor, [-1, flat_size])

tests.test_flatten(flatten)
In [10]:
def fully_conn(x_tensor, num_outputs):
    """
    Apply a fully connected layer to x_tensor using weight and bias
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of outputs that the new tensor should have.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    # Weight matrix: (input size, output size)
    weights = tf.Variable(tf.truncated_normal((int(x_tensor.shape[1]), num_outputs), mean=0, stddev=0.1))
    bias = tf.Variable(tf.zeros(num_outputs))
    return tf.add(tf.matmul(x_tensor, weights), bias)

tests.test_fully_conn(fully_conn)
In [33]:
def output(x_tensor, num_outputs):
    """
    Apply an output layer to x_tensor using weight and bias
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of outputs that the new tensor should have.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    # Same linear transform as fully_conn, left without an activation so it produces logits
    weights = tf.Variable(tf.truncated_normal((int(x_tensor.shape[1]), num_outputs), mean=0, stddev=0.1))
    bias = tf.Variable(tf.zeros(num_outputs))
    return tf.add(tf.matmul(x_tensor, weights), bias)

tests.test_output(output)
Implement the function conv_net to create a convolutional neural network model. The function takes in a batch of images, x, and outputs logits. Use the layers you created above to build this model.
In [35]:
def conv_net(x, keep_prob):
    """
    Create a convolutional neural network model
    : x: Placeholder tensor that holds image data.
    : keep_prob: Placeholder tensor that holds dropout keep probability.
    : return: Tensor that represents logits
    """
    # Three convolution + max pooling layers; depth grows (64, 128, 256) while the
    # stride-2 pools halve the spatial size (32 -> 16 -> 8 -> 4)
    # conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides)
    conv1 = conv2d_maxpool(x, 64, (5, 5), (1, 1), (3, 3), (2, 2))
    conv2 = conv2d_maxpool(conv1, 128, (3, 3), (1, 1), (2, 2), (2, 2))
    conv3 = conv2d_maxpool(conv2, 256, (2, 2), (1, 1), (2, 2), (2, 2))

    # Flatten layer
    # flatten(x_tensor)
    fc1 = flatten(conv3)

    # Three fully connected layers, narrowing toward the output
    # fully_conn(x_tensor, num_outputs)
    fc1 = fully_conn(fc1, 1024)
    fc1 = fully_conn(fc1, 512)
    fc1 = fully_conn(fc1, 256)
    fc1 = tf.nn.relu(fc1)

    # Dropout, then the output layer with one logit per class
    # output(x_tensor, num_outputs)
    fc1 = tf.nn.dropout(fc1, keep_prob)
    return output(fc1, 10)


##############################
## Build the Neural Network ##
##############################

# Remove previous weights, bias, inputs, etc..
tf.reset_default_graph()

# Inputs
x = neural_net_image_input((32, 32, 3))
y = neural_net_label_input(10)
keep_prob = neural_net_keep_prob_input()

# Model
logits = conv_net(x, keep_prob)

# Name logits Tensor, so that it can be loaded from disk after training
logits = tf.identity(logits, name='logits')

# Loss and Optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)

# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

tests.test_conv_net(conv_net)
Implement the function train_neural_network to do a single optimization. The optimization should use optimizer to optimize in session with a feed_dict of the following:
x for the image input
y for the labels
keep_prob for the dropout keep probability
This function will be called for each batch, so tf.global_variables_initializer() has already been called.
Note: Nothing needs to be returned. This function is only optimizing the neural network.
In [36]:
def train_neural_network(session, optimizer, keep_probability, feature_batch, label_batch):
    """
    Optimize the session on a batch of images and labels
    : session: Current TensorFlow session
    : optimizer: TensorFlow optimizer function
    : keep_probability: keep probability
    : feature_batch: Batch of Numpy image data
    : label_batch: Batch of Numpy label data
    """
    # Run one optimization step, feeding the graph's x, y, and keep_prob placeholders
    session.run(optimizer, feed_dict={x: feature_batch, y: label_batch, keep_prob: keep_probability})

tests.test_train_nn(train_neural_network)
In [37]:
def print_stats(session, feature_batch, label_batch, cost, accuracy):
    """
    Print information about loss and validation accuracy
    : session: Current TensorFlow session
    : feature_batch: Batch of Numpy image data
    : label_batch: Batch of Numpy label data
    : cost: TensorFlow cost function
    : accuracy: TensorFlow accuracy function
    """
    # Dropout is disabled (keep_prob = 1.0) when evaluating loss and accuracy
    loss = session.run(cost, feed_dict={x: feature_batch, y: label_batch, keep_prob: 1.})
    valid_acc = session.run(accuracy, feed_dict={x: valid_features, y: valid_labels, keep_prob: 1.})
    print('Loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(loss, valid_acc))
In [38]:
# Tune Parameters
epochs = 40
batch_size = 128
keep_probability = 0.5
Instead of training the neural network on all the CIFAR-10 batches of data, let's use a single batch. This should save time while you iterate on the model to improve accuracy. Once the final validation accuracy is 50% or greater, run the model on all the data in the next section.
In [39]:
print('Checking the Training on a Single Batch...')
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(epochs):
        batch_i = 1
        for batch_features, batch_labels in helper.load_preprocess_training_batch(batch_i, batch_size):
            train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)
        print('Epoch {:>2}, CIFAR-10 Batch {}: '.format(epoch + 1, batch_i), end='')
        print_stats(sess, batch_features, batch_labels, cost, accuracy)
In [40]:
save_model_path = './image_classification'

print('Training...')
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(epochs):
        # Loop over all batches
        n_batches = 5
        for batch_i in range(1, n_batches + 1):
            for batch_features, batch_labels in helper.load_preprocess_training_batch(batch_i, batch_size):
                train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)
            print('Epoch {:>2}, CIFAR-10 Batch {}: '.format(epoch + 1, batch_i), end='')
            print_stats(sess, batch_features, batch_labels, cost, accuracy)

    # Save Model
    saver = tf.train.Saver()
    save_path = saver.save(sess, save_model_path)
In [41]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import tensorflow as tf
import pickle
import helper
import random

# Set batch size if not already set
try:
    if batch_size:
        pass
except NameError:
    batch_size = 64

save_model_path = './image_classification'
n_samples = 4
top_n_predictions = 3

def test_model():
    """
    Test the saved model against the test dataset
    """
    test_features, test_labels = pickle.load(open('preprocess_test.p', mode='rb'))
    loaded_graph = tf.Graph()

    with tf.Session(graph=loaded_graph) as sess:
        # Load model
        loader = tf.train.import_meta_graph(save_model_path + '.meta')
        loader.restore(sess, save_model_path)

        # Get Tensors from loaded model
        loaded_x = loaded_graph.get_tensor_by_name('x:0')
        loaded_y = loaded_graph.get_tensor_by_name('y:0')
        loaded_keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')
        loaded_logits = loaded_graph.get_tensor_by_name('logits:0')
        loaded_acc = loaded_graph.get_tensor_by_name('accuracy:0')

        # Get accuracy in batches for memory limitations
        test_batch_acc_total = 0
        test_batch_count = 0

        for test_feature_batch, test_label_batch in helper.batch_features_labels(test_features, test_labels, batch_size):
            test_batch_acc_total += sess.run(
                loaded_acc,
                feed_dict={loaded_x: test_feature_batch, loaded_y: test_label_batch, loaded_keep_prob: 1.0})
            test_batch_count += 1

        print('Testing Accuracy: {}\n'.format(test_batch_acc_total / test_batch_count))

        # Print Random Samples
        random_test_features, random_test_labels = tuple(zip(*random.sample(list(zip(test_features, test_labels)), n_samples)))
        random_test_predictions = sess.run(
            tf.nn.top_k(tf.nn.softmax(loaded_logits), top_n_predictions),
            feed_dict={loaded_x: random_test_features, loaded_y: random_test_labels, loaded_keep_prob: 1.0})
        helper.display_image_predictions(random_test_features, random_test_labels, random_test_predictions)

test_model()