In [ ]:
# Download (into the current directory) and load the MNIST dataset with
# labels one-hot encoded, as expected by the saved classifier below.
# NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x —
# this cell requires TensorFlow 1.x.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('.', one_hot=True)
In [ ]:
import tensorflow as tf

# Checkpoint prefix written by the earlier training notebook.
load_path = './2_simple_model'

with tf.Session() as sess:
    # Rebuild the graph structure from the checkpoint metadata, then
    # restore the trained variable values into this session.
    saver = tf.train.import_meta_graph(load_path + '.meta')
    saver.restore(sess, load_path)

    # Look up the tensors needed for evaluation by their saved names.
    graph = tf.get_default_graph()
    features = graph.get_tensor_by_name('features:0')
    labels = graph.get_tensor_by_name('labels:0')
    accuracy = graph.get_tensor_by_name('accuracy:0')

    # Evaluate the restored model on the full MNIST test split.
    test_feed = {features: mnist.test.images, labels: mnist.test.labels}
    print("Accuracy:", sess.run(accuracy, feed_dict=test_feed))
In [ ]:
import os
import hashlib
from urllib.request import urlretrieve
def download(url, file):
    """
    Download file from <url>, skipping the transfer when the local file
    already exists on disk.

    :param url: URL to file
    :param file: Local file path
    """
    # Guard clause: nothing to do if the file is already present.
    if os.path.isfile(file):
        return
    print('Downloading ' + file + '...')
    urlretrieve(url, file)
    print('Download Finished')
def _file_md5(path):
    """Return the hex MD5 digest of *path*.

    Reads in 64 KiB chunks so the multi-hundred-MB archives are never
    held fully in memory, and closes the handle deterministically
    (the original used `open(...).read()` with no close).
    """
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Download the training and test dataset.
download('http://s3.amazonaws.com/udacity-sdc/notMNIST_train.zip', 'notMNIST_train.zip')
download('http://s3.amazonaws.com/udacity-sdc/notMNIST_test.zip', 'notMNIST_test.zip')

# Make sure the files aren't corrupted
assert _file_md5('notMNIST_train.zip') == 'c8673b3f28f489e9cdf3a3d74e2ac8fa',\
    'notMNIST_train.zip file is corrupted. Remove the file and try again.'
assert _file_md5('notMNIST_test.zip') == '5d3c7e653e63471c88df796156a9dfa9',\
    'notMNIST_test.zip file is corrupted. Remove the file and try again.'

# Wait until you see that all files have been downloaded.
print('All files downloaded.')
In [ ]:
from zipfile import ZipFile
import numpy as np
from tqdm import tqdm
from PIL import Image
def uncompress_features_labels(file):
    """
    Uncompress features and labels from a zip file
    :param file: The zip file to extract the data from
    """
    features = []
    labels = []
    with ZipFile(file) as zipf:
        # Progress Bar
        progress = tqdm(zipf.namelist(), unit='files')
        # Get features and labels from all files
        for entry in progress:
            # Entries ending in '/' are directories, not images — skip them.
            if entry.endswith('/'):
                continue
            with zipf.open(entry) as image_file:
                image = Image.open(image_file)
                image.load()
                # Flatten to a 1-D array; float32 keeps memory use down.
                features.append(np.array(image, dtype=np.float32).flatten())
            # The first character of the basename is the image's letter label.
            labels.append(os.path.split(entry)[1][0])
    return np.array(features), np.array(labels)
# Get the features and labels from the zip files
# (both archives must already exist locally — see the download cell).
train_features, train_labels = uncompress_features_labels('notMNIST_train.zip')
test_features, test_labels = uncompress_features_labels('notMNIST_test.zip')
In [ ]:
# Summarize the loaded dataset: sample counts and per-letter class
# frequencies, plus a quick sanity check of one sample.
print('\nStats:')
print('Training Samples: {}'.format(len(train_features)))
# Class distribution as {letter: count}.
print(dict(zip(*np.unique(train_labels, return_counts=True))))
print('NO VALIDATION DATA')
print('Testing Samples: {}'.format(len(test_features)))
print(dict(zip(*np.unique(test_labels, return_counts=True))))
sample_image = train_features[0]
sample_label = train_labels[0]
print('\nExamples:')
# Raw pixel range — expected 0..255 before normalization.
print('Image - Min: {} Max: {}'.format(sample_image.min(), sample_image.max()))
print('Label: {}'.format(sample_label))
print('First 15 Labels: {}'.format(train_labels[:15]))
In [ ]:
# Scale pixel values from [0, 255] down to [0, 1].
print('Max: {} Min: {}'.format(train_features.max(), train_features.min()))
# In-place division avoids allocating a second full-size copy of each
# (large) array; the arrays are float32, so the result stays float32.
train_features /= 255
test_features /= 255
print('Max: {} Min: {}'.format(train_features.max(), train_features.min()))
In [ ]:
from sklearn.preprocessing import LabelBinarizer
# Turn labels into numbers and apply One-Hot Encoding
encoder = LabelBinarizer()
# Fit on the training labels only; test labels are assumed to contain
# no letters unseen in training.
encoder.fit(train_labels)
train_labels = encoder.transform(train_labels)
test_labels = encoder.transform(test_labels)
# Change to float32, so it can be multiplied against the features in TensorFlow, which are float32
train_labels = train_labels.astype(np.float32)
test_labels = test_labels.astype(np.float32)
In [ ]:
from sklearn.model_selection import train_test_split
# Get randomized datasets for training and validation
# 5% of the training data is held out as a validation set; the fixed
# random_state keeps the split reproducible across runs.
train_features, valid_features, train_labels, valid_labels = train_test_split(
train_features,
train_labels,
test_size=0.05,
random_state=832289)
In [ ]:
import tensorflow as tf

# Checkpoint prefix written by the earlier training notebook.
load_path = './2_simple_model'

with tf.Session() as sess:
    # Recreate the saved graph and restore its trained weights.
    loader = tf.train.import_meta_graph(load_path + '.meta')
    loader.restore(sess, load_path)

    # Fetch the tensors the evaluation needs by their saved names.
    graph = tf.get_default_graph()
    features = graph.get_tensor_by_name('features:0')
    labels = graph.get_tensor_by_name('labels:0')
    accuracy = graph.get_tensor_by_name('accuracy:0')

    # Print accuracy
    print("Accuracy:", sess.run(accuracy, feed_dict={features: test_features, labels: test_labels}))
In [ ]:
# Parameters
epochs = 16
batch_size = 100

with tf.Session() as sess:
    # Rebuild the saved graph and restore the trained weights so
    # training can continue from the checkpoint.
    saver = tf.train.import_meta_graph(load_path + '.meta')
    saver.restore(sess, load_path)

    graph = tf.get_default_graph()
    optimizer = graph.get_operation_by_name('optimizer')
    features = graph.get_tensor_by_name('features:0')
    labels = graph.get_tensor_by_name('labels:0')
    cost = graph.get_tensor_by_name('cost:0')
    accuracy = graph.get_tensor_by_name('accuracy:0')

    # BUG FIX: the original iterated `for batch_i in range(batch_size)`,
    # i.e. it used the batch SIZE as the number of batches, so each epoch
    # always processed exactly batch_size * batch_size samples regardless
    # of how big the training set is. Derive the batch count from the
    # data instead so every sample is visited once per epoch.
    batch_count = len(train_features) // batch_size

    for epoch in range(epochs):
        loss = None
        for batch_i in range(batch_count):
            # Get a batch of training features and labels
            batch_start = batch_i * batch_size
            batch_features = train_features[batch_start:batch_start + batch_size]
            batch_labels = train_labels[batch_start:batch_start + batch_size]
            _, loss = sess.run(
                [optimizer, cost],
                feed_dict={features: batch_features, labels: batch_labels})
        # Print Loss (from the last batch of the epoch)
        print('Epoch {:>2} - Loss: {}'.format(epoch, loss))

    # Print accuracy
    print("Accuracy:", accuracy.eval({features: test_features, labels: test_labels}))