In [1]:
from __future__ import print_function
#Basic libraries
import numpy as np
import tensorflow as tf
print('Tensorflow version: ', tf.__version__)
import time
#Show images
import matplotlib.pyplot as plt
%matplotlib inline
# Matplotlib defaults shared by every figure drawn in this notebook
plt.rcParams.update({
    'figure.figsize': (10, 10),        # default figure size
    'image.interpolation': 'nearest',  # show exact pixels, no smoothing
    'image.cmap': 'gray',              # use grayscale for images
})
In [2]:
# Select GPU: order the devices by PCI bus id and expose only device "1" to CUDA
import os

os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

# List the devices TensorFlow can see
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
In [3]:
# Load the MNIST dataset and rescale the pixel values to [0, 1]
from tensorflow.contrib.keras import datasets

(X_train, y_train), (X_test, y_test) = datasets.mnist.load_data(path='mnist.npz')
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

# Report the shape of every array
for array_label, array in (('X train shape: ', X_train),
                           ('y train shape: ', y_train),
                           ('X test shape: ', X_test),
                           ('y test shape: ', y_test)):
    print(array_label, array.shape)
In [4]:
# Examine the data: show the first 32 test digits in a 4x8 grid,
# each one titled with its label.
n_rows, n_cols = 4, 8
fig = plt.figure()
for i in range(n_rows * n_cols):
    a = fig.add_subplot(n_rows, n_cols, i + 1)
    a.set_title(str(y_test[i]))
    # Draw on the subplot explicitly instead of relying on the current axes
    a.imshow(X_test[i])
# Adjust the spacing once, after all the subplots exist
fig.tight_layout()
In [5]:
from tensorflow.contrib.keras import models, layers, optimizers, callbacks

print('Linear model...')
# Input: batches of 28x28 float32 images
images = layers.Input(
    batch_shape=(None, 28, 28),
    dtype='float32',
    name='Images',
)
# Flatten each image and connect it directly to the 10 softmax outputs
flat = layers.Flatten(name='Flat_image')(images)
output = layers.Dense(
    10,
    activation='softmax',
    name='Dense_output',
)(flat)

# Model architecture defined
model_linear = models.Model(inputs=images, outputs=output)
model_linear.summary()
In [6]:
# Compile the linear model with a plain SGD optimizer
sgd_optimizer = optimizers.SGD(lr=0.01)
model_linear.compile(
    optimizer=sgd_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
In [7]:
# Train the linear model: log to TensorBoard, validate on the test set
# and report the elapsed wall-clock time
start = time.time()
tb_callback_ln = callbacks.TensorBoard(log_dir='/tmp/tensorboard/keras/linear/')
history_linear = model_linear.fit(
    X_train, y_train,
    batch_size=128,
    epochs=50,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[tb_callback_ln],
)
print('Seconds to train: ', time.time() - start)
In [8]:
# Training vs. validation accuracy of the linear model.
# The curves are labelled 'acc' / 'val acc', so plot the accuracy histories
# (the same keys the NN model plot uses) instead of the loss histories.
plt.plot(history_linear.history['acc'], label='acc')
plt.plot(history_linear.history['val_acc'], label='val acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()
In [9]:
# Score the test set and keep, for every image, the class with the highest probability
import numpy as np
p_test = model_linear.predict(X_test)
pred_test = p_test.argmax(axis=1)

# Evaluate the confusion matrix (true labels first, predictions second)
from sklearn.metrics import confusion_matrix
conf_matrix = confusion_matrix(y_test, pred_test)
print(conf_matrix)
In [10]:
import seaborn as sns

# Plot the confusion matrix as an annotated heatmap
target_names = list(map(str, range(10)))
# NOTE: sns.set() changes the global matplotlib rcParams, so it also affects
# the figures drawn by later cells.
sns.set(rc={'figure.figsize': (18, 6)})
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=target_names,
    yticklabels=target_names,
)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
In [11]:
print('NN model...')
# Input: batches of 28x28 float32 images
images = layers.Input(
    batch_shape=(None, 28, 28),
    dtype='float32',
    name='Images',
)
flat = layers.Flatten(name='Flat_image')(images)
# One hidden layer of 500 ReLU units between the flattened image and the output
dense = layers.Dense(
    500,
    activation='relu',
    name='Dense_layer',
)(flat)
output = layers.Dense(
    10,
    activation='softmax',
    name='Dense_output',
)(dense)

# Model architecture defined
model_nn = models.Model(inputs=images, outputs=output)
model_nn.summary()
In [12]:
# Compile the NN model with a plain SGD optimizer
sgd_optimizer = optimizers.SGD(lr=0.01)
model_nn.compile(
    optimizer=sgd_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
In [13]:
# Train the NN model: log to TensorBoard, validate on the test set
# and report the elapsed wall-clock time
start = time.time()
tb_callback_nn = callbacks.TensorBoard(log_dir='/tmp/tensorboard/keras/neural/')
history_nn = model_nn.fit(
    X_train, y_train,
    batch_size=128,
    epochs=50,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[tb_callback_nn],
)
print('Seconds to train: ', time.time() - start)
In [14]:
# Training vs. validation accuracy of the NN model
for history_key, curve_label in (('acc', 'acc'), ('val_acc', 'val acc')):
    plt.plot(history_nn.history[history_key], label=curve_label)
plt.legend(loc='lower right')
plt.show()
In [15]:
# Compare the validation accuracy of the two models
for curve_label, model_history in (('Linear', history_linear),
                                   ('NN', history_nn)):
    plt.plot(model_history.history['val_acc'], label=curve_label)
plt.legend(loc='lower right')
plt.show()
In [16]:
# Define the model
print('Convolutional model...')
print('LeNet architecture.')

# Inputs: batches of 28x28 single-channel float32 images
images = layers.Input(
    batch_shape=(None, 28, 28, 1),
    dtype='float32',
    name='Images',
)

# First convolutional layer: 20 filters of 5x5 followed by 2x2 max pooling
conv1 = layers.Conv2D(filters=20, kernel_size=(5, 5))(images)
pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)

# Second convolutional layer: 20 filters of 5x5 followed by 2x2 max pooling
conv2 = layers.Conv2D(filters=20, kernel_size=(5, 5))(pool1)
pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)

# Flatten the convolution output
flat = layers.Flatten(name='Flat_image')(pool2)

# First dense layer: 500 ReLU units
dense1 = layers.Dense(units=500, activation='relu', name='Dense_1')(flat)

# Second dense layer: 10 softmax outputs
output = layers.Dense(units=10, activation='softmax', name='Dense_output')(dense1)

# Model architecture defined
model_conv = models.Model(inputs=images, outputs=output)
model_conv.summary()
In [17]:
# Select the optimizer and compile the convolutional model
sgd_optimizer = optimizers.SGD(lr=0.01)
model_conv.compile(
    optimizer=sgd_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
In [18]:
# Train the convolutional model.
# Add the trailing channel axis declared by the convolutional input (28, 28, 1).
X_train_img = X_train.reshape((len(X_train), 28, 28, 1))
X_test_img = X_test.reshape((len(X_test), 28, 28, 1))

start = time.time()
tb_callback_cnn = callbacks.TensorBoard(log_dir='/tmp/tensorboard/keras/CNN/')
history = model_conv.fit(
    X_train_img, y_train,
    batch_size=128,
    epochs=10,
    verbose=1,
    validation_data=(X_test_img, y_test),
    callbacks=[tb_callback_cnn],
)
print('Seconds to train: ', time.time() - start)
In [18]:
# Compare the validation accuracy of the convolutional model with the previous two
for curve_label, model_history in (('Linear', history_linear),
                                   ('NN', history_nn),
                                   ('Conv', history)):
    plt.plot(model_history.history['val_acc'], label=curve_label)
plt.legend(loc='lower right')
plt.show()
In [19]:
# Score the test set with the convolutional model and keep, for every image,
# the class with the highest probability
import numpy as np
p_test = model_conv.predict(X_test_img)
pred_test = p_test.argmax(axis=1)

# Evaluate the confusion matrix (true labels first, predictions second)
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, pred_test))
In [20]:
# Print, for the first misclassified test images, the two most probable
# classes together with their predicted probabilities.
test_errors = []
test_errors_real = []
test_errors_predict = []
num_errors = 32
# Positions (in test-set order) of the wrong predictions; only the first
# num_errors of them are kept.
error_positions = np.flatnonzero(pred_test != y_test)[:num_errors]
for n, i in enumerate(error_positions, start=1):
    test_errors.append(X_test_img[i, :, :, 0])
    test_errors_real.append(y_test[i])
    test_errors_predict.append(pred_test[i])
    # Classes sorted from the most to the least probable
    index = p_test[i].argsort()[::-1]
    print('n: %2i - Pred 1: %1i(%0.3f) - Pred 2: %1i(%0.3f)' % (
        n, index[0], p_test[i][index[0]], index[1], p_test[i][index[1]]))
print("Num errors: ", len(test_errors))

# Plot the collected misclassified images (R: real label, P: predicted label).
# Iterate over what was actually collected, so that having fewer than
# num_errors mistakes does not raise an IndexError.
n_cols = 8
n_rows = max(1, -(-len(test_errors) // n_cols))  # ceiling division
fig = plt.figure()
for n, (image, real, predicted) in enumerate(
        zip(test_errors, test_errors_real, test_errors_predict)):
    a = fig.add_subplot(n_rows, n_cols, n + 1)
    a.set_title('R:'+str(real)+' - P:'+str(predicted))
    a.imshow(image)
# Adjust the spacing once, after all the subplots exist
fig.tight_layout()
In [21]:
# New model with the same architecture as model_conv but with its own, freshly
# initialized weights. Calling models.Model(inputs=images, outputs=output) again
# would wrap the very same layer objects, so the "new" model would share the
# already trained weights with model_conv. Rebuilding from the config creates
# new layers instead.
model_conv2 = models.Model.from_config(model_conv.get_config())
# SGD with Nesterov momentum and learning-rate decay
sgd_optimizer = optimizers.SGD(lr=0.01, momentum=0.99, decay=0.005, nesterov=True)
model_conv2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=sgd_optimizer,
    metrics=['accuracy'],
)
In [22]:
# Define a generator to X_train. Same code to an HDF5 source
def batch_generator(X, y, batch_size=64):
    """Yield (x_batch, y_batch) slices of X and y forever, in shuffled batch order.

    The data is cut into consecutive, non-overlapping slices of exactly
    ``batch_size`` rows. A trailing partial slice is dropped. The order in
    which the slices are served is re-drawn at random on every pass over the
    data; the rows inside each slice keep their original order.

    Args:
        X: sliceable data with a ``shape`` attribute; rows on the first axis.
        y: sliceable targets aligned with ``X`` on the first axis.
        batch_size: number of rows per batch (at least 1).

    Yields:
        Tuples ``(X[s:s + batch_size], y[s:s + batch_size])``.

    Raises:
        ValueError: on the first ``next()`` call, if ``batch_size`` is smaller
            than 1 or ``X`` has fewer than ``batch_size`` rows. Without this
            check the generator would loop forever without yielding anything.
    """
    data_size = X.shape[0]
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got %r' % (batch_size,))
    if data_size < batch_size:
        raise ValueError('need at least batch_size=%d samples, got %d'
                         % (batch_size, data_size))
    # The "+ 1" keeps the last full batch when data_size is an exact multiple
    # of batch_size.
    batch_starts = np.arange(0, data_size - batch_size + 1, batch_size)
    while True:
        # Randomize the batch order in each epoch
        for start in np.random.permutation(batch_starts):
            yield X[start:start + batch_size], y[start:start + batch_size]
In [23]:
# Test the generator: draw one batch and inspect it
seq = batch_generator(X_train_img, y_train)
next_seq = next(seq)
batch_images, batch_labels = next_seq
print(batch_images[0].shape)  # shape of a single image in the batch
print(batch_labels)           # labels of the whole batch
In [24]:
# Train using the fit_generator function.
# One batch size feeds both the generator and the step count, so the two
# cannot get out of sync.
gen_batch_size = 64
# Number of steps to use all the full training batches in an epoch.
steps = X_train_img.shape[0] // gen_batch_size
history = model_conv2.fit_generator(
    batch_generator(X_train_img, y_train, batch_size=gen_batch_size),
    steps_per_epoch=steps,
    epochs=10,
    verbose=1,
    validation_data=(X_test_img, y_test),
)
In [ ]: