Deep Learning models can take quite a bit of time to run, particularly if a GPU isn't used.
In the interest of time, you could sample a subset of observations (e.g. $1000$) that are a particular digit of your choice (e.g. $6$) and $1000$ observations that aren't that digit (i.e. $\neq 6$).
We will build a model on that reduced problem and see how it performs on the test dataset.
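For instance, here is a minimal sketch of that subsampling idea (an illustration only; the notebook below instead keeps all examples of the chosen digit and 6000 of the others):
import numpy as np
from keras.datasets import mnist

(X_tr, y_tr), _ = mnist.load_data()
rng = np.random.RandomState(1338)

six_idx = np.where(y_tr == 6)[0]      # indices of the chosen digit
other_idx = np.where(y_tr != 6)[0]    # indices of every other digit

# sample 1000 of each class, without replacement
sample_idx = np.concatenate([rng.choice(six_idx, 1000, replace=False),
                             rng.choice(other_idx, 1000, replace=False)])
X_small = X_tr[sample_idx]
y_small = (y_tr[sample_idx] == 6).astype(int)   # Six=1, Not Six=0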
In [1]:
#Import the required libraries
import numpy as np
np.random.seed(1338)
from keras.datasets import mnist
In [2]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
In [3]:
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
In [4]:
from keras.utils import np_utils
from keras.optimizers import SGD
In [5]:
#Load the training and testing data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
In [6]:
X_test_orig = X_test
In [7]:
from keras import backend as K
In [8]:
img_rows, img_cols = 28, 28
if K.image_data_format() == 'channels_first':
    shape_ord = (1, img_rows, img_cols)
else:  # channels_last
    shape_ord = (img_rows, img_cols, 1)
In [9]:
X_train = X_train.reshape((X_train.shape[0],) + shape_ord)
X_test = X_test.reshape((X_test.shape[0],) + shape_ord)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
In [10]:
np.random.seed(1338)  # for reproducibility!
# Test data
X_test = X_test.copy()
Y = y_test.copy()
# Converting the output to binary classification (Six=1, Not Six=0)
Y_test = Y == 6
Y_test = Y_test.astype(int)
# Selecting the 5918 examples where the output is 6
X_six = X_train[y_train == 6].copy()
Y_six = y_train[y_train == 6].copy()
# Selecting the examples where the output is not 6
X_not_six = X_train[y_train != 6].copy()
Y_not_six = y_train[y_train != 6].copy()
# Selecting 6000 random examples from the data that
# only contains the data where the output is not 6
random_rows = np.random.randint(0, X_not_six.shape[0], 6000)
X_not_six = X_not_six[random_rows]
Y_not_six = Y_not_six[random_rows]
In [11]:
# Appending the data with output as 6 and data with output != 6
X_train = np.append(X_six, X_not_six)
# Reshaping the appended data to the appropriate form
X_train = X_train.reshape((X_six.shape[0] + X_not_six.shape[0],) + shape_ord)
# Appending the labels and converting the labels to
# binary classification (Six=1, Not Six=0)
Y_labels = np.append(Y_six,Y_not_six)
Y_train = Y_labels == 6
Y_train = Y_train.astype(int)
In [12]:
print(X_train.shape, Y_labels.shape, X_test.shape, Y_test.shape)
In [13]:
# Converting the classes to their binary categorical (one-hot) form
nb_classes = 2
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)
In [14]:
# -- Initializing the values for the convolution neural network
nb_epoch = 2  # kept very low! Please increase if you have a GPU
batch_size = 64
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3
# Vanilla SGD
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
In [15]:
model = Sequential()
model.add(Conv2D(nb_filters, (nb_conv, nb_conv), padding='valid',
                 input_shape=shape_ord))  # note: the very first layer **must** always specify the input_shape
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
In [16]:
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
In [17]:
hist = model.fit(X_train, Y_train, batch_size=batch_size,
                 epochs=nb_epoch, verbose=1,
                 validation_data=(X_test, Y_test))
In [18]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.legend(['Training', 'Validation'])
plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.plot(hist.history['acc'])
plt.plot(hist.history['val_acc'])
plt.legend(['Training', 'Validation'], loc='lower right')
Out[18]:
In [19]:
print('Available Metrics in Model: {}'.format(model.metrics_names))
In [20]:
# Evaluating the model on the test data
loss, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)
In [21]:
import matplotlib.pyplot as plt
%matplotlib inline
In [22]:
slice = 15
predicted = model.predict(X_test[:slice]).argmax(-1)
plt.figure(figsize=(16,8))
for i in range(slice):
    plt.subplot(1, slice, i+1)
    plt.imshow(X_test_orig[i], interpolation='nearest')
    plt.text(0, 0, predicted[i], color='black',
             bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')
In [23]:
model = Sequential()
model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 padding='valid', input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
In [24]:
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size,
          epochs=nb_epoch, verbose=1,
          validation_data=(X_test, Y_test))
Out[24]:
In [25]:
#Evaluating the model on the test data
score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score)
print('Test accuracy:', accuracy)
In [26]:
model = Sequential()
model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 padding='valid',
                 input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
In [27]:
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size,
          epochs=nb_epoch, verbose=1,
          validation_data=(X_test, Y_test))
Out[27]:
In [28]:
#Evaluating the model on the test data
score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score)
print('Test accuracy:', accuracy)
In [29]:
model = Sequential()
model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 padding='valid', input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Conv2D(nb_filters, (nb_conv, nb_conv)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
In [30]:
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size,
          epochs=nb_epoch, verbose=1,
          validation_data=(X_test, Y_test))
Out[30]:
In [31]:
#Evaluating the model on the test data
score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score)
print('Test accuracy:', accuracy)
In [32]:
# Function for constructing the convolution neural network
# Feel free to add parameters, if you want
def build_model():
    """Build, train and evaluate the CNN on the binary (six / not-six) task."""
    model = Sequential()
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                     padding='valid',
                     input_shape=shape_ord))
    model.add(Activation('relu'))
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    model.fit(X_train, Y_train, batch_size=batch_size,
              epochs=nb_epoch, verbose=1,
              validation_data=(X_test, Y_test))

    # Evaluating the model on the test data
    score, accuracy = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score)
    print('Test accuracy:', accuracy)
In [33]:
#Timing how long it takes to build the model and test it.
%timeit -n1 -r1 build_model()
In this exercise we want to build a (quite shallow) network which contains two [Convolution, Convolution, MaxPooling] stages, and two Dense layers.
To test a different optimizer, we will use AdaDelta, which is a bit more complex than the simple Vanilla SGD with momentum.
In [35]:
from keras.optimizers import Adadelta
In [36]:
input_shape = shape_ord
nb_classes = 10
## [conv@32x3x3+relu]x2 --> MaxPool@2x2 --> DropOut@0.25 -->
## [conv@64x3x3+relu]x2 --> MaxPool@2x2 --> DropOut@0.25 -->
## Flatten --> FC@512+relu --> DropOut@0.5 --> FC@nb_classes+SoftMax
## NOTE: in each pair of Conv layers, use `padding="same"` for the first and `padding="valid"` for the second
In [40]:
# %load solutions/sol31.py
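# One possible sketch of the architecture described above -- an illustration only,
# NOT the actual contents of solutions/sol31.py. Note that training on all ten
# digits would also require re-encoding y_train / y_test with np_utils.to_categorical.
model = Sequential()

# first stage: two 3x3 convolutions with 32 filters each ('same' then 'valid')
model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3), padding='valid'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# second stage: two 3x3 convolutions with 64 filters each ('same' then 'valid')
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), padding='valid'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# fully-connected classifier
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=Adadelta(),
              metrics=['accuracy'])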
An important feature of Keras layers is that each of them has an input_shape attribute, which you can use to inspect the shape of the input tensor, and an output_shape attribute, for inspecting the shape of the output tensor.
As we can see, the input shape of the first convolutional layer corresponds to the input_shape attribute (which must be specified by the user). In this case, it is a 28x28 image with a single channel.
Since this convolutional layer has padding set to same, its output width and height remain the same, and the number of output channels equals the number of filters learned by the layer.
The following convolutional layer, instead, has the default padding (valid), and therefore reduces width and height by $(k-1)$, where $k$ is the size of the kernel: for example, a 3x3 kernel turns a 28x28 input into a 26x26 output.
MaxPooling layers, instead, reduce the width and height of the input tensor, but keep the same number of channels.
Activation layers, of course, don't change the shape.
In [42]:
for i, layer in enumerate(model.layers):
    print("Layer", i, "\t", layer.name, "\t\t", layer.input_shape, "\t", layer.output_shape)
In the same way, we can visualize the shape of the weights learned by each layer. In particular, Keras lets you inspect weights by using the get_weights method of a layer object. This will return a list with two elements, the first one being the weight tensor and the second one being the bias vector.
In particular, a Conv2D layer learns a weight tensor of shape $(k, k, n_i, n_o)$, where $k$ is the kernel size, $n_i$ is the number of input channels and $n_o$ is the number of filters. For each of the $n_o$ filters, a bias is also learned.
In [45]:
for i, layer in enumerate(model.layers):
    if len(layer.get_weights()) > 0:
        W, b = layer.get_weights()
        print("Layer", i, "\t", layer.name, "\t\t", W.shape, "\t", b.shape)
Batch normalization normalizes the activations of the previous layer at each batch, i.e. it applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
from keras.layers.normalization import BatchNormalization

BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,
                   beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros',
                   moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None,
                   beta_constraint=None, gamma_constraint=None)
The main arguments are:
axis: the axis that should be normalized (typically the features axis). For instance, after a Conv2D layer with data_format="channels_first", set axis=1 in BatchNormalization.
center: if True, add the offset beta to the normalized tensor; if False, beta is ignored.
scale: if True, multiply by gamma; if False, gamma is not used. When the next layer is linear (also e.g. nn.relu), this can be disabled since the scaling will be done by the next layer.
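As a minimal usage sketch (the layer sizes here are chosen arbitrarily, not taken from the notebook's models), BatchNormalization can be dropped into a Sequential model like any other layer; with a channels_last backend the default axis=-1 normalizes over the channel axis:
from keras.layers.normalization import BatchNormalization

bn_model = Sequential()
bn_model.add(Conv2D(32, (3, 3), padding='valid', input_shape=shape_ord))
bn_model.add(BatchNormalization(axis=-1))  # assumes channels_last; with channels_first use axis=1 as noted above
bn_model.add(Activation('relu'))
bn_model.add(Flatten())
bn_model.add(Dense(nb_classes))
bn_model.add(Activation('softmax'))
bn_model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])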
In [34]:
# Try to add a new BatchNormalization layer to the model
# (after the Dropout layer), either before or after the ReLU Activation