In [53]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
import keras.backend as K
# Use Theano-style channels-first image ordering: (channels, rows, cols).
# NOTE(review): Keras 1.x API — removed in Keras 2 (use image_data_format there).
K.set_image_dim_ordering('th')

In [54]:
import numpy as np
# Fix the RNG seed so weight initialization and shuffling are reproducible.
np.random.seed(123)

Next, we'll import the Sequential model type from Keras. This is simply a linear stack of neural network layers, and it's perfect for the type of feed-forward CNN we're building in this tutorial.


In [55]:
from keras.models import Sequential

In [56]:
from keras.layers import Dense, Dropout, Activation, Flatten

In [57]:
from keras.layers import Convolution2D, MaxPooling2D

In [58]:
from keras.utils import np_utils

In [59]:
from keras.datasets import mnist
# Load pre-shuffled MNIST data into train and test sets
# (60k train / 10k test grayscale 28x28 digit images; downloads on first call).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [60]:
print(X_train.shape)


(60000, 28, 28)

In [61]:
from matplotlib import pyplot as plt
# Sanity-check the data by rendering the first training image.
plt.imshow(X_train[0])


Out[61]:
<matplotlib.image.AxesImage at 0x232b5ab50f0>

In [62]:
# Insert an explicit channel axis so each image becomes (1, 28, 28),
# matching the channels-first ('th') ordering configured for the backend.
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

In [63]:
print(X_train.shape)


(60000, 1, 28, 28)

In [64]:
# Cast to float32 and scale pixel intensities from [0, 255] down to [0, 1].
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [65]:
print(y_train.shape)


(60000,)

In [66]:
print(y_train[:10])


[5 0 4 1 9 2 1 3 1 4]

In [67]:
# One-hot encode the integer labels into 10-dim class vectors
# (required by the categorical_crossentropy loss used at compile time).
Y_train=np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)

In [68]:
print(Y_train.shape)


(60000, 10)

Step 7: Define model architecture. Now we're ready to define our model architecture. In actual R&D work, researchers will spend a considerable amount of time studying model architectures.

To keep this tutorial moving along, we're not going to discuss the theory or math here. This alone is a rich and meaty field, and we recommend the CS231n class mentioned earlier for those who want to learn more.

Plus, when you're just starting out, you can just replicate proven architectures from academic papers or use existing examples. Here's a list of example implementations in Keras.

Let's start by declaring a sequential model format:


In [69]:
model = Sequential()

In [70]:
model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(1,28,28)))

In [71]:
print(model.output_shape)


(None, 32, 26, 26)

In [72]:
# Second conv layer, then 2x2 max-pooling to halve the spatial dims,
# and dropout as regularization (drops 25% of activations during training).
model.add(Convolution2D(32, 3, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

In [73]:
# Flatten the conv feature maps into a vector for the fully-connected head.
model.add(Flatten())
# Hidden layer: was Dense(10, ...) here, which bottlenecks the features to 10
# dims before the 10-way softmax; 128 units matches the consolidated model
# assembled in the In[74] cell.
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# 10-way softmax output — one probability per digit class.
model.add(Dense(10, activation='softmax'))

In [74]:
# Consolidated architecture: the same network assembled in a single cell.
model = Sequential()
 
# Feature extractor: two 3x3 conv layers (Keras 1.x argument style),
# 2x2 max-pooling, then 25% dropout for regularization.
model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(1,28,28)))
model.add(Convolution2D(32, 3, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
 
# Classifier head: flatten, 128-unit hidden layer, 50% dropout,
# and a 10-way softmax over the digit classes.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

In [75]:
# Cross-entropy over the one-hot labels, Adam optimizer, and track accuracy
# during training/evaluation.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [ ]:
# Train for 10 epochs with mini-batches of 32.
# (nb_epoch is the Keras 1.x name; Keras 2 renamed it to epochs.)
model.fit(X_train, Y_train, 
          batch_size=32, nb_epoch=10, verbose=1)

In [ ]:
score = model.evaluate(X_test, Y_test, verbose=0)