In [53]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import keras.backend as K
# Use Theano-style "channels first" ordering (samples, channels, rows, cols),
# matching the (1, 28, 28) input shapes used throughout this notebook.
# NOTE(review): set_image_dim_ordering('th') is the Keras 1.x API; Keras 2
# renamed it to set_image_data_format('channels_first') — confirm version.
K.set_image_dim_ordering('th')
In [54]:
import numpy as np
# Fix the RNG seed so shuffling / weight initialization are reproducible.
np.random.seed(123)
Next, we'll import the Sequential model type from Keras. This is simply a linear stack of neural network layers, and it's perfect for the type of feed-forward CNN we're building in this tutorial.
In [55]:
from keras.models import Sequential
In [56]:
from keras.layers import Dense, Dropout, Activation, Flatten
In [57]:
from keras.layers import Convolution2D, MaxPooling2D
In [58]:
from keras.utils import np_utils
In [59]:
from keras.datasets import mnist
# Load pre-shuffled MNIST data into train and test sets
# (images in X_*, integer digit labels in y_*).
(X_train, y_train), (X_test, y_test) = mnist.load_data()
In [60]:
print(X_train.shape)
In [61]:
from matplotlib import pyplot as plt
# Display the first training digit. MNIST images are single-channel, so
# render with an explicit grayscale colormap instead of the default
# (viridis), which would show the digit in misleading false color.
plt.imshow(X_train[0], cmap='gray')
Out[61]:
In [62]:
# Insert an explicit single-channel axis: (samples, 28, 28) becomes
# (samples, 1, 28, 28) — the "channels first" layout the Convolution2D
# input_shape below expects.
X_train = X_train[:, np.newaxis, :, :]
X_test = X_test[:, np.newaxis, :, :]
In [63]:
print(X_train.shape)
In [64]:
# Convert pixel intensities from uint8 [0, 255] to float32 [0, 1].
# Cast first, then divide, so the division is performed in float32
# exactly as before — only the in-place /= form has changed.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
In [65]:
print(y_train.shape)
In [66]:
print(y_train[:10])
In [67]:
# One-hot encode the integer labels into 10-dimensional class vectors,
# as required by the categorical_crossentropy loss used below.
Y_train=np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)
In [68]:
print(Y_train.shape)
Step 7: Define model architecture. Now we're ready to define our model architecture. In actual R&D work, researchers will spend a considerable amount of time studying model architectures.
To keep this tutorial moving along, we're not going to discuss the theory or math here. This alone is a rich and meaty field, and we recommend the CS231n class mentioned earlier for those who want to learn more.
Plus, when you're just starting out, you can simply replicate proven architectures from academic papers or use existing examples, such as the example implementations in the Keras repository.
Let's start by declaring a sequential model format:
In [69]:
model = Sequential()
In [70]:
model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(1,28,28)))
In [71]:
print(model.output_shape)
In [72]:
# Second 3x3 conv layer, then 2x2 max-pooling to halve the spatial
# dimensions, then dropout to regularize the convolutional features.
model.add(Convolution2D(32, 3, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
In [73]:
model.add(Flatten())
# Fully connected head. Use 128 hidden units to match the consolidated
# model definition below; the original 10-unit hidden layer created an
# accidental bottleneck immediately before the 10-way softmax output.
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
In [74]:
# Consolidated architecture: two 3x3 conv layers, 2x2 max-pooling,
# dropout, then a 128-unit fully connected layer feeding a 10-way softmax.
model = Sequential()
cnn_layers = [
    Convolution2D(32, 3, 3, activation='relu', input_shape=(1,28,28)),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
]
for layer in cnn_layers:
    model.add(layer)
In [75]:
# Multi-class setup: cross-entropy over the 10 softmax outputs, Adam
# optimizer, and accuracy reported during training and evaluation.
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
In [ ]:
# Train for 10 epochs with mini-batches of 32.
# NOTE(review): nb_epoch is the Keras 1.x spelling; Keras 2 renamed it
# to epochs — confirm the installed version.
model.fit(X_train, Y_train,
batch_size=32, nb_epoch=10, verbose=1)
In [ ]:
score = model.evaluate(X_test, Y_test, verbose=0)