from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
# We first fetch the MNIST dataset, which is a commonly used dataset for handwritten digit recognition.
# Keras provides a handy function for loading this data.
# Load the data, already shuffled and split between train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

num_classes = 10  # number of classes (digits) to predict

# Flatten every 28x28 image into a 784-long vector, which is the input the
# network expects. The sample count is taken from the array itself instead of
# being hard-coded, so the script keeps working if the dataset is subsampled.
x_train = x_train.reshape(x_train.shape[0], 28 * 28)
x_test = x_test.reshape(x_test.shape[0], 28 * 28)

# Pixel values are in the [0..255] interval, while the network expects values
# between 0 and 1: convert to float32 and rescale.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# The labels "0", "1" .. "9" are encoded categorically (one-hot) for both the
# train and the test set, matching the categorical cross-entropy loss.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# First, define the model structure.
# Two-layer fully connected classifier built as a linear stack of layers.
model = Sequential()

# Layer 1: takes the flattened 28*28 = 784-value input vector, uses the ReLU
# activation function, and produces an output space with 512 units.
model.add(Dense(512, activation='relu', input_shape=(28 * 28,)))

# Layer 2: softmax layer with one unit per class, which returns an array of
# probability scores, one for each digit (0..9). Sized from num_classes so it
# stays consistent with the one-hot label encoding.
model.add(Dense(num_classes, activation='softmax'))

# Expected model summary (layer names are from the original session):
# Layer (type) Output Shape Param #
# dense_4 (Dense) (None, 512) 401920 # (28 * 28 * 1[grayscale])[input values] * 512 [neurons in this layer] + 512 [bias values]
# dense_5 (Dense) (None, 10) 5130 # 512 [input values] * 10 [neurons in this layer] + 10 [bias values]
# Now we can fit the model. This should take about 10-15 seconds per epoch on a commodity GPU
# (or about 2-3 minutes for 12 epochs; note that the code below trains for 5 epochs).
# To get the neural network ready for training we need to compile it and
# specify the loss, the optimizer and the metrics to track.
model.compile(
    # loss: how the network measures its performance on the training data
    # (and steers itself in the right direction)
    loss='categorical_crossentropy',
    # optimizer: the mechanism by which the network updates itself based on
    # the data it sees and the loss it generates
    optimizer=RMSprop(),
    # metrics: what we care about during training and testing of the model
    # (accuracy: the fraction of images that were correctly classified)
    metrics=['accuracy'],
)

# Now we are ready to train the network; we use the fit method in Keras.
history = model.fit(
    x_train, y_train,
    batch_size=128,  # the size of the mini-batch used by the network to train
    epochs=5,  # the number of iterations over the training dataset
    # NOTE: the test set doubles as validation data here; it is only
    # evaluated on at the end of each epoch, never trained on.
    validation_data=(x_test, y_test),
)

# Now we score the model on the test dataset (which the model was not trained
# on) and report the results: score[0] is the loss, score[1] the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
