In [1]:
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.optimizers import Adam, RMSprop
from keras.models import model_from_json
%matplotlib inline 
# from keras.datasets import mnist
tf.logging.set_verbosity(tf.logging.ERROR)
tf.set_random_seed(2017)


Using TensorFlow backend.

Load MNIST Data


In [2]:
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# test data
test_images = mnist.test.images.reshape(10000, 28, 28, 1)
test_labels = mnist.test.labels[:]


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz

Data Augmentation

  • image rotation
  • image width shift

In [3]:
augmentation_size = 110000
# pool the 55,000 training and 5,000 validation images into a single array
images = np.concatenate((mnist.train.images.reshape(55000, 28, 28, 1), mnist.validation.images.reshape(5000, 28, 28, 1)), axis=0)
labels = np.concatenate((mnist.train.labels, mnist.validation.labels), axis=0)

datagen_list = [
                ImageDataGenerator(rotation_range=20),
                ImageDataGenerator(rotation_range=30),
                ImageDataGenerator(width_shift_range=0.1),
                ImageDataGenerator(width_shift_range=0.2),
               ]

# each generator draws one batch (at most augmentation_size images) from the
# accumulated pool and appends it, so later generators also transform
# previously augmented images; the pool grows from 60,000 to 450,000 samples
for datagen in datagen_list:
    datagen.fit(images)
    for image, label in datagen.flow(images, labels, batch_size=augmentation_size, shuffle=False):
        images = np.concatenate((images, image), axis=0)
        labels = np.concatenate((labels, label), axis=0)
        break

print(images.shape)
print(labels.shape)


(450000, 28, 28, 1)
(450000, 10)
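
As a quick sanity check on the augmentation, a few of the generated digits can be plotted with matplotlib (already imported above); a minimal sketch:

In [ ]:
# quick look at a few augmented digits; the first 60,000 entries of `images`
# are the original data, so indices past that are generator output
fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for i, ax in enumerate(axes):
    ax.imshow(images[60000 + i].reshape(28, 28), cmap='gray')
    ax.set_title(np.argmax(labels[60000 + i]))
    ax.axis('off')
plt.show()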

Training Parameters


In [4]:
model_1_filter_size = 3
model_2_filter_size = 5
model_3_filter_size = 7
epochs = 10

Model 1 Architecture

  1. Convolution + Convolution + MaxPool + Dropout
  2. Convolution + Convolution + MaxPool + Dropout
  3. Convolution + MaxPool + Dropout
  4. Dense + Dropout
  5. Dense + Dropout
  6. Output

In [5]:
model1 = Sequential([Convolution2D(filters=64, kernel_size=(model_1_filter_size, model_1_filter_size), padding='same', activation='elu', input_shape=(28, 28, 1)), 
                     Convolution2D(filters=128, kernel_size=(model_1_filter_size, model_1_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(model_1_filter_size, model_1_filter_size), padding='same', activation='elu'),
                     Convolution2D(filters=128, kernel_size=(model_1_filter_size, model_1_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(model_1_filter_size, model_1_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Flatten(),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(10, activation='softmax'),
                     ])
model1.compile(optimizer=Adam(lr=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])
model1.fit(images, labels, batch_size=256, epochs=epochs, shuffle=True, verbose=1, validation_data=(test_images, test_labels))


Train on 450000 samples, validate on 10000 samples
Epoch 1/10
450000/450000 [==============================] - 111s - loss: 0.2300 - acc: 0.9279 - val_loss: 0.0270 - val_acc: 0.9915
Epoch 2/10
450000/450000 [==============================] - 110s - loss: 0.1013 - acc: 0.9691 - val_loss: 0.0196 - val_acc: 0.9934
Epoch 3/10
450000/450000 [==============================] - 108s - loss: 0.0845 - acc: 0.9748 - val_loss: 0.0181 - val_acc: 0.9950
Epoch 4/10
450000/450000 [==============================] - 108s - loss: 0.0770 - acc: 0.9772 - val_loss: 0.0199 - val_acc: 0.9953
Epoch 5/10
450000/450000 [==============================] - 108s - loss: 0.0721 - acc: 0.9789 - val_loss: 0.0204 - val_acc: 0.9943
Epoch 6/10
450000/450000 [==============================] - 110s - loss: 0.0692 - acc: 0.9797 - val_loss: 0.0177 - val_acc: 0.9953
Epoch 7/10
450000/450000 [==============================] - 108s - loss: 0.0656 - acc: 0.9812 - val_loss: 0.0182 - val_acc: 0.9951
Epoch 8/10
450000/450000 [==============================] - 108s - loss: 0.0641 - acc: 0.9816 - val_loss: 0.0162 - val_acc: 0.9952
Epoch 9/10
450000/450000 [==============================] - 108s - loss: 0.0633 - acc: 0.9822 - val_loss: 0.0172 - val_acc: 0.9947
Epoch 10/10
450000/450000 [==============================] - 107s - loss: 0.0620 - acc: 0.9826 - val_loss: 0.0193 - val_acc: 0.9956
Out[5]:
<keras.callbacks.History at 0x14a8ad625f8>
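
Keras' built-in summary gives the layer output shapes and parameter counts for the architecture above; a quick optional check:

In [ ]:
# layer output shapes and parameter counts for model 1
# (models 2 and 3 below differ only in kernel size)
model1.summary()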

In [6]:
model_json = model1.to_json()
with open("model1.json", "w") as json_file:
    json_file.write(model_json)
model1.save_weights("model1.h5")
print("Saved model to disk")


Saved model to disk

Model 2 Architecture

  1. Convolution + Convolution + MaxPool + Dropout
  2. Convolution + Convolution + MaxPool + Dropout
  3. Convolution + MaxPool + Dropout
  4. Dense + Dropout
  5. Dense + Dropout
  6. Output

In [7]:
model2 = Sequential([Convolution2D(filters=64, kernel_size=(model_2_filter_size, model_2_filter_size), padding='same', activation='elu', input_shape=(28, 28, 1)), 
                     Convolution2D(filters=128, kernel_size=(model_2_filter_size, model_2_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(model_2_filter_size, model_2_filter_size), padding='same', activation='elu'),
                     Convolution2D(filters=128, kernel_size=(model_2_filter_size, model_2_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(model_2_filter_size, model_2_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Flatten(),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(10, activation='softmax'),
                     ])
model2.compile(optimizer=Adam(lr=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])
model2.fit(images, labels, batch_size=256, epochs=epochs, shuffle=True, verbose=1, validation_data=(test_images, test_labels))


Train on 450000 samples, validate on 10000 samples
Epoch 1/10
450000/450000 [==============================] - 157s - loss: 0.2090 - acc: 0.9365 - val_loss: 0.0299 - val_acc: 0.9919
Epoch 2/10
450000/450000 [==============================] - 156s - loss: 0.0965 - acc: 0.9718 - val_loss: 0.0199 - val_acc: 0.9939
Epoch 3/10
450000/450000 [==============================] - 157s - loss: 0.0815 - acc: 0.9766 - val_loss: 0.0207 - val_acc: 0.9950
Epoch 4/10
450000/450000 [==============================] - 156s - loss: 0.0794 - acc: 0.9775 - val_loss: 0.0161 - val_acc: 0.9954
Epoch 5/10
450000/450000 [==============================] - 157s - loss: 0.0754 - acc: 0.9792 - val_loss: 0.0195 - val_acc: 0.9946
Epoch 6/10
450000/450000 [==============================] - 156s - loss: 0.0725 - acc: 0.9805 - val_loss: 0.0139 - val_acc: 0.9956
Epoch 7/10
450000/450000 [==============================] - 156s - loss: 0.0725 - acc: 0.9809 - val_loss: 0.0200 - val_acc: 0.9953
Epoch 8/10
450000/450000 [==============================] - 156s - loss: 0.0692 - acc: 0.9819 - val_loss: 0.0189 - val_acc: 0.9952
Epoch 9/10
450000/450000 [==============================] - 156s - loss: 0.0716 - acc: 0.9818 - val_loss: 0.0156 - val_acc: 0.9963
Epoch 10/10
450000/450000 [==============================] - 156s - loss: 0.0707 - acc: 0.9829 - val_loss: 0.0149 - val_acc: 0.9959
Out[7]:
<keras.callbacks.History at 0x14ae64d9b00>

In [8]:
model_json = model2.to_json()
with open("model2.json", "w") as json_file:
    json_file.write(model_json)
model2.save_weights("model2.h5")
print("Saved model to disk")


Saved model to disk

Model 3 Architecture

  1. Convolution + Convolution + MaxPool + Dropout
  2. Convolution + Convolution + MaxPool + Dropout
  3. Convolution + MaxPool + Dropout
  4. Dense + Dropout
  5. Dense + Dropout
  6. Output

In [9]:
model3 = Sequential([Convolution2D(filters=64, kernel_size=(model_3_filter_size, model_3_filter_size), padding='same', activation='elu', input_shape=(28, 28, 1)), 
                     Convolution2D(filters=128, kernel_size=(model_3_filter_size, model_3_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(model_3_filter_size, model_3_filter_size), padding='same', activation='elu'),
                     Convolution2D(filters=128, kernel_size=(model_3_filter_size, model_3_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(model_3_filter_size, model_3_filter_size), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Flatten(),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(10, activation='softmax'),
                     ])
model3.compile(optimizer=Adam(lr=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])
model3.fit(images, labels, batch_size=256, epochs=epochs, shuffle=True, verbose=1, validation_data=(test_images, test_labels))


Train on 450000 samples, validate on 10000 samples
Epoch 1/10
450000/450000 [==============================] - 218s - loss: 0.2103 - acc: 0.9382 - val_loss: 0.0412 - val_acc: 0.9896
Epoch 2/10
450000/450000 [==============================] - 216s - loss: 0.0983 - acc: 0.9723 - val_loss: 0.0203 - val_acc: 0.9945
Epoch 3/10
450000/450000 [==============================] - 216s - loss: 0.0872 - acc: 0.9762 - val_loss: 0.0236 - val_acc: 0.9948
Epoch 4/10
450000/450000 [==============================] - 216s - loss: 0.0809 - acc: 0.9792 - val_loss: 0.0244 - val_acc: 0.9945
Epoch 5/10
450000/450000 [==============================] - 216s - loss: 0.0778 - acc: 0.9808 - val_loss: 0.0210 - val_acc: 0.9946
Epoch 6/10
450000/450000 [==============================] - 215s - loss: 0.0759 - acc: 0.9821 - val_loss: 0.0347 - val_acc: 0.9929
Epoch 7/10
450000/450000 [==============================] - 216s - loss: 0.0780 - acc: 0.9822 - val_loss: 0.0223 - val_acc: 0.9947
Epoch 8/10
450000/450000 [==============================] - 215s - loss: 0.0771 - acc: 0.9829 - val_loss: 0.0239 - val_acc: 0.9954
Epoch 9/10
450000/450000 [==============================] - 215s - loss: 0.0769 - acc: 0.9835 - val_loss: 0.0213 - val_acc: 0.9961
Epoch 10/10
450000/450000 [==============================] - 215s - loss: 0.0743 - acc: 0.9845 - val_loss: 0.0234 - val_acc: 0.9949
Out[9]:
<keras.callbacks.History at 0x14cdd304940>

In [10]:
model_json = model3.to_json()
with open("model3.json", "w") as json_file:
    json_file.write(model_json)
model3.save_weights("model3.h5")
print("Saved model to disk")


Saved model to disk

Evaluate


In [3]:
# load json and create model
def model_open(name, test_images, test_labels):
    json_file = open(name + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(name + '.h5')
    print("Loaded model from disk")
    loaded_model.compile(optimizer=Adam(lr=0.0005), loss='categorical_crossentropy', metrics=['acc'])
    prob = loaded_model.predict_proba(test_images)
    acc = np.mean(np.equal(np.argmax(prob, axis=1), np.argmax(test_labels, axis=1)))
    print('\nmodel : %s, test accuracy : %4f\n' %(name, acc))
    return prob

In [4]:
prob_1 = model_open('model1', test_images, test_labels)
prob_2 = model_open('model2', test_images, test_labels)
prob_3 = model_open('model3', test_images, test_labels)


Loaded model from disk
10000/10000 [==============================] - 1s     

model : model1, test accuracy : 0.995600

Loaded model from disk
10000/10000 [==============================] - 1s     

model : model2, test accuracy : 0.995900

Loaded model from disk
10000/10000 [==============================] - 2s     

model : model3, test accuracy : 0.994900


In [9]:
# weighted average of the three models' class probabilities
final_prob = prob_1 * 1 + prob_2 * 2 + prob_3 * 1
final_score = np.mean(np.equal(np.argmax(final_prob, axis=1), np.argmax(test_labels, axis=1)))
print('final test accuracy : ', final_score)


final test accuracy :  0.9965
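
The 1:2:1 weighting gives double weight to model 2, the strongest single model, and since argmax is unaffected by positive scaling the weights need no normalization. A minimal sketch for comparing a few alternative weightings on the same predictions:

In [ ]:
# compare a few candidate weightings of the three probability matrices
from itertools import product

for w1, w2, w3 in product([1, 2], repeat=3):
    weighted = w1 * prob_1 + w2 * prob_2 + w3 * prob_3
    acc = np.mean(np.equal(np.argmax(weighted, axis=1), np.argmax(test_labels, axis=1)))
    print('weights (%d, %d, %d) -> test accuracy : %.4f' % (w1, w2, w3, acc))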
