General End-to-End Model Building Process

See video: https://youtu.be/6kwQEBMandw?t=6526


In [1]:
from theano.sandbox import cuda
#cuda.use('gpu2')


WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10).  Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 1060 6GB (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5110)

In [2]:
%matplotlib inline
from __future__ import division, print_function
import utils; reload(utils)
from utils import *


Using Theano backend.

Setup


In [3]:
batch_size=64

In [4]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


Out[4]:
((60000L, 28L, 28L), (60000L,), (10000L, 28L, 28L), (10000L,))

In [5]:
X_test = np.expand_dims(X_test,1)
X_train = np.expand_dims(X_train,1)

In [6]:
X_train.shape


Out[6]:
(60000L, 1L, 28L, 28L)

In [7]:
y_train[:5]


Out[7]:
array([5, 0, 4, 1, 9], dtype=uint8)

In [8]:
y_train = onehot(y_train)
y_test = onehot(y_test)
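
`onehot` comes from the course's `utils` module; for reference, Keras ships an equivalent helper. A minimal sketch (assuming a Keras 1.x install; not part of the original notebook):

In [ ]:
# Sketch: Keras' built-in one-hot helper gives the same encoding as utils.onehot
import numpy as np
from keras.utils.np_utils import to_categorical
to_categorical(np.array([5, 0, 4, 1, 9]), nb_classes=10)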

In [9]:
y_train[:5]


Out[9]:
array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [10]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)

In [11]:
def norm_input(x): return (x-mean_px)/std_px

Linear model


In [12]:
def get_lin_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [13]:
lm = get_lin_model()


C:\Users\matsaleh\AppData\Local\conda\conda\envs\fastai2\lib\site-packages\keras\layers\core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_1 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [14]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)

In [16]:
lm.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
60000/60000 [==============================] - 18s - loss: 0.4292 - acc: 0.8730 - val_loss: 0.3056 - val_acc: 0.9105
Out[16]:
<keras.callbacks.History at 0x7dd3bcf8>

In [18]:
lm.optimizer.lr=0.1
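
A note on this pattern: in Keras 1 the optimizer's learning rate is a Theano shared variable, and assigning a plain float as above rebinds the attribute, while the already-compiled training function keeps using the original variable. A sketch of the more reliable route via `K.set_value` (an aside, not from the original notebook):

In [ ]:
# Sketch (Keras 1 / Theano): update the learning-rate shared variable in place
# so the compiled training function picks up the new value.
from keras import backend as K
K.set_value(lm.optimizer.lr, 0.1)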

In [20]:
lm.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
60000/60000 [==============================] - 17s - loss: 0.2994 - acc: 0.9144 - val_loss: 0.2976 - val_acc: 0.9138
Out[20]:
<keras.callbacks.History at 0x7df70a90>

In [21]:
lm.optimizer.lr=0.01

In [23]:
lm.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
60000/60000 [==============================] - 18s - loss: 0.2856 - acc: 0.9195 - val_loss: 0.2782 - val_acc: 0.9203
Epoch 2/4
60000/60000 [==============================] - 18s - loss: 0.2769 - acc: 0.9214 - val_loss: 0.2820 - val_acc: 0.9196
Epoch 3/4
60000/60000 [==============================] - 17s - loss: 0.2733 - acc: 0.9230 - val_loss: 0.2687 - val_acc: 0.9247
Epoch 4/4
60000/60000 [==============================] - 18s - loss: 0.2686 - acc: 0.9258 - val_loss: 0.2793 - val_acc: 0.9186
Out[23]:
<keras.callbacks.History at 0x17e545e80>

Single dense layer


In [24]:
def get_fc_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(512, activation='softmax'),   # NB: softmax on a hidden layer is unusual; 'relu' is the standard choice and trains much better
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [25]:
fc = get_fc_model()


C:\Users\matsaleh\AppData\Local\conda\conda\envs\fastai2\lib\site-packages\keras\layers\core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_2 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [27]:
fc.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
60000/60000 [==============================] - 18s - loss: 1.5902 - acc: 0.7605 - val_loss: 1.1153 - val_acc: 0.7872
Out[27]:
<keras.callbacks.History at 0x17f1d8c50>

In [28]:
fc.optimizer.lr=0.1

In [30]:
fc.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
60000/60000 [==============================] - 19s - loss: 0.8727 - acc: 0.8054 - val_loss: 0.6914 - val_acc: 0.8202
Epoch 2/4
60000/60000 [==============================] - 19s - loss: 0.5973 - acc: 0.8354 - val_loss: 0.5383 - val_acc: 0.8377
Epoch 3/4
60000/60000 [==============================] - 18s - loss: 0.4874 - acc: 0.8500 - val_loss: 0.4618 - val_acc: 0.8423
Epoch 4/4
60000/60000 [==============================] - 18s - loss: 0.4345 - acc: 0.8588 - val_loss: 0.4412 - val_acc: 0.8569
Out[30]:
<keras.callbacks.History at 0x17f3beef0>

In [31]:
fc.optimizer.lr=0.01

In [33]:
fc.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
60000/60000 [==============================] - 19s - loss: 0.4011 - acc: 0.8658 - val_loss: 0.4143 - val_acc: 0.8639
Epoch 2/4
60000/60000 [==============================] - 18s - loss: 0.3724 - acc: 0.8783 - val_loss: 0.3970 - val_acc: 0.8554
Epoch 3/4
60000/60000 [==============================] - 18s - loss: 0.3533 - acc: 0.8890 - val_loss: 0.3736 - val_acc: 0.8816
Epoch 4/4
60000/60000 [==============================] - 18s - loss: 0.3319 - acc: 0.9044 - val_loss: 0.3556 - val_acc: 0.8967
Out[33]:
<keras.callbacks.History at 0x17f3bea20>

Basic 'VGG-style' CNN


In [34]:
def get_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),   # -> (32, 26, 26)
        Convolution2D(32,3,3, activation='relu'),   # -> (32, 24, 24)
        MaxPooling2D(),                             # -> (32, 12, 12)
        Convolution2D(64,3,3, activation='relu'),   # -> (64, 10, 10)
        Convolution2D(64,3,3, activation='relu'),   # -> (64, 8, 8)
        MaxPooling2D(),                             # -> (64, 4, 4)
        Flatten(),                                  # -> 1024
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [ ]:
model = get_model()

In [ ]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [37]:
model.optimizer.lr=0.1

In [38]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
60000/60000 [==============================] - 7s - loss: 0.0353 - acc: 0.9889 - val_loss: 0.0291 - val_acc: 0.9902
Out[38]:
<keras.callbacks.History at 0x7f3807ebbe10>

In [39]:
model.optimizer.lr=0.01

In [ ]:
model.fit_generator(batches, batches.n, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

Data augmentation


In [23]:
model = get_model()

In [76]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)
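
To sanity-check the augmentation parameters it helps to look at a batch of transformed digits. A minimal matplotlib sketch (the plotting code and figure size are assumptions, not part of the original notebook):

In [ ]:
# Sketch: pull one augmented batch and display the first 8 digits
import matplotlib.pyplot as plt
imgs, labels = next(batches)
plt.figure(figsize=(12, 2))
for i in range(8):
    plt.subplot(1, 8, i + 1)
    plt.imshow(imgs[i, 0], cmap='gray')   # channel axis first ('th' dim ordering)
    plt.axis('off')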

In [24]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
60000/60000 [==============================] - 7s - loss: 0.2064 - acc: 0.9360 - val_loss: 0.0643 - val_acc: 0.9778
Out[24]:
<keras.callbacks.History at 0x7fa800c8d710>

In [25]:
model.optimizer.lr=0.1

In [26]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 7s - loss: 0.0706 - acc: 0.9787 - val_loss: 0.0496 - val_acc: 0.9844
Epoch 2/4
60000/60000 [==============================] - 7s - loss: 0.0531 - acc: 0.9838 - val_loss: 0.0395 - val_acc: 0.9873
Epoch 3/4
60000/60000 [==============================] - 7s - loss: 0.0473 - acc: 0.9856 - val_loss: 0.0329 - val_acc: 0.9886
Epoch 4/4
60000/60000 [==============================] - 7s - loss: 0.0402 - acc: 0.9870 - val_loss: 0.0381 - val_acc: 0.9878
Out[26]:
<keras.callbacks.History at 0x7fa8003268d0>

In [27]:
model.optimizer.lr=0.01

In [28]:
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/8
60000/60000 [==============================] - 7s - loss: 0.0381 - acc: 0.9887 - val_loss: 0.0295 - val_acc: 0.9908
Epoch 2/8
60000/60000 [==============================] - 7s - loss: 0.0340 - acc: 0.9893 - val_loss: 0.0266 - val_acc: 0.9918
Epoch 3/8
60000/60000 [==============================] - 7s - loss: 0.0318 - acc: 0.9903 - val_loss: 0.0400 - val_acc: 0.9877
Epoch 4/8
60000/60000 [==============================] - 7s - loss: 0.0322 - acc: 0.9899 - val_loss: 0.0264 - val_acc: 0.9922
Epoch 5/8
60000/60000 [==============================] - 7s - loss: 0.0281 - acc: 0.9910 - val_loss: 0.0266 - val_acc: 0.9911
Epoch 6/8
60000/60000 [==============================] - 7s - loss: 0.0283 - acc: 0.9909 - val_loss: 0.0238 - val_acc: 0.9922
Epoch 7/8
60000/60000 [==============================] - 7s - loss: 0.0277 - acc: 0.9917 - val_loss: 0.0314 - val_acc: 0.9911
Epoch 8/8
60000/60000 [==============================] - 6s - loss: 0.0251 - acc: 0.9925 - val_loss: 0.0287 - val_acc: 0.9921
Out[28]:
<keras.callbacks.History at 0x7fa800326790>

In [29]:
model.optimizer.lr=0.001

In [30]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 6s - loss: 0.0242 - acc: 0.9920 - val_loss: 0.0271 - val_acc: 0.9912
Epoch 2/12
60000/60000 [==============================] - 7s - loss: 0.0250 - acc: 0.9922 - val_loss: 0.0351 - val_acc: 0.9894
Epoch 3/12
60000/60000 [==============================] - 7s - loss: 0.0225 - acc: 0.9931 - val_loss: 0.0323 - val_acc: 0.9905
Epoch 4/12
60000/60000 [==============================] - 7s - loss: 0.0223 - acc: 0.9932 - val_loss: 0.0235 - val_acc: 0.9927
Epoch 5/12
60000/60000 [==============================] - 7s - loss: 0.0236 - acc: 0.9926 - val_loss: 0.0216 - val_acc: 0.9937
Epoch 6/12
60000/60000 [==============================] - 6s - loss: 0.0220 - acc: 0.9933 - val_loss: 0.0259 - val_acc: 0.9918
Epoch 7/12
60000/60000 [==============================] - 7s - loss: 0.0207 - acc: 0.9936 - val_loss: 0.0298 - val_acc: 0.9899
Epoch 8/12
60000/60000 [==============================] - 7s - loss: 0.0216 - acc: 0.9932 - val_loss: 0.0268 - val_acc: 0.9929
Epoch 9/12
60000/60000 [==============================] - 7s - loss: 0.0206 - acc: 0.9936 - val_loss: 0.0282 - val_acc: 0.9913
Epoch 10/12
60000/60000 [==============================] - 7s - loss: 0.0194 - acc: 0.9940 - val_loss: 0.0296 - val_acc: 0.9927
Epoch 11/12
60000/60000 [==============================] - 7s - loss: 0.0191 - acc: 0.9940 - val_loss: 0.0193 - val_acc: 0.9941
Epoch 12/12
60000/60000 [==============================] - 7s - loss: 0.0187 - acc: 0.9945 - val_loss: 0.0294 - val_acc: 0.9914
Out[30]:
<keras.callbacks.History at 0x7fa800326ad0>

In [31]:
model.optimizer.lr=0.0001

In [32]:
model.fit_generator(batches, batches.N, nb_epoch=10, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/10
60000/60000 [==============================] - 7s - loss: 0.0191 - acc: 0.9942 - val_loss: 0.0277 - val_acc: 0.9906
Epoch 2/10
60000/60000 [==============================] - 7s - loss: 0.0196 - acc: 0.9938 - val_loss: 0.0192 - val_acc: 0.9945
Epoch 3/10
60000/60000 [==============================] - 6s - loss: 0.0173 - acc: 0.9946 - val_loss: 0.0258 - val_acc: 0.9924
Epoch 4/10
60000/60000 [==============================] - 7s - loss: 0.0189 - acc: 0.9943 - val_loss: 0.0249 - val_acc: 0.9924
Epoch 5/10
60000/60000 [==============================] - 7s - loss: 0.0166 - acc: 0.9951 - val_loss: 0.0271 - val_acc: 0.9920
Epoch 6/10
60000/60000 [==============================] - 7s - loss: 0.0183 - acc: 0.9942 - val_loss: 0.0229 - val_acc: 0.9937
Epoch 7/10
60000/60000 [==============================] - 7s - loss: 0.0177 - acc: 0.9944 - val_loss: 0.0275 - val_acc: 0.9924
Epoch 8/10
60000/60000 [==============================] - 6s - loss: 0.0168 - acc: 0.9946 - val_loss: 0.0246 - val_acc: 0.9926
Epoch 9/10
60000/60000 [==============================] - 7s - loss: 0.0169 - acc: 0.9943 - val_loss: 0.0215 - val_acc: 0.9936
Epoch 10/10
60000/60000 [==============================] - 7s - loss: 0.0160 - acc: 0.9953 - val_loss: 0.0267 - val_acc: 0.9919
Out[32]:
<keras.callbacks.History at 0x7fa800326fd0>

Batchnorm + data augmentation


In [125]:
def get_model_bn():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),   # axis=1: normalise over the channel axis (Theano 'th' dim ordering)
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [126]:
model = get_model_bn()

In [127]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 12s - loss: 0.1273 - acc: 0.9605 - val_loss: 0.0559 - val_acc: 0.9833
Out[127]:
<keras.callbacks.History at 0x7f37acf896d0>

In [128]:
model.optimizer.lr=0.1

In [129]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 13s - loss: 0.0555 - acc: 0.9827 - val_loss: 0.0439 - val_acc: 0.9859
Epoch 2/4
60000/60000 [==============================] - 13s - loss: 0.0455 - acc: 0.9859 - val_loss: 0.0337 - val_acc: 0.9899
Epoch 3/4
60000/60000 [==============================] - 13s - loss: 0.0377 - acc: 0.9882 - val_loss: 0.0332 - val_acc: 0.9890
Epoch 4/4
60000/60000 [==============================] - 13s - loss: 0.0372 - acc: 0.9884 - val_loss: 0.0303 - val_acc: 0.9904
Out[129]:
<keras.callbacks.History at 0x7f37acc5b450>

In [130]:
model.optimizer.lr=0.01

In [131]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 13s - loss: 0.0327 - acc: 0.9900 - val_loss: 0.0312 - val_acc: 0.9911
Epoch 2/12
60000/60000 [==============================] - 12s - loss: 0.0290 - acc: 0.9911 - val_loss: 0.0349 - val_acc: 0.9893
Epoch 3/12
60000/60000 [==============================] - 13s - loss: 0.0293 - acc: 0.9912 - val_loss: 0.0452 - val_acc: 0.9853
Epoch 4/12
60000/60000 [==============================] - 13s - loss: 0.0266 - acc: 0.9915 - val_loss: 0.0260 - val_acc: 0.9924
Epoch 5/12
60000/60000 [==============================] - 12s - loss: 0.0236 - acc: 0.9924 - val_loss: 0.0234 - val_acc: 0.9927
Epoch 6/12
60000/60000 [==============================] - 13s - loss: 0.0234 - acc: 0.9927 - val_loss: 0.0305 - val_acc: 0.9901
Epoch 7/12
60000/60000 [==============================] - 12s - loss: 0.0234 - acc: 0.9929 - val_loss: 0.0164 - val_acc: 0.9960
Epoch 8/12
60000/60000 [==============================] - 13s - loss: 0.0198 - acc: 0.9935 - val_loss: 0.0333 - val_acc: 0.9898
Epoch 9/12
60000/60000 [==============================] - 12s - loss: 0.0201 - acc: 0.9939 - val_loss: 0.0184 - val_acc: 0.9940
Epoch 10/12
60000/60000 [==============================] - 12s - loss: 0.0173 - acc: 0.9945 - val_loss: 0.0194 - val_acc: 0.9938
Epoch 11/12
60000/60000 [==============================] - 13s - loss: 0.0183 - acc: 0.9940 - val_loss: 0.0323 - val_acc: 0.9904
Epoch 12/12
60000/60000 [==============================] - 13s - loss: 0.0177 - acc: 0.9945 - val_loss: 0.0294 - val_acc: 0.9918
Out[131]:
<keras.callbacks.History at 0x7f37b176aa50>

In [132]:
model.optimizer.lr=0.001

In [133]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 13s - loss: 0.0166 - acc: 0.9947 - val_loss: 0.0205 - val_acc: 0.9933
Epoch 2/12
60000/60000 [==============================] - 13s - loss: 0.0168 - acc: 0.9950 - val_loss: 0.0194 - val_acc: 0.9942
Epoch 3/12
60000/60000 [==============================] - 12s - loss: 0.0151 - acc: 0.9953 - val_loss: 0.0197 - val_acc: 0.9942
Epoch 4/12
60000/60000 [==============================] - 13s - loss: 0.0135 - acc: 0.9954 - val_loss: 0.0179 - val_acc: 0.9938
Epoch 5/12
60000/60000 [==============================] - 12s - loss: 0.0143 - acc: 0.9953 - val_loss: 0.0257 - val_acc: 0.9925
Epoch 6/12
60000/60000 [==============================] - 12s - loss: 0.0139 - acc: 0.9954 - val_loss: 0.0150 - val_acc: 0.9949
Epoch 7/12
60000/60000 [==============================] - 13s - loss: 0.0127 - acc: 0.9958 - val_loss: 0.0218 - val_acc: 0.9932
Epoch 8/12
60000/60000 [==============================] - 13s - loss: 0.0121 - acc: 0.9962 - val_loss: 0.0264 - val_acc: 0.9917
Epoch 9/12
60000/60000 [==============================] - 13s - loss: 0.0120 - acc: 0.9960 - val_loss: 0.0209 - val_acc: 0.9935
Epoch 10/12
60000/60000 [==============================] - 13s - loss: 0.0130 - acc: 0.9957 - val_loss: 0.0171 - val_acc: 0.9948
Epoch 11/12
60000/60000 [==============================] - 13s - loss: 0.0132 - acc: 0.9958 - val_loss: 0.0227 - val_acc: 0.9932
Epoch 12/12
60000/60000 [==============================] - 12s - loss: 0.0115 - acc: 0.9964 - val_loss: 0.0172 - val_acc: 0.9945
Out[133]:
<keras.callbacks.History at 0x7f37b1789c50>

Batchnorm + dropout + data augmentation


In [79]:
def get_model_bn_do():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [80]:
model = get_model_bn_do()

In [81]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 13s - loss: 0.1894 - acc: 0.9419 - val_loss: 0.0605 - val_acc: 0.9815
Out[81]:
<keras.callbacks.History at 0x7fa7cea0d950>

In [82]:
model.optimizer.lr=0.1

In [83]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 13s - loss: 0.0766 - acc: 0.9764 - val_loss: 0.0394 - val_acc: 0.9871
Epoch 2/4
60000/60000 [==============================] - 13s - loss: 0.0622 - acc: 0.9806 - val_loss: 0.0360 - val_acc: 0.9885
Epoch 3/4
60000/60000 [==============================] - 13s - loss: 0.0576 - acc: 0.9830 - val_loss: 0.0364 - val_acc: 0.9882
Epoch 4/4
60000/60000 [==============================] - 14s - loss: 0.0512 - acc: 0.9842 - val_loss: 0.0347 - val_acc: 0.9911
Out[83]:
<keras.callbacks.History at 0x7fa7ce2c69d0>

In [84]:
model.optimizer.lr=0.01

In [85]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 14s - loss: 0.0464 - acc: 0.9862 - val_loss: 0.0300 - val_acc: 0.9904
Epoch 2/12
60000/60000 [==============================] - 13s - loss: 0.0474 - acc: 0.9856 - val_loss: 0.0287 - val_acc: 0.9912
Epoch 3/12
60000/60000 [==============================] - 13s - loss: 0.0400 - acc: 0.9880 - val_loss: 0.0408 - val_acc: 0.9879
Epoch 4/12
60000/60000 [==============================] - 14s - loss: 0.0379 - acc: 0.9884 - val_loss: 0.0255 - val_acc: 0.9918
Epoch 5/12
60000/60000 [==============================] - 13s - loss: 0.0394 - acc: 0.9881 - val_loss: 0.0247 - val_acc: 0.9923
Epoch 6/12
60000/60000 [==============================] - 14s - loss: 0.0344 - acc: 0.9893 - val_loss: 0.0267 - val_acc: 0.9921
Epoch 7/12
60000/60000 [==============================] - 14s - loss: 0.0342 - acc: 0.9895 - val_loss: 0.0208 - val_acc: 0.9938
Epoch 8/12
60000/60000 [==============================] - 14s - loss: 0.0291 - acc: 0.9908 - val_loss: 0.0251 - val_acc: 0.9914
Epoch 9/12
60000/60000 [==============================] - 14s - loss: 0.0309 - acc: 0.9907 - val_loss: 0.0253 - val_acc: 0.9919
Epoch 10/12
60000/60000 [==============================] - 14s - loss: 0.0299 - acc: 0.9906 - val_loss: 0.0205 - val_acc: 0.9934
Epoch 11/12
60000/60000 [==============================] - 14s - loss: 0.0276 - acc: 0.9912 - val_loss: 0.0200 - val_acc: 0.9940
Epoch 12/12
60000/60000 [==============================] - 13s - loss: 0.0268 - acc: 0.9918 - val_loss: 0.0201 - val_acc: 0.9929
Out[85]:
<keras.callbacks.History at 0x7fa7ce2e1810>

In [86]:
model.optimizer.lr=0.001

In [89]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 13s - loss: 0.0186 - acc: 0.9942 - val_loss: 0.0193 - val_acc: 0.9945
Out[89]:
<keras.callbacks.History at 0x7fa7ce5cf290>

Ensembling


In [90]:
def fit_model():
    model = get_model_bn_do()
    model.fit_generator(batches, batches.N, nb_epoch=1, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.1
    model.fit_generator(batches, batches.N, nb_epoch=4, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.01
    model.fit_generator(batches, batches.N, nb_epoch=12, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.001
    model.fit_generator(batches, batches.N, nb_epoch=18, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    return model

In [91]:
models = [fit_model() for i in range(6)]

In [92]:
path = "data/mnist/"
model_path = path + 'models/'

In [93]:
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')
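
The saved weights can be restored later without retraining. A small sketch of rebuilding the ensemble from disk (the file naming simply mirrors the cell above):

In [ ]:
# Sketch: restore the ensemble from the saved weight files instead of retraining
models = [get_model_bn_do() for i in range(6)]
for i, m in enumerate(models):
    m.load_weights(model_path + 'cnn-mnist23-' + str(i) + '.pkl')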

In [94]:
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])


 9984/10000 [============================>.] - ETA: 0s

In [95]:
evals.mean(axis=0)


Out[95]:
array([ 0.016,  0.995])

In [96]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])

In [97]:
all_preds.shape


Out[97]:
(6, 10000, 10)

In [98]:
avg_preds = all_preds.mean(axis=0)

In [99]:
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()


Out[99]:
array(0.9969000220298767, dtype=float32)
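
The same ensemble accuracy can be checked with plain NumPy, avoiding the Theano expression evaluation above; a sketch (argmax over the class axis of both the averaged predictions and the one-hot labels):

In [ ]:
# Sketch: ensemble accuracy with NumPy instead of keras.metrics
import numpy as np
np.mean(avg_preds.argmax(axis=1) == y_test.argmax(axis=1))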

In [ ]: