In [1]:
from theano.sandbox import cuda


WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10).  Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)

In [2]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function


Using Theano backend.

Setup


In [3]:
batch_size=64

In [4]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


Out[4]:
((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [5]:
X_test = np.expand_dims(X_test,1)
X_train = np.expand_dims(X_train,1)

In [6]:
X_train.shape


Out[6]:
(60000, 1, 28, 28)

In [7]:
y_train[:5]


Out[7]:
array([5, 0, 4, 1, 9], dtype=uint8)

In [8]:
y_train = onehot(y_train)
y_test = onehot(y_test)

In [9]:
y_train[:5]


Out[9]:
array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [10]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)

In [11]:
def norm_input(x): return (x-mean_px)/std_px

Linear model


In [12]:
def get_lin_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [13]:
lm = get_lin_model()


/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_1 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [14]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)

In [15]:
lm.fit_generator(batches, batches.n, nb_epoch=1, verbose=2, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
11s - loss: 0.4375 - acc: 0.8701 - val_loss: 0.2989 - val_acc: 0.9159
Out[15]:
<keras.callbacks.History at 0x7f80c41091d0>

In [16]:
lm.optimizer.lr=0.1

In [17]:
lm.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
11s - loss: 0.3002 - acc: 0.9144 - val_loss: 0.2866 - val_acc: 0.9220
Out[17]:
<keras.callbacks.History at 0x7f80bc7b0dd0>

In [18]:
lm.optimizer.lr=0.01

In [19]:
lm.fit_generator(batches, batches.n, nb_epoch=4, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
11s - loss: 0.2848 - acc: 0.9186 - val_loss: 0.2795 - val_acc: 0.9236
Epoch 2/4
11s - loss: 0.2767 - acc: 0.9230 - val_loss: 0.2755 - val_acc: 0.9211
Epoch 3/4
11s - loss: 0.2735 - acc: 0.9238 - val_loss: 0.2789 - val_acc: 0.9201
Epoch 4/4
11s - loss: 0.2690 - acc: 0.9248 - val_loss: 0.2857 - val_acc: 0.9215
Out[19]:
<keras.callbacks.History at 0x7f80bc7b0bd0>

Single dense layer


In [20]:
def get_fc_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(512, activation='softmax'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [21]:
fc = get_fc_model()


/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_2 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [22]:
fc.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
11s - loss: 1.5520 - acc: 0.8422 - val_loss: 1.0398 - val_acc: 0.8762
Out[22]:
<keras.callbacks.History at 0x7f80b3689550>

In [23]:
fc.optimizer.lr=0.1

In [24]:
fc.fit_generator(batches, batches.n, nb_epoch=4, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
12s - loss: 0.7773 - acc: 0.8971 - val_loss: 0.5822 - val_acc: 0.9114
Epoch 2/4
12s - loss: 0.4893 - acc: 0.9169 - val_loss: 0.4197 - val_acc: 0.9204
Epoch 3/4
11s - loss: 0.3763 - acc: 0.9255 - val_loss: 0.3379 - val_acc: 0.9290
Epoch 4/4
11s - loss: 0.3246 - acc: 0.9301 - val_loss: 0.3326 - val_acc: 0.9264
Out[24]:
<keras.callbacks.History at 0x7f80b355c090>

In [25]:
fc.optimizer.lr=0.01

In [26]:
fc.fit_generator(batches, batches.n, nb_epoch=4, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
11s - loss: 0.2970 - acc: 0.9334 - val_loss: 0.3031 - val_acc: 0.9299
Epoch 2/4
12s - loss: 0.2729 - acc: 0.9367 - val_loss: 0.2911 - val_acc: 0.9302
Epoch 3/4
12s - loss: 0.2608 - acc: 0.9393 - val_loss: 0.2785 - val_acc: 0.9331
Epoch 4/4
12s - loss: 0.2475 - acc: 0.9407 - val_loss: 0.2678 - val_acc: 0.9363
Out[26]:
<keras.callbacks.History at 0x7f80b355c150>

Basic 'VGG-style' CNN


In [27]:
def get_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        Convolution2D(64,3,3, activation='relu'),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [28]:
model = get_model()


/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_3 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [29]:
model.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
22s - loss: 0.1127 - acc: 0.9643 - val_loss: 0.0321 - val_acc: 0.9891
Out[29]:
<keras.callbacks.History at 0x7f80a8616050>

In [30]:
model.optimizer.lr=0.1

In [31]:
model.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
22s - loss: 0.0363 - acc: 0.9887 - val_loss: 0.0248 - val_acc: 0.9921
Out[31]:
<keras.callbacks.History at 0x7f80a6d41550>

In [32]:
model.optimizer.lr=0.01

In [33]:
model.fit_generator(batches, batches.n, nb_epoch=8, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/8
22s - loss: 0.0255 - acc: 0.9919 - val_loss: 0.0283 - val_acc: 0.9911
Epoch 2/8
22s - loss: 0.0188 - acc: 0.9945 - val_loss: 0.0188 - val_acc: 0.9941
Epoch 3/8
22s - loss: 0.0155 - acc: 0.9951 - val_loss: 0.0283 - val_acc: 0.9907
Epoch 4/8
22s - loss: 0.0137 - acc: 0.9956 - val_loss: 0.0229 - val_acc: 0.9928
Epoch 5/8
22s - loss: 0.0099 - acc: 0.9968 - val_loss: 0.0259 - val_acc: 0.9918
Epoch 6/8
22s - loss: 0.0100 - acc: 0.9971 - val_loss: 0.0386 - val_acc: 0.9877
Epoch 7/8
22s - loss: 0.0101 - acc: 0.9969 - val_loss: 0.0326 - val_acc: 0.9920
Epoch 8/8
22s - loss: 0.0072 - acc: 0.9977 - val_loss: 0.0209 - val_acc: 0.9940
Out[33]:
<keras.callbacks.History at 0x7f80a6d41510>

Data augmentation


In [34]:
model = get_model()


/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_4 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [35]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)

In [36]:
model.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
22s - loss: 0.1969 - acc: 0.9375 - val_loss: 0.0575 - val_acc: 0.9819
Out[36]:
<keras.callbacks.History at 0x7f80ced6a110>

In [37]:
model.optimizer.lr=0.1

In [38]:
model.fit_generator(batches, batches.n, nb_epoch=4, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
22s - loss: 0.0696 - acc: 0.9779 - val_loss: 0.0464 - val_acc: 0.9857
Epoch 2/4
22s - loss: 0.0546 - acc: 0.9831 - val_loss: 0.0430 - val_acc: 0.9855
Epoch 3/4
22s - loss: 0.0453 - acc: 0.9860 - val_loss: 0.0522 - val_acc: 0.9833
Epoch 4/4
22s - loss: 0.0433 - acc: 0.9870 - val_loss: 0.0418 - val_acc: 0.9863
Out[38]:
<keras.callbacks.History at 0x7f80ce5ae390>

In [39]:
model.optimizer.lr=0.01

In [40]:
model.fit_generator(batches, batches.n, nb_epoch=8, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/8
22s - loss: 0.0406 - acc: 0.9869 - val_loss: 0.0311 - val_acc: 0.9896
Epoch 2/8
22s - loss: 0.0362 - acc: 0.9889 - val_loss: 0.0283 - val_acc: 0.9919
Epoch 3/8
22s - loss: 0.0340 - acc: 0.9895 - val_loss: 0.0381 - val_acc: 0.9886
Epoch 4/8
22s - loss: 0.0328 - acc: 0.9894 - val_loss: 0.0309 - val_acc: 0.9905
Epoch 5/8
22s - loss: 0.0298 - acc: 0.9906 - val_loss: 0.0292 - val_acc: 0.9912
Epoch 6/8
22s - loss: 0.0292 - acc: 0.9906 - val_loss: 0.0360 - val_acc: 0.9899
Epoch 7/8
22s - loss: 0.0290 - acc: 0.9914 - val_loss: 0.0279 - val_acc: 0.9902
Epoch 8/8
22s - loss: 0.0256 - acc: 0.9922 - val_loss: 0.0298 - val_acc: 0.9890
Out[40]:
<keras.callbacks.History at 0x7f80ce5ae3d0>

In [41]:
model.optimizer.lr=0.001

In [42]:
model.fit_generator(batches, batches.n, nb_epoch=14, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/14
22s - loss: 0.0256 - acc: 0.9915 - val_loss: 0.0387 - val_acc: 0.9896
Epoch 2/14
22s - loss: 0.0254 - acc: 0.9919 - val_loss: 0.0310 - val_acc: 0.9902
Epoch 3/14
22s - loss: 0.0246 - acc: 0.9923 - val_loss: 0.0330 - val_acc: 0.9901
Epoch 4/14
22s - loss: 0.0225 - acc: 0.9928 - val_loss: 0.0311 - val_acc: 0.9907
Epoch 5/14
22s - loss: 0.0225 - acc: 0.9930 - val_loss: 0.0291 - val_acc: 0.9912
Epoch 6/14
22s - loss: 0.0243 - acc: 0.9924 - val_loss: 0.0237 - val_acc: 0.9926
Epoch 7/14
22s - loss: 0.0214 - acc: 0.9933 - val_loss: 0.0297 - val_acc: 0.9924
Epoch 8/14
22s - loss: 0.0216 - acc: 0.9933 - val_loss: 0.0249 - val_acc: 0.9923
Epoch 9/14
22s - loss: 0.0211 - acc: 0.9929 - val_loss: 0.0262 - val_acc: 0.9914
Epoch 10/14
22s - loss: 0.0204 - acc: 0.9937 - val_loss: 0.0254 - val_acc: 0.9921
Epoch 11/14
21s - loss: 0.0205 - acc: 0.9936 - val_loss: 0.0293 - val_acc: 0.9904
Epoch 12/14
22s - loss: 0.0210 - acc: 0.9937 - val_loss: 0.0237 - val_acc: 0.9915
Epoch 13/14
22s - loss: 0.0200 - acc: 0.9935 - val_loss: 0.0282 - val_acc: 0.9917
Epoch 14/14
22s - loss: 0.0194 - acc: 0.9938 - val_loss: 0.0323 - val_acc: 0.9904
Out[42]:
<keras.callbacks.History at 0x7f80ce5ae110>

In [43]:
model.optimizer.lr=0.0001

In [44]:
model.fit_generator(batches, batches.n, nb_epoch=10, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/10
22s - loss: 0.0183 - acc: 0.9940 - val_loss: 0.0235 - val_acc: 0.9931
Epoch 2/10
21s - loss: 0.0188 - acc: 0.9940 - val_loss: 0.0292 - val_acc: 0.9916
Epoch 3/10
22s - loss: 0.0200 - acc: 0.9934 - val_loss: 0.0254 - val_acc: 0.9914
Epoch 4/10
22s - loss: 0.0186 - acc: 0.9940 - val_loss: 0.0334 - val_acc: 0.9900
Epoch 5/10
22s - loss: 0.0188 - acc: 0.9938 - val_loss: 0.0246 - val_acc: 0.9929
Epoch 6/10
22s - loss: 0.0188 - acc: 0.9944 - val_loss: 0.0208 - val_acc: 0.9932
Epoch 7/10
22s - loss: 0.0176 - acc: 0.9945 - val_loss: 0.0375 - val_acc: 0.9899
Epoch 8/10
22s - loss: 0.0172 - acc: 0.9946 - val_loss: 0.0309 - val_acc: 0.9919
Epoch 9/10
22s - loss: 0.0174 - acc: 0.9946 - val_loss: 0.0314 - val_acc: 0.9907
Epoch 10/10
21s - loss: 0.0179 - acc: 0.9942 - val_loss: 0.0309 - val_acc: 0.9912
Out[44]:
<keras.callbacks.History at 0x7f80ce5ae150>

Batchnorm + data augmentation


In [45]:
def get_model_bn():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [46]:
model = get_model_bn()


/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_5 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [47]:
model.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
36s - loss: 0.1561 - acc: 0.9514 - val_loss: 0.0846 - val_acc: 0.9732
Out[47]:
<keras.callbacks.History at 0x7f80ce309250>

In [48]:
model.optimizer.lr=0.1

In [49]:
model.fit_generator(batches, batches.n, nb_epoch=4, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
36s - loss: 0.0703 - acc: 0.9779 - val_loss: 0.0550 - val_acc: 0.9823
Epoch 2/4
36s - loss: 0.0582 - acc: 0.9820 - val_loss: 0.0529 - val_acc: 0.9820
Epoch 3/4
36s - loss: 0.0522 - acc: 0.9832 - val_loss: 0.0427 - val_acc: 0.9879
Epoch 4/4
36s - loss: 0.0480 - acc: 0.9847 - val_loss: 0.0367 - val_acc: 0.9886
Out[49]:
<keras.callbacks.History at 0x7f80ce32d250>

In [50]:
model.optimizer.lr=0.01

In [51]:
model.fit_generator(batches, batches.n, nb_epoch=12,verbose=2, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/12
36s - loss: 0.0452 - acc: 0.9862 - val_loss: 0.0284 - val_acc: 0.9911
Epoch 2/12
36s - loss: 0.0407 - acc: 0.9866 - val_loss: 0.0368 - val_acc: 0.9868
Epoch 3/12
36s - loss: 0.0393 - acc: 0.9874 - val_loss: 0.0390 - val_acc: 0.9872
Epoch 4/12
36s - loss: 0.0359 - acc: 0.9886 - val_loss: 0.0349 - val_acc: 0.9886
Epoch 5/12
36s - loss: 0.0351 - acc: 0.9885 - val_loss: 0.0329 - val_acc: 0.9907
Epoch 6/12
36s - loss: 0.0327 - acc: 0.9895 - val_loss: 0.0304 - val_acc: 0.9906
Epoch 7/12
36s - loss: 0.0298 - acc: 0.9911 - val_loss: 0.0399 - val_acc: 0.9874
Epoch 8/12
36s - loss: 0.0297 - acc: 0.9901 - val_loss: 0.0276 - val_acc: 0.9906
Epoch 9/12
36s - loss: 0.0291 - acc: 0.9905 - val_loss: 0.0387 - val_acc: 0.9878
Epoch 10/12
36s - loss: 0.0289 - acc: 0.9907 - val_loss: 0.0209 - val_acc: 0.9934
Epoch 11/12
36s - loss: 0.0265 - acc: 0.9917 - val_loss: 0.0311 - val_acc: 0.9907
Epoch 12/12
36s - loss: 0.0259 - acc: 0.9918 - val_loss: 0.0230 - val_acc: 0.9926
Out[51]:
<keras.callbacks.History at 0x7f80a6812490>

In [52]:
model.optimizer.lr=0.001

In [53]:
model.fit_generator(batches, batches.n, nb_epoch=12, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/12
36s - loss: 0.0249 - acc: 0.9921 - val_loss: 0.0254 - val_acc: 0.9917
Epoch 2/12
36s - loss: 0.0245 - acc: 0.9922 - val_loss: 0.0216 - val_acc: 0.9927
Epoch 3/12
36s - loss: 0.0240 - acc: 0.9922 - val_loss: 0.0243 - val_acc: 0.9926
Epoch 4/12
36s - loss: 0.0227 - acc: 0.9928 - val_loss: 0.0347 - val_acc: 0.9898
Epoch 5/12
36s - loss: 0.0201 - acc: 0.9935 - val_loss: 0.0240 - val_acc: 0.9919
Epoch 6/12
36s - loss: 0.0216 - acc: 0.9931 - val_loss: 0.0272 - val_acc: 0.9917
Epoch 7/12
36s - loss: 0.0200 - acc: 0.9936 - val_loss: 0.0334 - val_acc: 0.9914
Epoch 8/12
36s - loss: 0.0211 - acc: 0.9932 - val_loss: 0.0229 - val_acc: 0.9926
Epoch 9/12
36s - loss: 0.0197 - acc: 0.9937 - val_loss: 0.0280 - val_acc: 0.9917
Epoch 10/12
36s - loss: 0.0201 - acc: 0.9933 - val_loss: 0.0211 - val_acc: 0.9929
Epoch 11/12
36s - loss: 0.0191 - acc: 0.9942 - val_loss: 0.0209 - val_acc: 0.9934
Epoch 12/12
36s - loss: 0.0197 - acc: 0.9935 - val_loss: 0.0235 - val_acc: 0.9926
Out[53]:
<keras.callbacks.History at 0x7f80ce309c50>

Batchnorm + dropout + data augmentation


In [54]:
def get_model_bn_do():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [55]:
model = get_model_bn_do()


/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_6 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [56]:
model.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
36s - loss: 0.2162 - acc: 0.9349 - val_loss: 0.0650 - val_acc: 0.9792
Out[56]:
<keras.callbacks.History at 0x7f809138e2d0>

In [57]:
model.optimizer.lr=0.1

In [58]:
model.fit_generator(batches, batches.n, nb_epoch=4, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/4
36s - loss: 0.0917 - acc: 0.9716 - val_loss: 0.0507 - val_acc: 0.9845
Epoch 2/4
36s - loss: 0.0752 - acc: 0.9774 - val_loss: 0.0468 - val_acc: 0.9850
Epoch 3/4
36s - loss: 0.0632 - acc: 0.9802 - val_loss: 0.0465 - val_acc: 0.9851
Epoch 4/4
36s - loss: 0.0622 - acc: 0.9810 - val_loss: 0.0433 - val_acc: 0.9841
Out[58]:
<keras.callbacks.History at 0x7f809138ea50>

In [59]:
model.optimizer.lr=0.01

In [60]:
model.fit_generator(batches, batches.n, nb_epoch=12,verbose=2, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/12
36s - loss: 0.0569 - acc: 0.9829 - val_loss: 0.0372 - val_acc: 0.9878
Epoch 2/12
36s - loss: 0.0518 - acc: 0.9840 - val_loss: 0.0333 - val_acc: 0.9889
Epoch 3/12
36s - loss: 0.0491 - acc: 0.9851 - val_loss: 0.0345 - val_acc: 0.9889
Epoch 4/12
36s - loss: 0.0466 - acc: 0.9860 - val_loss: 0.0293 - val_acc: 0.9903
Epoch 5/12
36s - loss: 0.0439 - acc: 0.9863 - val_loss: 0.0295 - val_acc: 0.9900
Epoch 6/12
36s - loss: 0.0422 - acc: 0.9874 - val_loss: 0.0266 - val_acc: 0.9921
Epoch 7/12
36s - loss: 0.0410 - acc: 0.9878 - val_loss: 0.0295 - val_acc: 0.9903
Epoch 8/12
36s - loss: 0.0394 - acc: 0.9874 - val_loss: 0.0286 - val_acc: 0.9921
Epoch 9/12
36s - loss: 0.0374 - acc: 0.9881 - val_loss: 0.0258 - val_acc: 0.9921
Epoch 10/12
36s - loss: 0.0354 - acc: 0.9889 - val_loss: 0.0289 - val_acc: 0.9920
Epoch 11/12
36s - loss: 0.0362 - acc: 0.9889 - val_loss: 0.0266 - val_acc: 0.9924
Epoch 12/12
36s - loss: 0.0367 - acc: 0.9888 - val_loss: 0.0241 - val_acc: 0.9929
Out[60]:
<keras.callbacks.History at 0x7f809138ead0>

In [61]:
model.optimizer.lr=0.001

In [62]:
model.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                    validation_data=test_batches, nb_val_samples=test_batches.n)


Epoch 1/1
36s - loss: 0.0331 - acc: 0.9899 - val_loss: 0.0244 - val_acc: 0.9921
Out[62]:
<keras.callbacks.History at 0x7f809138e950>

Ensembling


In [63]:
def fit_model():
    model = get_model_bn_do()
    model.fit_generator(batches, batches.n, nb_epoch=1, verbose=2,
                        validation_data=test_batches, nb_val_samples=test_batches.n)
    model.optimizer.lr=0.1
    model.fit_generator(batches, batches.n, nb_epoch=4, verbose=2,
                        validation_data=test_batches, nb_val_samples=test_batches.n)
    model.optimizer.lr=0.01
    model.fit_generator(batches, batches.n, nb_epoch=12, verbose=2,
                        validation_data=test_batches, nb_val_samples=test_batches.n)
    model.optimizer.lr=0.001
    model.fit_generator(batches, batches.n, nb_epoch=18, verbose=2,
                        validation_data=test_batches, nb_val_samples=test_batches.n)
    return model

In [64]:
models = [fit_model() for i in range(6)]


/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_7 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))
/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_8 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))
/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_9 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))
/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_10 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))
/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_11 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))
/home/ubuntu/anaconda2/lib/python2.7/site-packages/keras/layers/core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_12 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [65]:
path = "data/mnist/"
model_path = path + 'models/'

In [67]:
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

In [69]:
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])


 9472/10000 [===========================>..] - ETA: 0s

In [70]:
evals.mean(axis=0)


Out[70]:
array([ 0.0137,  0.9955])

In [71]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])

In [72]:
all_preds.shape


Out[72]:
(6, 10000, 10)

In [73]:
avg_preds = all_preds.mean(axis=0)

In [74]:
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()


Out[74]:
array(0.9968000054359436, dtype=float32)

In [ ]: