In [1]:
from theano.sandbox import cuda
cuda.use('gpu')


WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10).  Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\theano\sandbox\cuda\__init__.py:556: UserWarning: Theano flag device=gpu* (old gpu back-end) only support floatX=float32. You have floatX=float64. Use the new gpu back-end with device=cuda* for that value of floatX.
  warnings.warn(msg)
Using gpu device 0: TITAN X (Pascal) (CNMeM is disabled, cuDNN 6020)
C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\theano\sandbox\cuda\__init__.py:631: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.1.
  warnings.warn(warn)

In [3]:
%matplotlib inline
from imp import reload
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

Setup


In [4]:
batch_size=64

In [5]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


Out[5]:
((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [6]:
X_test = np.expand_dims(X_test,1)
X_train = np.expand_dims(X_train,1)

In [7]:
X_train.shape


Out[7]:
(60000, 1, 28, 28)

In [8]:
y_train[:5]


Out[8]:
array([5, 0, 4, 1, 9], dtype=uint8)

In [9]:
y_train = onehot(y_train)
y_test = onehot(y_test)

In [10]:
y_train[:5]


Out[10]:
array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [11]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)

In [12]:
def norm_input(x): return (x-mean_px)/std_px

Linear model


In [13]:
def get_lin_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [14]:
lm = get_lin_model()


C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\layers\core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_1 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [15]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-15-f1f1b4b36f3b> in <module>()
      1 gen = image.ImageDataGenerator()
----> 2 batches = gen.flow(X_train, y_train, batch_size=64)
      3 test_batches = gen.flow(X_test, y_test, batch_size=64)

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\preprocessing\image.py in flow(self, X, y, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format)
    425             save_to_dir=save_to_dir,
    426             save_prefix=save_prefix,
--> 427             save_format=save_format)
    428 
    429     def flow_from_directory(self, directory,

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\preprocessing\image.py in __init__(self, x, y, image_data_generator, batch_size, shuffle, seed, dim_ordering, save_to_dir, save_prefix, save_format)
    688                              'either 1, 3 or 4 channels on axis ' + str(channels_axis) + '. '
    689                              'However, it was passed an array with shape ' + str(self.x.shape) +
--> 690                              ' (' + str(self.x.shape[channels_axis]) + ' channels).')
    691         if y is not None:
    692             self.y = np.asarray(y)

ValueError: NumpyArrayIterator is set to use the dimension ordering convention "tf" (channels on axis 3), i.e. expected either 1, 3 or 4 channels on axis 3. However, it was passed an array with shape (60000, 1, 28, 28) (28 channels).

In [17]:
lm.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-196d05338dd4> in <module>()
----> 1 lm.fit_generator(batches, batches.N, nb_epoch=1, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'batches' is not defined

In [18]:
lm.optimizer.lr=0.1

In [19]:
lm.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-196d05338dd4> in <module>()
----> 1 lm.fit_generator(batches, batches.N, nb_epoch=1, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'batches' is not defined

In [20]:
lm.optimizer.lr=0.01

In [21]:
lm.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-780fd7693bf6> in <module>()
----> 1 lm.fit_generator(batches, batches.N, nb_epoch=4, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'batches' is not defined

Single dense layer


In [22]:
def get_fc_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(512, activation='softmax'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [23]:
fc = get_fc_model()


C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\layers\core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_2 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))

In [24]:
fc.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-9ded8bf2ee82> in <module>()
----> 1 fc.fit_generator(batches, batches.N, nb_epoch=1, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'batches' is not defined

In [25]:
fc.optimizer.lr=0.1

In [26]:
fc.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-d09d95a68087> in <module>()
----> 1 fc.fit_generator(batches, batches.N, nb_epoch=4, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'batches' is not defined

In [27]:
fc.optimizer.lr=0.01

In [28]:
fc.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-d09d95a68087> in <module>()
----> 1 fc.fit_generator(batches, batches.N, nb_epoch=4, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'batches' is not defined

Basic 'VGG-style' CNN


In [29]:
def get_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        Convolution2D(64,3,3, activation='relu'),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [30]:
model = get_model()


C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\layers\core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_3 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
<ipython-input-30-98e1902305d7> in <module>()
----> 1 model = get_model()

<ipython-input-29-c526dde7fbc9> in get_model()
     10         Flatten(),
     11         Dense(512, activation='relu'),
---> 12         Dense(10, activation='softmax')
     13         ])
     14     model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\models.py in __init__(self, layers, name)
    271         if layers:
    272             for layer in layers:
--> 273                 self.add(layer)
    274 
    275     def add(self, layer):

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\models.py in add(self, layer)
    330                  output_shapes=[self.outputs[0]._keras_shape])
    331         else:
--> 332             output_tensor = layer(self.outputs[0])
    333             if isinstance(output_tensor, list):
    334                 raise TypeError('All layers in a Sequential model '

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\topology.py in __call__(self, x, mask)
    544                                      '`layer.build(batch_input_shape)`')
    545             if len(input_shapes) == 1:
--> 546                 self.build(input_shapes[0])
    547             else:
    548                 self.build(input_shapes)

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\layers\core.py in build(self, input_shape)
    796                                  name='{}_W'.format(self.name),
    797                                  regularizer=self.W_regularizer,
--> 798                                  constraint=self.W_constraint)
    799         if self.bias:
    800             self.b = self.add_weight((self.output_dim,),

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\topology.py in add_weight(self, shape, initializer, name, trainable, regularizer, constraint)
    416         """
    417         initializer = initializations.get(initializer)
--> 418         weight = initializer(shape, name=name)
    419         if regularizer is not None:
    420             self.add_loss(regularizer(weight))

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\initializations.py in glorot_uniform(shape, name, dim_ordering)
     64     fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
     65     s = np.sqrt(6. / (fan_in + fan_out))
---> 66     return uniform(shape, s, name=name)
     67 
     68 

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\initializations.py in uniform(shape, scale, name, dim_ordering)
     31 
     32 def uniform(shape, scale=0.05, name=None, dim_ordering='th'):
---> 33     return K.random_uniform_variable(shape, -scale, scale, name=name)
     34 
     35 

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\backend\theano_backend.py in random_uniform_variable(shape, low, high, dtype, name)
    186 
    187 def random_uniform_variable(shape, low, high, dtype=None, name=None):
--> 188     return variable(np.random.uniform(low=low, high=high, size=shape),
    189                     dtype=dtype, name=name)
    190 

mtrand.pyx in mtrand.RandomState.uniform (numpy\random\mtrand\mtrand.c:19267)()

OverflowError: Range exceeds valid bounds

In [ ]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [ ]:
model.optimizer.lr=0.1

In [ ]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [31]:
model.optimizer.lr=0.01


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-31-9c05ae347078> in <module>()
----> 1 model.optimizer.lr=0.01

NameError: name 'model' is not defined

In [32]:
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-32-25750f55d727> in <module>()
----> 1 model.fit_generator(batches, batches.N, nb_epoch=8, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'model' is not defined

Data augmentation


In [33]:
model = get_model()


C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\layers\core.py:622: UserWarning: `output_shape` argument not specified for layer lambda_4 and cannot be automatically inferred with the Theano backend. Defaulting to output shape `(None, 1, 28, 28)` (same as input shape). If the expected output shape is different, specify it via the `output_shape` argument.
  .format(self.name, input_shape))
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
<ipython-input-33-98e1902305d7> in <module>()
----> 1 model = get_model()

<ipython-input-29-c526dde7fbc9> in get_model()
     10         Flatten(),
     11         Dense(512, activation='relu'),
---> 12         Dense(10, activation='softmax')
     13         ])
     14     model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\models.py in __init__(self, layers, name)
    271         if layers:
    272             for layer in layers:
--> 273                 self.add(layer)
    274 
    275     def add(self, layer):

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\models.py in add(self, layer)
    330                  output_shapes=[self.outputs[0]._keras_shape])
    331         else:
--> 332             output_tensor = layer(self.outputs[0])
    333             if isinstance(output_tensor, list):
    334                 raise TypeError('All layers in a Sequential model '

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\topology.py in __call__(self, x, mask)
    544                                      '`layer.build(batch_input_shape)`')
    545             if len(input_shapes) == 1:
--> 546                 self.build(input_shapes[0])
    547             else:
    548                 self.build(input_shapes)

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\layers\core.py in build(self, input_shape)
    796                                  name='{}_W'.format(self.name),
    797                                  regularizer=self.W_regularizer,
--> 798                                  constraint=self.W_constraint)
    799         if self.bias:
    800             self.b = self.add_weight((self.output_dim,),

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\topology.py in add_weight(self, shape, initializer, name, trainable, regularizer, constraint)
    416         """
    417         initializer = initializations.get(initializer)
--> 418         weight = initializer(shape, name=name)
    419         if regularizer is not None:
    420             self.add_loss(regularizer(weight))

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\initializations.py in glorot_uniform(shape, name, dim_ordering)
     64     fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering)
     65     s = np.sqrt(6. / (fan_in + fan_out))
---> 66     return uniform(shape, s, name=name)
     67 
     68 

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\initializations.py in uniform(shape, scale, name, dim_ordering)
     31 
     32 def uniform(shape, scale=0.05, name=None, dim_ordering='th'):
---> 33     return K.random_uniform_variable(shape, -scale, scale, name=name)
     34 
     35 

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\backend\theano_backend.py in random_uniform_variable(shape, low, high, dtype, name)
    186 
    187 def random_uniform_variable(shape, low, high, dtype=None, name=None):
--> 188     return variable(np.random.uniform(low=low, high=high, size=shape),
    189                     dtype=dtype, name=name)
    190 

mtrand.pyx in mtrand.RandomState.uniform (numpy\random\mtrand\mtrand.c:19267)()

OverflowError: Range exceeds valid bounds

In [34]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-34-33628e9bfc5a> in <module>()
      1 gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
      2                                height_shift_range=0.08, zoom_range=0.08)
----> 3 batches = gen.flow(X_train, y_train, batch_size=64)
      4 test_batches = gen.flow(X_test, y_test, batch_size=64)

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\preprocessing\image.py in flow(self, X, y, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format)
    425             save_to_dir=save_to_dir,
    426             save_prefix=save_prefix,
--> 427             save_format=save_format)
    428 
    429     def flow_from_directory(self, directory,

C:\Users\user\Anaconda3\envs\tensorflow\lib\site-packages\keras\preprocessing\image.py in __init__(self, x, y, image_data_generator, batch_size, shuffle, seed, dim_ordering, save_to_dir, save_prefix, save_format)
    688                              'either 1, 3 or 4 channels on axis ' + str(channels_axis) + '. '
    689                              'However, it was passed an array with shape ' + str(self.x.shape) +
--> 690                              ' (' + str(self.x.shape[channels_axis]) + ' channels).')
    691         if y is not None:
    692             self.y = np.asarray(y)

ValueError: NumpyArrayIterator is set to use the dimension ordering convention "tf" (channels on axis 3), i.e. expected either 1, 3 or 4 channels on axis 3. However, it was passed an array with shape (60000, 1, 28, 28) (28 channels).

In [35]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-35-0c9ccf957a20> in <module>()
----> 1 model.fit_generator(batches, batches.N, nb_epoch=1, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'model' is not defined

In [36]:
model.optimizer.lr=0.1


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-36-a1cc9f26018b> in <module>()
----> 1 model.optimizer.lr=0.1

NameError: name 'model' is not defined

In [37]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-37-3b9ff93591ab> in <module>()
----> 1 model.fit_generator(batches, batches.N, nb_epoch=4, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'model' is not defined

In [38]:
model.optimizer.lr=0.01


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-9c05ae347078> in <module>()
----> 1 model.optimizer.lr=0.01

NameError: name 'model' is not defined

In [39]:
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-39-25750f55d727> in <module>()
----> 1 model.fit_generator(batches, batches.N, nb_epoch=8, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'model' is not defined

In [40]:
model.optimizer.lr=0.001


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-40-300ba9dc31f4> in <module>()
----> 1 model.optimizer.lr=0.001

NameError: name 'model' is not defined

In [41]:
model.fit_generator(batches, batches.N, nb_epoch=14, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-41-4a95961484a3> in <module>()
----> 1 model.fit_generator(batches, batches.N, nb_epoch=14, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'model' is not defined

In [42]:
model.optimizer.lr=0.0001


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-42-2c375fc92b87> in <module>()
----> 1 model.optimizer.lr=0.0001

NameError: name 'model' is not defined

In [43]:
model.fit_generator(batches, batches.N, nb_epoch=10, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-43-411be85b7c70> in <module>()
----> 1 model.fit_generator(batches, batches.N, nb_epoch=10, 
      2                     validation_data=test_batches, nb_val_samples=test_batches.N)

NameError: name 'model' is not defined

Batchnorm + data augmentation


In [44]:
def get_model_bn():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [126]:
model = get_model_bn()

In [127]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 12s - loss: 0.1273 - acc: 0.9605 - val_loss: 0.0559 - val_acc: 0.9833
Out[127]:
<keras.callbacks.History at 0x7f37acf896d0>

In [128]:
model.optimizer.lr=0.1

In [129]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 13s - loss: 0.0555 - acc: 0.9827 - val_loss: 0.0439 - val_acc: 0.9859
Epoch 2/4
60000/60000 [==============================] - 13s - loss: 0.0455 - acc: 0.9859 - val_loss: 0.0337 - val_acc: 0.9899
Epoch 3/4
60000/60000 [==============================] - 13s - loss: 0.0377 - acc: 0.9882 - val_loss: 0.0332 - val_acc: 0.9890
Epoch 4/4
60000/60000 [==============================] - 13s - loss: 0.0372 - acc: 0.9884 - val_loss: 0.0303 - val_acc: 0.9904
Out[129]:
<keras.callbacks.History at 0x7f37acc5b450>

In [130]:
model.optimizer.lr=0.01

In [131]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 13s - loss: 0.0327 - acc: 0.9900 - val_loss: 0.0312 - val_acc: 0.9911
Epoch 2/12
60000/60000 [==============================] - 12s - loss: 0.0290 - acc: 0.9911 - val_loss: 0.0349 - val_acc: 0.9893
Epoch 3/12
60000/60000 [==============================] - 13s - loss: 0.0293 - acc: 0.9912 - val_loss: 0.0452 - val_acc: 0.9853
Epoch 4/12
60000/60000 [==============================] - 13s - loss: 0.0266 - acc: 0.9915 - val_loss: 0.0260 - val_acc: 0.9924
Epoch 5/12
60000/60000 [==============================] - 12s - loss: 0.0236 - acc: 0.9924 - val_loss: 0.0234 - val_acc: 0.9927
Epoch 6/12
60000/60000 [==============================] - 13s - loss: 0.0234 - acc: 0.9927 - val_loss: 0.0305 - val_acc: 0.9901
Epoch 7/12
60000/60000 [==============================] - 12s - loss: 0.0234 - acc: 0.9929 - val_loss: 0.0164 - val_acc: 0.9960
Epoch 8/12
60000/60000 [==============================] - 13s - loss: 0.0198 - acc: 0.9935 - val_loss: 0.0333 - val_acc: 0.9898
Epoch 9/12
60000/60000 [==============================] - 12s - loss: 0.0201 - acc: 0.9939 - val_loss: 0.0184 - val_acc: 0.9940
Epoch 10/12
60000/60000 [==============================] - 12s - loss: 0.0173 - acc: 0.9945 - val_loss: 0.0194 - val_acc: 0.9938
Epoch 11/12
60000/60000 [==============================] - 13s - loss: 0.0183 - acc: 0.9940 - val_loss: 0.0323 - val_acc: 0.9904
Epoch 12/12
60000/60000 [==============================] - 13s - loss: 0.0177 - acc: 0.9945 - val_loss: 0.0294 - val_acc: 0.9918
Out[131]:
<keras.callbacks.History at 0x7f37b176aa50>

In [132]:
model.optimizer.lr=0.001

In [133]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 13s - loss: 0.0166 - acc: 0.9947 - val_loss: 0.0205 - val_acc: 0.9933
Epoch 2/12
60000/60000 [==============================] - 13s - loss: 0.0168 - acc: 0.9950 - val_loss: 0.0194 - val_acc: 0.9942
Epoch 3/12
60000/60000 [==============================] - 12s - loss: 0.0151 - acc: 0.9953 - val_loss: 0.0197 - val_acc: 0.9942
Epoch 4/12
60000/60000 [==============================] - 13s - loss: 0.0135 - acc: 0.9954 - val_loss: 0.0179 - val_acc: 0.9938
Epoch 5/12
60000/60000 [==============================] - 12s - loss: 0.0143 - acc: 0.9953 - val_loss: 0.0257 - val_acc: 0.9925
Epoch 6/12
60000/60000 [==============================] - 12s - loss: 0.0139 - acc: 0.9954 - val_loss: 0.0150 - val_acc: 0.9949
Epoch 7/12
60000/60000 [==============================] - 13s - loss: 0.0127 - acc: 0.9958 - val_loss: 0.0218 - val_acc: 0.9932
Epoch 8/12
60000/60000 [==============================] - 13s - loss: 0.0121 - acc: 0.9962 - val_loss: 0.0264 - val_acc: 0.9917
Epoch 9/12
60000/60000 [==============================] - 13s - loss: 0.0120 - acc: 0.9960 - val_loss: 0.0209 - val_acc: 0.9935
Epoch 10/12
60000/60000 [==============================] - 13s - loss: 0.0130 - acc: 0.9957 - val_loss: 0.0171 - val_acc: 0.9948
Epoch 11/12
60000/60000 [==============================] - 13s - loss: 0.0132 - acc: 0.9958 - val_loss: 0.0227 - val_acc: 0.9932
Epoch 12/12
60000/60000 [==============================] - 12s - loss: 0.0115 - acc: 0.9964 - val_loss: 0.0172 - val_acc: 0.9945
Out[133]:
<keras.callbacks.History at 0x7f37b1789c50>

Batchnorm + dropout + data augmentation


In [79]:
def get_model_bn_do():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [80]:
model = get_model_bn_do()

In [81]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 13s - loss: 0.1894 - acc: 0.9419 - val_loss: 0.0605 - val_acc: 0.9815
Out[81]:
<keras.callbacks.History at 0x7fa7cea0d950>

In [82]:
model.optimizer.lr=0.1

In [83]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/4
60000/60000 [==============================] - 13s - loss: 0.0766 - acc: 0.9764 - val_loss: 0.0394 - val_acc: 0.9871
Epoch 2/4
60000/60000 [==============================] - 13s - loss: 0.0622 - acc: 0.9806 - val_loss: 0.0360 - val_acc: 0.9885
Epoch 3/4
60000/60000 [==============================] - 13s - loss: 0.0576 - acc: 0.9830 - val_loss: 0.0364 - val_acc: 0.9882
Epoch 4/4
60000/60000 [==============================] - 14s - loss: 0.0512 - acc: 0.9842 - val_loss: 0.0347 - val_acc: 0.9911
Out[83]:
<keras.callbacks.History at 0x7fa7ce2c69d0>

In [84]:
model.optimizer.lr=0.01

In [85]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/12
60000/60000 [==============================] - 14s - loss: 0.0464 - acc: 0.9862 - val_loss: 0.0300 - val_acc: 0.9904
Epoch 2/12
60000/60000 [==============================] - 13s - loss: 0.0474 - acc: 0.9856 - val_loss: 0.0287 - val_acc: 0.9912
Epoch 3/12
60000/60000 [==============================] - 13s - loss: 0.0400 - acc: 0.9880 - val_loss: 0.0408 - val_acc: 0.9879
Epoch 4/12
60000/60000 [==============================] - 14s - loss: 0.0379 - acc: 0.9884 - val_loss: 0.0255 - val_acc: 0.9918
Epoch 5/12
60000/60000 [==============================] - 13s - loss: 0.0394 - acc: 0.9881 - val_loss: 0.0247 - val_acc: 0.9923
Epoch 6/12
60000/60000 [==============================] - 14s - loss: 0.0344 - acc: 0.9893 - val_loss: 0.0267 - val_acc: 0.9921
Epoch 7/12
60000/60000 [==============================] - 14s - loss: 0.0342 - acc: 0.9895 - val_loss: 0.0208 - val_acc: 0.9938
Epoch 8/12
60000/60000 [==============================] - 14s - loss: 0.0291 - acc: 0.9908 - val_loss: 0.0251 - val_acc: 0.9914
Epoch 9/12
60000/60000 [==============================] - 14s - loss: 0.0309 - acc: 0.9907 - val_loss: 0.0253 - val_acc: 0.9919
Epoch 10/12
60000/60000 [==============================] - 14s - loss: 0.0299 - acc: 0.9906 - val_loss: 0.0205 - val_acc: 0.9934
Epoch 11/12
60000/60000 [==============================] - 14s - loss: 0.0276 - acc: 0.9912 - val_loss: 0.0200 - val_acc: 0.9940
Epoch 12/12
60000/60000 [==============================] - 13s - loss: 0.0268 - acc: 0.9918 - val_loss: 0.0201 - val_acc: 0.9929
Out[85]:
<keras.callbacks.History at 0x7fa7ce2e1810>

In [86]:
model.optimizer.lr=0.001

In [89]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)


Epoch 1/1
60000/60000 [==============================] - 13s - loss: 0.0186 - acc: 0.9942 - val_loss: 0.0193 - val_acc: 0.9945
Out[89]:
<keras.callbacks.History at 0x7fa7ce5cf290>

Ensembling


In [90]:
def fit_model():
    model = get_model_bn_do()
    model.fit_generator(batches, batches.N, nb_epoch=1, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.1
    model.fit_generator(batches, batches.N, nb_epoch=4, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.01
    model.fit_generator(batches, batches.N, nb_epoch=12, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    model.optimizer.lr=0.001
    model.fit_generator(batches, batches.N, nb_epoch=18, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    return model

In [91]:
models = [fit_model() for i in range(6)]

In [92]:
path = "data/mnist/"
model_path = path + 'models/'

In [93]:
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

In [94]:
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])


 9984/10000 [============================>.] - ETA: 0s

In [95]:
evals.mean(axis=0)


Out[95]:
array([ 0.016,  0.995])

In [96]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])

In [97]:
all_preds.shape


Out[97]:
(6, 10000, 10)

In [98]:
avg_preds = all_preds.mean(axis=0)

In [99]:
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()


Out[99]:
array(0.9969000220298767, dtype=float32)

In [ ]: